chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

1
vendor/strsim/.cargo-checksum.json vendored Normal file
View File

@@ -0,0 +1 @@
{"files":{".cargo_vcs_info.json":"f1b4d455ec43c8d4dd906a420bc9d9337c85ab879116266517cb95b2a5255bbc",".editorconfig":"d87dec5ba57378b3c32bbf67526e434f7ac4f44d8b1acc7f44b6d8e7ee6919ad","CHANGELOG.md":"fa5e48fab6b5642005fb5fa9f113b116d9da561a829cd773739350042e87e225","Cargo.toml":"e74cc8f9ab1f8680c26aebab519e3fbda7521eebd1e8fdcebdfc2e1bf138a875","Cargo.toml.orig":"164788a125f395001110409f08b2357370acb9bb766b88365a3e38f885559244","LICENSE":"1e697ce8d21401fbf1bddd9b5c3fd4c4c79ae1e3bdf51f81761c85e11d5a89cd","README.md":"599d424147dfbf88943bb6d78cebca346488fd246611917b586d73502a684c3a","SECURITY.md":"db1925a1d93a212ff6b55e9fbbceebb7a8f3a64688d76598475a54f1f242f0bf","benches/benches.rs":"2f7fae162a517378b42af04b4b077ffd563171f7341cba55b4efca3b4c30426a","src/lib.rs":"6f0b31f95526ccc0a88ed788b6be9b929bd8ee32fd0c3f38b0399cb7e63954e3","tests/lib.rs":"4c8207a5728b82836795e2f87d7d7834db7276082f5ded640f34822feb750cb4"},"package":"7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"}

6
vendor/strsim/.cargo_vcs_info.json vendored Normal file
View File

@@ -0,0 +1,6 @@
{
"git": {
"sha1": "76c5a900e6e12cfc605eee5ab6e36300384c8682"
},
"path_in_vcs": ""
}

13
vendor/strsim/.editorconfig vendored Normal file
View File

@@ -0,0 +1,13 @@
# editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
indent_brace_style = K&R
indent_size = 4
indent_style = space
insert_final_newline = true
max_line_length = 80
trim_trailing_whitespace = true

233
vendor/strsim/CHANGELOG.md vendored Normal file
View File

@@ -0,0 +1,233 @@
# Change Log
This project attempts to adhere to [Semantic Versioning](http://semver.org).
## [Unreleased]
## [0.11.1] - (2024-04-03)
### Fixed
- Drop MSRV down to 1.56 which was mistakenly changed in 0.11.0
## [0.11.0] - (2024-01-07)
### Changed
- improve OSA implementation
- reduce runtime
- reduce binary size by more than `25%`
- reduce binary size of Levenshtein distance
- improve Damerau-Levenshtein implementation
- reduce memory usage from `O(N*M)` to `O(N+M)`
- reduce runtime in our own benchmark by more than `70%`
- reduce binary size by more than `25%`
- only boost similarity in Jaro-Winkler once the Jaro similarity exceeds 0.7
### Fixed
- Fix transposition counting in Jaro and Jaro-Winkler.
- Limit common prefix in Jaro-Winkler to 4 characters
## [0.10.0] - (2020-01-31)
### Added
- Sørensen-Dice implementation (thanks [@robjtede](https://github.com/robjtede))
## [0.9.3] - (2019-12-12)
### Fixed
- Fix Jaro and Jaro-Winkler when the arguments have lengths of 1 and are equal.
Previously, the functions would erroneously return 0 instead of 1. Thanks to
[@vvrably](https://github.com/vvrably) for pointing out the issue.
## [0.9.2] - (2019-05-09)
### Changed
- Revert back to the standard library hashmap because it will use hashbrown very
soon
- Remove ndarray in favor of using a single vector to represent the 2d grid in
Damerau-Levenshtein
## [0.9.1] - (2019-04-08)
### Changed
- Faster Damerau-Levenshtein implementation (thanks [@lovasoa](https://github.com/lovasoa))
## [0.9.0] - (2019-04-06)
### Added
- Generic distance functions (thanks [@lovasoa](https://github.com/lovasoa))
## [0.8.0] - (2018-08-19)
### Added
- Normalized versions of Levenshtein and Damerau-Levenshtein (thanks [@gentoid](https://github.com/gentoid))
## [0.7.0] - (2018-01-17)
### Changed
- Faster Levenshtein implementation (thanks [@wdv4758h](https://github.com/wdv4758h))
### Removed
- Remove the "against_vec" functions. They are one-liners now, so they don't
seem to add enough value to justify making the API larger. I didn't find
anybody using them when I skimmed through a GitHub search. If you do use them,
you can change the calls to something like:
```rust
let distances = strings.iter().map(|a| jaro(target, a)).collect();
```
## [0.6.0] - (2016-12-26)
### Added
- Add optimal string alignment distance
### Fixed
- Fix Damerau-Levenshtein implementation (previous implementation was actually
optimal string alignment; see this [Damerau-Levenshtein explanation])
## [0.5.2] - (2016-11-21)
### Changed
- Remove Cargo generated documentation in favor of a [docs.rs] link
## [0.5.1] - (2016-08-23)
### Added
- Add Cargo generated documentation
### Fixed
- Fix panic when Jaro or Jaro-Winkler are given strings both with a length of
one
## [0.5.0] - (2016-08-11)
### Changed
- Make Hamming faster (thanks @IBUzPE9) when the two strings have the same
length but slower when they have different lengths
## [0.4.1] - (2016-04-18)
### Added
- Add Vagrant setup for development
- Add AppVeyor configuration for Windows CI
### Fixed
- Fix metrics when given strings with multibyte characters (thanks @WanzenBug)
## [0.4.0] - (2015-06-10)
### Added
- For each metric, add a function that takes a vector of strings and returns a
vector of results (thanks @ovarene)
## [0.3.0] - (2015-04-30)
### Changed
- Remove usage of unstable Rust features
## [0.2.5] - (2015-04-24)
### Fixed
- Remove unnecessary `Float` import from doc tests
## [0.2.4] - (2015-04-15)
### Fixed
- Remove unused `core` feature flag
## [0.2.3] - (2015-04-01)
### Fixed
- Remove now unnecessary `Float` import
## [0.2.2] - (2015-03-29)
### Fixed
- Remove usage of `char_at` (marked as unstable)
## [0.2.1] - (2015-02-20)
### Fixed
- Update bit vector import to match Rust update
## [0.2.0] - (2015-02-19)
### Added
- Implement Damerau-Levenshtein
- Add tests in docs
## [0.1.1] - (2015-02-10)
### Added
- Configure Travis for CI
- Add rustdoc comments
### Fixed
- Limit Jaro-Winkler return value to a maximum of 1.0
- Fix float comparisons in tests
## [0.1.0] - (2015-02-09)
### Added
- Implement Hamming, Jaro, Jaro-Winkler, and Levenshtein
[Unreleased]: https://github.com/rapidfuzz/strsim-rs/compare/0.11.1...HEAD
[0.11.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.11.0...0.11.1
[0.11.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.10.0...0.11.0
[0.10.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.3...0.10.0
[0.9.3]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.2...0.9.3
[0.9.2]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.1...0.9.2
[0.9.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.0...0.9.1
[0.9.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.8.0...0.9.0
[0.8.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.7.0...0.8.0
[0.7.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.6.0...0.7.0
[0.6.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.5.2...0.6.0
[0.5.2]: https://github.com/rapidfuzz/strsim-rs/compare/0.5.1...0.5.2
[0.5.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.5.0...0.5.1
[0.5.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.4.1...0.5.0
[0.4.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.4.0...0.4.1
[0.4.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.3.0...0.4.0
[0.3.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.5...0.3.0
[0.2.5]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.4...0.2.5
[0.2.4]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.3...0.2.4
[0.2.3]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.2...0.2.3
[0.2.2]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.1...0.2.2
[0.2.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.0...0.2.1
[0.2.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.1.1...0.2.0
[0.1.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.1.0...0.1.1
[0.1.0]: https://github.com/rapidfuzz/strsim-rs/compare/fabad4...0.1.0
[docs.rs]: https://docs.rs/strsim/
[Damerau-Levenshtein explanation]:
http://scarcitycomputing.blogspot.com/2013/04/damerau-levenshtein-edit-distance.html

40
vendor/strsim/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,40 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
rust-version = "1.56"
name = "strsim"
version = "0.11.1"
authors = [
"Danny Guo <danny@dannyguo.com>",
"maxbachmann <oss@maxbachmann.de>",
]
exclude = [
"/.github",
"/dev",
]
description = """
Implementations of string similarity metrics. Includes Hamming, Levenshtein,
OSA, Damerau-Levenshtein, Jaro, Jaro-Winkler, and Sørensen-Dice.
"""
homepage = "https://github.com/rapidfuzz/strsim-rs"
documentation = "https://docs.rs/strsim/"
readme = "README.md"
keywords = [
"string",
"similarity",
"Hamming",
"Levenshtein",
"Jaro",
]
categories = ["text-processing"]
license = "MIT"
repository = "https://github.com/rapidfuzz/strsim-rs"

23
vendor/strsim/LICENSE vendored Normal file
View File

@@ -0,0 +1,23 @@
The MIT License (MIT)
Copyright (c) 2015 Danny Guo
Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
Copyright (c) 2018 Akash Kurdekar
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

102
vendor/strsim/README.md vendored Normal file
View File

@@ -0,0 +1,102 @@
# strsim-rs
[![Crates.io](https://img.shields.io/crates/v/strsim.svg)](https://crates.io/crates/strsim)
[![Crates.io](https://img.shields.io/crates/l/strsim.svg?maxAge=2592000)](https://github.com/rapidfuzz/strsim-rs/blob/main/LICENSE)
[![CI status](https://github.com/rapidfuzz/strsim-rs/workflows/CI/badge.svg)](https://github.com/rapidfuzz/strsim-rs/actions?query=branch%3Amain)
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]:
- [Hamming]
- [Levenshtein] - distance & normalized
- [Optimal string alignment]
- [Damerau-Levenshtein] - distance & normalized
- [Jaro and Jaro-Winkler]
- [Sørensen-Dice]
The normalized versions return values between `0.0` and `1.0`, where `1.0` means
an exact match.
There are also generic versions of the functions for non-string inputs.
## Installation
`strsim` is available on [crates.io](https://crates.io/crates/strsim). Add it to
your project:
```sh
cargo add strsim
```
## Usage
Go to [Docs.rs](https://docs.rs/strsim/) for the full documentation. You can
also clone the repo, and run `$ cargo doc --open`.
### Examples
```rust
extern crate strsim;
use strsim::{hamming, levenshtein, normalized_levenshtein, osa_distance,
damerau_levenshtein, normalized_damerau_levenshtein, jaro,
jaro_winkler, sorensen_dice};
fn main() {
match hamming("hamming", "hammers") {
Ok(distance) => assert_eq!(3, distance),
Err(why) => panic!("{:?}", why)
}
assert_eq!(levenshtein("kitten", "sitting"), 3);
assert!((normalized_levenshtein("kitten", "sitting") - 0.571).abs() < 0.001);
assert_eq!(osa_distance("ac", "cba"), 3);
assert_eq!(damerau_levenshtein("ac", "cba"), 2);
assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.272).abs() <
0.001);
assert!((jaro("Friedrich Nietzsche", "Jean-Paul Sartre") - 0.392).abs() <
0.001);
assert!((jaro_winkler("cheeseburger", "cheese fries") - 0.866).abs() <
0.001);
assert_eq!(sorensen_dice("web applications", "applications of the web"),
0.7878787878787878);
}
```
Using the generic versions of the functions:
```rust
extern crate strsim;
use strsim::generic_levenshtein;
fn main() {
assert_eq!(2, generic_levenshtein(&[1, 2, 3], &[0, 2, 5]));
}
```
## Contributing
If you don't want to install Rust itself, you can run `$ ./dev` for a
development CLI if you have [Docker] installed.
Benchmarks require a Nightly toolchain. Run `$ cargo +nightly bench`.
## License
[MIT](https://github.com/rapidfuzz/strsim-rs/blob/main/LICENSE)
[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric
[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance
[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance
[Optimal string alignment]:https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
[Sørensen-Dice]:http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
[Docker]:https://docs.docker.com/engine/installation/

19
vendor/strsim/SECURITY.md vendored Normal file
View File

@@ -0,0 +1,19 @@
## Reporting Security Issues
If you believe you have found a security vulnerability in the project, please report it to us through coordinated disclosure.
**Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.**
Instead, please send an email to oss@maxbachmann.de.
Please include as much of the information listed below as you can to help us better understand and resolve the issue:
* The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.

95
vendor/strsim/benches/benches.rs vendored Normal file
View File

@@ -0,0 +1,95 @@
//! Benchmarks for strsim.
#![feature(test)]
extern crate strsim;
extern crate test;
use self::test::Bencher;
/// Benchmarks `strsim::hamming` on two 48-character DNA-style strings.
///
/// The distance is returned from the closure (no trailing `;`) so that
/// `Bencher::iter` receives the result and the optimizer cannot discard the
/// call being measured.
#[bench]
fn bench_hamming(bencher: &mut Bencher) {
    let a = "ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGG";
    let b = "CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGC";
    bencher.iter(|| {
        // Both fixtures have the same length; an `Err` here would mean the
        // fixture itself is broken, so panicking is the right response.
        strsim::hamming(a, b).unwrap()
    })
}
/// Benchmarks `strsim::jaro` on two short phrases sharing a common prefix.
///
/// The similarity is returned from the closure so that `Bencher::iter`
/// receives the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_jaro(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::jaro(a, b))
}
/// Benchmarks `strsim::jaro_winkler` on two short phrases sharing a common
/// prefix.
///
/// The similarity is returned from the closure so that `Bencher::iter`
/// receives the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_jaro_winkler(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::jaro_winkler(a, b))
}
/// Benchmarks `strsim::levenshtein` on two short phrases sharing a common
/// prefix.
///
/// The distance is returned from the closure so that `Bencher::iter` receives
/// the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_levenshtein(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::levenshtein(a, b))
}
/// Benchmarks `strsim::generic_levenshtein` on byte vectors of length 30 and
/// 31 that have no element in common.
///
/// The fixtures are built once, outside the timed closure, so the measurement
/// covers the distance computation and not two heap allocations per
/// iteration. The distance is returned from the closure so that
/// `Bencher::iter` receives the result and the optimizer cannot discard the
/// measured call.
#[bench]
fn bench_levenshtein_on_u8(bencher: &mut Bencher) {
    let a = vec![0u8; 30];
    let b = vec![7u8; 31];
    bencher.iter(|| strsim::generic_levenshtein(&a, &b))
}
/// Benchmarks `strsim::normalized_levenshtein` on two short phrases sharing a
/// common prefix.
///
/// The score is returned from the closure so that `Bencher::iter` receives
/// the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_normalized_levenshtein(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::normalized_levenshtein(a, b))
}
/// Benchmarks `strsim::osa_distance` (optimal string alignment) on two short
/// phrases sharing a common prefix.
///
/// The distance is returned from the closure so that `Bencher::iter` receives
/// the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_osa_distance(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::osa_distance(a, b))
}
/// Benchmarks `strsim::damerau_levenshtein` on two short phrases sharing a
/// common prefix.
///
/// The distance is returned from the closure so that `Bencher::iter` receives
/// the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_damerau_levenshtein(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::damerau_levenshtein(a, b))
}
/// Benchmarks `strsim::normalized_damerau_levenshtein` on two short phrases
/// sharing a common prefix.
///
/// The score is returned from the closure so that `Bencher::iter` receives
/// the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_normalized_damerau_levenshtein(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::normalized_damerau_levenshtein(a, b))
}
/// Benchmarks `strsim::sorensen_dice` on two short phrases sharing a common
/// prefix.
///
/// The coefficient is returned from the closure so that `Bencher::iter`
/// receives the result and the optimizer cannot discard the measured call.
#[bench]
fn bench_sorensen_dice(bencher: &mut Bencher) {
    let a = "Philosopher Friedrich Nietzsche";
    let b = "Philosopher Jean-Paul Sartre";
    bencher.iter(|| strsim::sorensen_dice(a, b))
}

1307
vendor/strsim/src/lib.rs vendored Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

View File

@@ -0,0 +1 @@
{"name":"strsim","vers":"0.11.1","deps":[],"features":{},"features2":null,"cksum":"5d05539126e16f914ad3d304cd12022e36bdeec89031bf5271a7ac07537374fc","yanked":null,"links":null,"rust_version":null,"v":2}

Binary file not shown.

71
vendor/strsim/tests/lib.rs vendored Normal file
View File

@@ -0,0 +1,71 @@
extern crate strsim;
use strsim::{
damerau_levenshtein, hamming, jaro, jaro_winkler, levenshtein, normalized_damerau_levenshtein,
normalized_levenshtein, osa_distance,
};
/// Asserts that two floating-point values lie within a tolerance of each
/// other.
///
/// * `$x` - the expected value (call sites in this file pass it first).
/// * `$y` - the actual value produced by the code under test.
/// * `$d` - the maximum allowed absolute difference; `1e-5` when omitted.
///
/// Panics, reporting both values and the tolerance, when the absolute
/// difference exceeds `$d` or when it cannot be ordered against `$d` at all
/// (for example because the actual value is NaN).
macro_rules! assert_delta {
    ($x:expr, $y:expr) => {
        assert_delta!($x, $y, 1e-5);
    };
    ($x:expr, $y:expr, $d:expr) => {{
        // Bind every operand exactly once so that side effects and expensive
        // calls are not repeated when the failure message is formatted.
        let (expected, actual, tolerance) = ($x, $y, $d);
        // Deliberately a negated `<=`: a NaN difference compares false against
        // everything, so testing `diff > tolerance` would let NaN pass.
        if !((expected - actual).abs() <= tolerance) {
            panic!(
                "assertion failed: actual: `{}`, expected: `{}`: \
                 actual not within < {} of expected",
                actual, expected, tolerance
            );
        }
    }};
}
/// Smoke test for `hamming`: "hamming" and "hammers" differ in their last
/// three positions, and any error is surfaced through its `Debug` output.
#[test]
fn hamming_works() {
    let distance = hamming("hamming", "hammers").unwrap_or_else(|why| panic!("{:?}", why));
    assert_eq!(3, distance);
}
/// Smoke test for `levenshtein` using the classic "kitten"/"sitting" pair,
/// whose edit distance is 3.
#[test]
fn levenshtein_works() {
    let distance = levenshtein("kitten", "sitting");
    assert_eq!(3, distance);
}
/// Smoke test for `normalized_levenshtein`: "kitten" vs "sitting" should score
/// 0.57142 within the macro's default tolerance.
#[test]
fn normalized_levenshtein_works() {
    let similarity = normalized_levenshtein("kitten", "sitting");
    assert_delta!(0.57142, similarity);
}
/// Smoke test for `osa_distance`: "ac" vs "cba" is expected to cost 3 edits.
#[test]
fn osa_distance_works() {
    let distance = osa_distance("ac", "cba");
    assert_eq!(3, distance);
}
/// Smoke test for `damerau_levenshtein`: "ac" vs "cba" is expected to cost 2
/// edits.
#[test]
fn damerau_levenshtein_works() {
    let distance = damerau_levenshtein("ac", "cba");
    assert_eq!(2, distance);
}
/// Smoke test for `normalized_damerau_levenshtein` with a non-ASCII input:
/// "levenshtein" vs "löwenbräu" should score 0.27272 within the macro's
/// default tolerance.
#[test]
fn normalized_damerau_levenshtein_works() {
    let similarity = normalized_damerau_levenshtein("levenshtein", "löwenbräu");
    assert_delta!(0.27272, similarity);
}
/// Smoke test for `jaro`: the two philosopher names should score 0.392 within
/// a tolerance of 0.001.
#[test]
fn jaro_works() {
    let similarity = jaro("Friedrich Nietzsche", "Jean-Paul Sartre");
    assert_delta!(0.392, similarity, 0.001);
}
/// Smoke test for `jaro_winkler`: "cheeseburger" vs "cheese fries" should
/// score 0.866 within a tolerance of 0.001.
#[test]
fn jaro_winkler_works() {
    let similarity = jaro_winkler("cheeseburger", "cheese fries");
    assert_delta!(0.866, similarity, 0.001);
}