diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 929a000d..9e520662 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Changelog --------- -[3.5.0] - 2023-10-30 +[3.5.0] - 2023-10-31 ^^^^^^^^^^^^^^^^^^^^ Changed ~~~~~~~ @@ -19,6 +19,7 @@ Performance Fixed ~~~~~ * the preprocessing function was always called through Python due to a broken C-API version check +* fix wraparound issue in SIMD implementation of Jaro and Jaro-Winkler [3.4.0] - 2023-10-09 ^^^^^^^^^^^^^^^^^^^^ diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dbacc39..f8680328 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,7 @@ else() add_library(Taskflow::Taskflow ALIAS Taskflow) endif() -find_package(rapidfuzz 2.2.0 QUIET) +find_package(rapidfuzz 2.2.1 QUIET) if(rapidfuzz_FOUND) message(STATUS "Using system supplied version of rapidfuzz-cpp") else() diff --git a/extern/rapidfuzz-cpp b/extern/rapidfuzz-cpp index 39d36d5a..5bb1b06d 160000 --- a/extern/rapidfuzz-cpp +++ b/extern/rapidfuzz-cpp @@ -1 +1 @@ -Subproject commit 39d36d5a29a2b8c74b364174279f1101c5d69abf +Subproject commit 5bb1b06d41e8f6ae07839eb4f0fb8eed69b5c090 diff --git a/tests/distance/test_Jaro.py b/tests/distance/test_Jaro.py index 805a3e25..62819dff 100644 --- a/tests/distance/test_Jaro.py +++ b/tests/distance/test_Jaro.py @@ -11,6 +11,9 @@ def test_hash_special_case(): def test_edge_case_lengths(): + """ + these are largely found by fuzz tests and implemented here as regression tests + """ assert pytest.approx(Jaro.similarity("", "")) == 1 assert pytest.approx(Jaro.similarity("0", "0")) == 1 assert pytest.approx(Jaro.similarity("00", "00")) == 1 @@ -20,6 +23,14 @@ def test_edge_case_lengths(): assert pytest.approx(Jaro.similarity("0" * 64, "0" * 65)) == 0.994872 assert pytest.approx(Jaro.similarity("0" * 63, "0" * 65)) == 0.989744 + s1 = "000000001" + s2 = "0000010" + assert pytest.approx(Jaro.similarity(s1, s2)) == 0.878307 + + s1 = "01234567" + s2 = "0" * 170 + "7654321" + "0" * 200 + assert 
pytest.approx(Jaro.similarity(s1, s2)) == 0.548740 + s1 = "10000000000000000000000000000000000000000000000000000000000000020" s2 = "00000000000000000000000000000000000000000000000000000000000000000" assert pytest.approx(Jaro.similarity(s1, s2)) == 0.979487 diff --git a/tests/distance/test_JaroWinkler.py b/tests/distance/test_JaroWinkler.py index 4caadfe4..b5edb808 100644 --- a/tests/distance/test_JaroWinkler.py +++ b/tests/distance/test_JaroWinkler.py @@ -11,6 +11,9 @@ def test_hash_special_case(): def test_edge_case_lengths(): + """ + these are largely found by fuzz tests and implemented here as regression tests + """ assert pytest.approx(JaroWinkler.similarity("", "")) == 1.0 assert pytest.approx(JaroWinkler.similarity("0", "0")) == 1 assert pytest.approx(JaroWinkler.similarity("00", "00")) == 1 @@ -20,6 +23,10 @@ def test_edge_case_lengths(): assert pytest.approx(JaroWinkler.similarity("0" * 64, "0" * 65)) == 0.996923 assert pytest.approx(JaroWinkler.similarity("0" * 63, "0" * 65)) == 0.993846 + s1 = "000000001" + s2 = "0000010" + assert pytest.approx(JaroWinkler.similarity(s1, s2)) == 0.926984 + s1 = "10000000000000000000000000000000000000000000000000000000000000020" s2 = "00000000000000000000000000000000000000000000000000000000000000000" assert pytest.approx(JaroWinkler.similarity(s1, s2)) == 0.979487