From 2c8c070167d71dfd56c20283b61af7a39185e54f Mon Sep 17 00:00:00 2001 From: jlb52 Date: Fri, 19 Apr 2024 08:29:57 +0200 Subject: [PATCH] Adds examples to token_set_ratio --- README.md | 6 ++++++ src/rapidfuzz/fuzz_py.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/README.md b/README.md index 91db1781..b2fd6a6d 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,12 @@ Scorers in RapidFuzz can be found in the modules `fuzz` and `distance`. 84.21052631578947 > fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear") 100.0 +# Returns 100.0 if the words of one string are a subset of the words of the other, regardless of extra content in the longer string +> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear") +100.0 +# Score is reduced only when there is explicit disagreement between the two strings +> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear but not a cat") +92.3076923076923 ``` #### Weighted Ratio diff --git a/src/rapidfuzz/fuzz_py.py b/src/rapidfuzz/fuzz_py.py index 86d07b9c..25138d4b 100644 --- a/src/rapidfuzz/fuzz_py.py +++ b/src/rapidfuzz/fuzz_py.py @@ -434,6 +434,12 @@ def token_set_ratio( 83.8709716796875 >>> fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear") 100.0 + # Returns 100.0 if the words of one string are a subset of the words of the other, regardless of extra content in the longer string + >>> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear") + 100.0 + # Score is reduced only when there is explicit disagreement between the two strings + >>> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear but not a cat") + 92.3076923076923 """ setupPandas() if is_none(s1) or is_none(s2):