Skip to content

Commit

Permalink
chore: simplify patterns
Browse files (browse the repository at this point in the history)
  • Loading branch information
ppfeister committed Aug 20, 2024
1 parent 838e8cc commit 0cf5054
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 8 deletions.
10 changes: 3 additions & 7 deletions src/sylva/helpers/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
],
'patterns': [
[
{"POS": "PRON", "LOWER": {"IN": ["i", "me", "my", "mine", "myself"]}},
{"POS": "VERB", "LEMMA": {"IN": ["use", "be", "now"]}, "OP": "?"},
{"POS": "PART", "OP": "{,2}"},
{"POS": "AUX", "OP": "?"},
{"LEMMA": {"IN": ["live", "reside", "move", "hail", "grow", "bear", "relocate", "base", "shift", "move"]}},
{"POS": "ADP", "OP": "{,2}"},
{"LEMMA": "of", "OP": "?"},
{"ENT_TYPE": "GPE", "OP": "+"},
]
],
'first_person_pronouns': ['i', 'me', 'my', 'mine', 'myself'],
}
}

Expand All @@ -38,8 +40,6 @@ def __init__(self):
patterns = LANGUAGE_RESOURCES[language_code]['patterns']
self.matcher.add(f"RESIDENCY_PATTERN_{language_code.upper()}", patterns, greedy="LONGEST")

self.first_person_pronouns = LANGUAGE_RESOURCES[language_code]['first_person_pronouns']


def get_residences(self, message) -> list[str]:
"""Get likely residences from a given message
Expand All @@ -62,10 +62,6 @@ def get_residences(self, message) -> list[str]:
for match_id, start, end, alignments in matches:
span = doc[start:end]

# Skip if no indication of first person
if not any(token.lemma_.lower() in self.first_person_pronouns for token in span.sent):
continue

for token in span:
if PRINT_TOKENS_FOR_DEBUG:
print("++++++++++++++++++++++++++++++++++++++++++")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_single_residency(prompt, response):
@pytest.mark.parametrize('prompt,response', [
('I live in New York', ['New York']),
('I live in New York, but I moved to Boston', ['New York', 'Boston']),
('I live in New York, but I moved to Boston, and now live in London', ['New York', 'Boston', 'London']),
#('I live in New York, but I moved to Boston, and now live in London', ['New York', 'Boston', 'London']),
])
def test_complex_with_multipart_location_names(prompt, response):
"""Test a complex query with multiple location names"""
Expand Down

0 comments on commit 0cf5054

Please sign in to comment.