Skip to content

Commit

Permalink
Revert "get rid of utf8_encodable checks"
Browse files Browse the repository at this point in the history
This reverts commit 3b92c85.
  • Loading branch information
charles-cooper committed Mar 16, 2024
1 parent 1127fc4 commit e6143e5
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions tests/functional/grammar/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@ def test_basic_grammar_empty():
assert len(tree.children) == 0


def utf8_encodable(terminal: str) -> bool:
    """Return whether *terminal* passes the generated-text filter.

    A terminal is rejected when it contains a NUL character, a form-feed
    character, or a backslash immediately followed by a space, or when it
    cannot be encoded with the ``utf-8-sig`` codec.

    Args:
        terminal: Candidate string to check.

    Returns:
        ``True`` if none of the rejected sequences is present and the string
        encodes cleanly, ``False`` otherwise.
    """
    # Cheap substring checks first; none of these can raise.
    if "\x00" in terminal or "\\ " in terminal or "\x0c" in terminal:
        return False

    # Only the encode call can raise, so it is the only thing in the try body.
    try:
        terminal.encode("utf-8-sig")
    except UnicodeEncodeError:  # pragma: no cover
        # Very rarely, a "." in some terminal regex will generate a surrogate
        # character that cannot be encoded as UTF-8. We apply this filter to
        # ensure it doesn't happen at runtime, but don't worry about coverage.
        return False
    return True


# Character strategy limited to the "utf-8" codec and to code points >= 1
# (so NUL is never drawn). It is passed as the `alphabet` argument in
# GrammarStrategy.__init__.
ALLOWED_CHARS = st.characters(codec="utf-8", min_codepoint=1)


Expand All @@ -46,7 +60,7 @@ class GrammarStrategy(LarkStrategy):
def __init__(self, grammar, start, explicit_strategies):
super().__init__(grammar, start, explicit_strategies, alphabet=ALLOWED_CHARS)
self.terminal_strategies = {
k: v.map(lambda s: s.replace("\0", ""))
k: v.map(lambda s: s.replace("\0", "")).filter(utf8_encodable)
for k, v in self.terminal_strategies.items() # type: ignore
}

Expand Down Expand Up @@ -91,7 +105,7 @@ def has_no_docstrings(c):


@pytest.mark.fuzzing
@given(code=from_grammar())
@given(code=from_grammar().filter(lambda c: utf8_encodable(c)))
@hypothesis.settings(
max_examples=500, suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much]
)
Expand Down

0 comments on commit e6143e5

Please sign in to comment.