Skip to content

Commit

Permalink
improve phone regex (fix #10)
Browse files Browse the repository at this point in the history
  • Loading branch information
jfilter committed Oct 15, 2020
1 parent d32aa94 commit 335ab64
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
3 changes: 2 additions & 1 deletion cleantext/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@
flags=re.IGNORECASE | re.UNICODE,
)

# for more information: https://github.com/jfilter/clean-text/issues/10
# Pattern for phone numbers, built from two alternatives:
#   1. An optional prefix (optional "+" followed by 0 or 1, or "+" and two
#      digits), an optional 3-4 digit area code (parentheses and a trailing
#      "/" allowed), a 3+4 digit subscriber number and an optional extension
#      ("ext.", "#", "x" or "-" followed by 2-6 digits). This alternative must
#      start at the beginning of the text or after a character that is neither
#      a word character nor ")", and must end at the end of the text or before
#      a non-word character.
#   2. A 4-5 digit prefix (optionally led by "+"), exactly one separator
#      (space, ".", "/" or "-") and a 6-9 digit number.
# The hyphen is listed LAST in the separator class of the second alternative:
# written as "[ .-/]" it would form the range "." to "/" and silently leave
# the hyphen itself out, so "0160-123456789" would not be matched.
PHONE_REGEX = re.compile(
    r"((?:^|(?<=[^\w)]))(((\+?[01])|(\+\d{2}))[ .-]?)?(\(?\d{3,4}\)?/?[ .-]?)?(\d{3}[ .-]?\d{4})(\s?(?:ext\.?|[#x-])\s?\d{2,6})?(?:$|(?=\W)))"
    r"|\+?\d{4,5}[ ./-]\d{6,9}"
)

NUMBERS_REGEX = re.compile(
Expand Down
25 changes: 22 additions & 3 deletions tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,29 @@ def test_replace_emails():
assert cleantext.replace_emails(text, "*EMAIL*") == proc_text


# Sample inputs for test_replace_phone_numbers: after replacement each entry
# must contain the placeholder and no remaining digit.
phone_numbers = [
    # "+" and two-digit prefix, space-separated groups
    "+49 123 1548690",
    # three groups separated by hyphens
    "555-123-4567",
    # same number with and without leading zero, using different separators
    "2404 9099130",
    "024049099130",
    "02404 9099130",
    "02404/9099130",
    # same number behind a "+49" prefix
    "+492404 9099130",
    "+4924049099130",
    "+492404/9099130",
    # four-digit prefix followed by a nine-digit number
    "0160 123456789",
    "0160/123456789",
    "+32160 123456789",
    # number preceded by a textual label
    "Tel.: 0160 123456789",
]


def test_replace_phone_numbers():
    """Check that phone numbers are replaced by the given placeholder.

    Two things are verified:
    * a number embedded in a sentence is replaced while the surrounding
      text stays untouched;
    * every entry of ``phone_numbers`` is replaced so that the placeholder
      is present and no digit survives.
    """
    text = "I can be reached at 555-123-4567 through next Friday."
    proc_text = "I can be reached at *PHONE* through next Friday."
    assert cleantext.replace_phone_numbers(text, "*PHONE*") == proc_text

    for number in phone_numbers:
        replaced = cleantext.replace_phone_numbers(number, "*PHONE*")
        # The failure message shows the input next to the actual output so a
        # partially replaced number is easy to spot.
        assert "PHONE" in replaced and not any(map(str.isdigit, replaced)), (
            number + " / " + replaced
        )


def test_replace_numbers():
Expand Down

0 comments on commit 335ab64

Please sign in to comment.