From e4c2c021f7edf55ab82ba51ab9cc924e931dff24 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Sun, 13 Aug 2023 03:41:56 +0200 Subject: [PATCH 1/6] add tests for TW_react --- tests/test_react/test_react_tw.py | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/test_react/test_react_tw.py diff --git a/tests/test_react/test_react_tw.py b/tests/test_react/test_react_tw.py new file mode 100644 index 0000000..b2cf504 --- /dev/null +++ b/tests/test_react/test_react_tw.py @@ -0,0 +1,71 @@ +from commands.reacttw.consts import TW_REGEX +import pytest + + +@pytest.mark.parametrize( + "test_str", + ( + "TAIWAN", + "FORMOSA", + "TAIPEI", + "TAOYUAN", + "TAICHUNG", + "TAINAN", + "KAOHSIUNG", + "MIAOLI", + "CHANGHUA", + "NANTOU", + "YUNLIN", + "PINGTUNG", + "YILAN", + "HUALIEN", + "TAITUNG", + "PENGHU", + "KINMEN", + "LIENCHIANG", + "KEELUNG", + "HSINCHU", + "CHIAYI", + "台灣", + "臺灣", + "臺北", + "台北", + "新北", + "桃園", + "臺中", + "台中", + "臺南", + "台南", + "高雄", + "新竹", + "苗栗", + "彰化", + "南投", + "雲林", + "嘉義", + "屏東", + "宜蘭", + "花蓮", + "臺東", + "台東", + "澎湖", + "金門", + "連江", + "基隆", + "新竹", + "嘉義", + "美麗島", + ), +) +def test_react_tw_regex_yes_match(test_str: str): + """Tests that these strings return TRUE.""" + # * isolated string + assert TW_REGEX.search(test_str) + # * surrounded by spaces + assert TW_REGEX.search(f" {test_str} ") + # * surrounded by text + assert TW_REGEX.search(f"a{test_str}b") + # * to lowercase + assert TW_REGEX.search(test_str.lower()) + # * to title case + assert TW_REGEX.search(test_str.title()) From 0daa837b2f3ec081c8ec451ed2944b475c31619e Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Sun, 13 Aug 2023 03:53:32 +0200 Subject: [PATCH 2/6] add tests for Baltics react --- tests/test_react/test_react_baltics.py | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tests/test_react/test_react_baltics.py diff --git a/tests/test_react/test_react_baltics.py b/tests/test_react/test_react_baltics.py new file mode 100644 index 0000000..0748cbc --- /dev/null +++ b/tests/test_react/test_react_baltics.py @@ -0,0 +1,54 @@ +from commands.reacttw.consts import TW_REGEX +import pytest + + +@pytest.mark.parametrize( + "test_str", + ( + # Lithuanian + "Taivanas", + "Taivane", + "Taivano", + "Taivanui", + "Taivanietis", + "Taivanietė", + "Taivaniečiai", + "Taivaniečiu", + "Taivaniečių", + "Taivaniečiui", + "Taivaniečiams", + "Taivanietiškas", + "Taivana", + # Latvian + "Taivāna", + "Taivānā", + "Taivānas", + "Taivānai", + "Taivānu", + "Taivānietis", + "Taivāniete", + "Taivānisks", + "Taivāniešu", + "Taivānietim", + "Taivānietei", + "Taivāniešiem", + "Taivānieti", + "Taivāniete", + "Taivānieši", + "Taivānietes", + "Taivānieši", + # ! Estonian (STILL MISSING) + ), +) +def test_react_tw_regex_yes_match(test_str: str): + """Tests that these strings return TRUE.""" + # * isolated string + assert TW_REGEX.search(test_str) + # * surrounded by spaces + assert TW_REGEX.search(f" {test_str} ") + # * surrounded by text + assert TW_REGEX.search(f"a{test_str}b") + # * to lowercase + assert TW_REGEX.search(test_str.lower()) + # * to title case + assert TW_REGEX.search(test_str.title()) From 358f164a7cbb151af6001cf0ae2ea79cc665989f Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Sun, 13 Aug 2023 03:56:26 +0200 Subject: [PATCH 3/6] fix: taiwan regex -> baltic regex --- tests/test_react/test_react_baltics.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_react/test_react_baltics.py b/tests/test_react/test_react_baltics.py index 0748cbc..1dd9bc6 100644 --- a/tests/test_react/test_react_baltics.py +++ b/tests/test_react/test_react_baltics.py @@ -1,4 +1,4 @@ -from commands.reacttw.consts import TW_REGEX +from commands.react_baltics.consts import BALTIC_REGEX import pytest @@ -40,15 +40,15 @@ # ! Estonian (STILL MISSING) ), ) -def test_react_tw_regex_yes_match(test_str: str): +def test_react_baltics_regex_yes_match(test_str: str): """Tests that these strings return TRUE.""" # * isolated string - assert TW_REGEX.search(test_str) + assert BALTIC_REGEX.search(test_str) # * surrounded by spaces - assert TW_REGEX.search(f" {test_str} ") + assert BALTIC_REGEX.search(f" {test_str} ") # * surrounded by text - assert TW_REGEX.search(f"a{test_str}b") + assert BALTIC_REGEX.search(f"a{test_str}b") # * to lowercase - assert TW_REGEX.search(test_str.lower()) + assert BALTIC_REGEX.search(test_str.lower()) # * to title case - assert TW_REGEX.search(test_str.title()) + assert BALTIC_REGEX.search(test_str.title()) From 9cab61c8216966b2e7b2ef81c94c42508f86e021 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Sun, 13 Aug 2023 04:10:40 +0200 Subject: [PATCH 4/6] LT regex passes tests --- commands/react_baltics/consts.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/commands/react_baltics/consts.py b/commands/react_baltics/consts.py index 71cd145..d12f954 100644 --- a/commands/react_baltics/consts.py +++ b/commands/react_baltics/consts.py @@ -7,18 +7,7 @@ KEYWORDS = ( # Lithuanian - "Taivanas", - "Taivane", - "Taivano", - "Taivanui", - "Taivanietis", - "Taivanietė", - "Taivaniečiai", - r"Taivanieči(?:ų|u)", - "Taivaniečiui", - "Taivaniečiams", - "Taivanietiškas", - "Taivana", + r"Taivan(?:as|a|e|o|ui|ie(?:tiškas|tis|tė|či(?:ų|u|ai|ui|ams)))", # Latvian "Taivāna", "Taivānā", @@ -44,3 +33,4 @@ BALTIC_REGEX = compile( rf"(?:{'|'.join(KEYWORDS)})", flags=IGNORECASE | UNICODE ) +print(BALTIC_REGEX) From 9a9d3a583e543d93320369c13a60ff2837be1383 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Sun, 13 Aug 2023 04:14:01 +0200 Subject: [PATCH 5/6] LV regex passes tests --- commands/react_baltics/consts.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/commands/react_baltics/consts.py b/commands/react_baltics/consts.py index d12f954..b611537 100644 --- a/commands/react_baltics/consts.py +++ b/commands/react_baltics/consts.py @@ -9,28 +9,11 @@ # Lithuanian r"Taivan(?:as|a|e|o|ui|ie(?:tiškas|tis|tė|či(?:ų|u|ai|ui|ams)))", # Latvian - "Taivāna", - "Taivānā", - "Taivānas", - "Taivānai", - "Taivānu", - "Taivānietis", - "Taivāniete", - "Taivānisks", - "Taivāniešu", - "Taivānietim", - "Taivānietei", - "Taivāniešiem", - "Taivānieti", - "Taivāniete", - "Taivānieši", - "Taivānietes", - "Taivānieši", - # Estonian + r"Taivān(?:a|ā|as|u|isks|ie(?:tis|te|šu|tim|tei|šiem|ti|te|ši|tes|ši))", + # ! Estonian (STILL MISSING) ) BALTIC_REGEX = compile( rf"(?:{'|'.join(KEYWORDS)})", flags=IGNORECASE | UNICODE ) -print(BALTIC_REGEX) From f3adb9bf9452e2e0ed2f97c35a122fbdbda413ff Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Sun, 13 Aug 2023 04:16:31 +0200 Subject: [PATCH 6/6] TW regex passes tests (more readable) --- commands/reacttw/consts.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/commands/reacttw/consts.py b/commands/reacttw/consts.py index c94183e..1b65d4c 100644 --- a/commands/reacttw/consts.py +++ b/commands/reacttw/consts.py @@ -41,16 +41,12 @@ "KEELUNG", "HSINCHU", "CHIAYI", - "台灣", - "臺灣", - "臺北", - "台北", + r"(台|臺)灣", + "(台|臺)北", "新北", "桃園", - "臺中", - "台中", - "臺南", - "台南", + r"(台|臺)中", + r"(台|臺)南", "高雄", "新竹", "苗栗", @@ -61,8 +57,7 @@ "屏東", "宜蘭", "花蓮", - "臺東", - "台東", + r"(台|臺)東", "澎湖", "金門", "連江",