From d1484070030a97cf9808ab04c7fc9b11a0f147e2 Mon Sep 17 00:00:00 2001 From: Tomas R Date: Thu, 14 Nov 2024 23:17:42 +0100 Subject: [PATCH] gh-126807: pygettext: Do not attempt to extract messages from function definitions. (GH-126808) Fixes a bug where pygettext would attempt to extract a message from a code like this: def _(x): pass This is because pygettext only looks at one token at a time and '_(x)' looks like a function call. However, since 'x' is not a string literal, it would erroneously issue a warning. (cherry picked from commit 9a456383bed52010b90bd491277ea855626a7bba) Co-authored-by: Tomas R. --- Lib/test/test_tools/test_i18n.py | 33 ++++++++++++++++--- ...-11-13-22-23-36.gh-issue-126807.vpaWuN.rst | 2 ++ Tools/i18n/pygettext.py | 6 ++++ 3 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2024-11-13-22-23-36.gh-issue-126807.vpaWuN.rst diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 21dead8f943bb7..6f71f0976819f1 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -87,17 +87,23 @@ def assert_POT_equal(self, expected, actual): self.maxDiff = None self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) - def extract_docstrings_from_str(self, module_content): - """ utility: return all msgids extracted from module_content """ - filename = 'test_docstrings.py' - with temp_cwd(None) as cwd: + def extract_from_str(self, module_content, *, args=(), strict=True): + """Return all msgids extracted from module_content.""" + filename = 'test.py' + with temp_cwd(None): with open(filename, 'w', encoding='utf-8') as fp: fp.write(module_content) - assert_python_ok('-Xutf8', self.script, '-D', filename) + res = assert_python_ok('-Xutf8', self.script, *args, filename) + if strict: + self.assertEqual(res.err, b'') with open('messages.pot', encoding='utf-8') as fp: data = fp.read() return self.get_msgids(data) + def extract_docstrings_from_str(self, module_content): + """Return all docstrings extracted from module_content.""" + return self.extract_from_str(module_content, args=('--docstrings',), strict=False) + def test_header(self): """Make sure the required fields are in the header, according to: http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry @@ -344,6 +350,23 @@ def test_calls_in_fstring_with_partially_wrong_expression(self): self.assertNotIn('foo', msgids) self.assertIn('bar', msgids) + def test_function_and_class_names(self): + """Test that function and class names are not mistakenly extracted.""" + msgids = self.extract_from_str(dedent('''\ + def _(x): + pass + + def _(x="foo"): + pass + + async def _(x): + pass + + class _(object): + pass + ''')) + self.assertEqual(msgids, ['']) + def test_pygettext_output(self): """Test that the pygettext output exactly matches snapshots.""" for input_file in DATA_DIR.glob('*.py'): diff --git a/Misc/NEWS.d/next/Tools-Demos/2024-11-13-22-23-36.gh-issue-126807.vpaWuN.rst b/Misc/NEWS.d/next/Tools-Demos/2024-11-13-22-23-36.gh-issue-126807.vpaWuN.rst new file mode 100644 index 00000000000000..310286ce8319ea --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2024-11-13-22-23-36.gh-issue-126807.vpaWuN.rst @@ -0,0 +1,2 @@ +Fix extraction warnings in :program:`pygettext.py` caused by mistaking +function definitions for function calls. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 3a0b27ba420e7a..0d16e8f7da0071 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -341,6 +341,9 @@ def __waiting(self, ttype, tstring, lineno): if ttype == tokenize.NAME and tstring in ('class', 'def'): self.__state = self.__suiteseen return + if ttype == tokenize.NAME and tstring in ('class', 'def'): + self.__state = self.__ignorenext + return if ttype == tokenize.NAME and tstring in opts.keywords: self.__state = self.__keywordseen return @@ -448,6 +451,9 @@ def __openseen(self, ttype, tstring, lineno): }, file=sys.stderr) self.__state = self.__waiting + def __ignorenext(self, ttype, tstring, lineno): + self.__state = self.__waiting + def __addentry(self, msg, lineno=None, isdocstring=0): if lineno is None: lineno = self.__lineno