Skip to content

Commit

Permalink
gh-124529: Fix _strptime to make %c/%x accept year with fewer digits
Browse files Browse the repository at this point in the history
  • Loading branch information
zuo committed Sep 30, 2024
1 parent 6f4d64b commit 827db45
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 8 deletions.
30 changes: 26 additions & 4 deletions Lib/_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,6 @@ def __init__(self, locale_time=None):
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
# W is set below by using 'U'
'y': r"(?P<y>\d\d)",
#XXX: Does 'Y' need to worry about having less or more than
# 4 digits?
'Y': r"(?P<Y>\d\d\d\d)",
'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))",
'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
Expand All @@ -213,8 +211,10 @@ def __init__(self, locale_time=None):
'Z'),
'%': '%'})
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
base.__setitem__(
'c', self.__pattern_with_lax_year(self.locale_time.LC_date_time))
base.__setitem__(
'x', self.__pattern_with_lax_year(self.locale_time.LC_date))
base.__setitem__('X', self.pattern(self.locale_time.LC_time))

def __seqToRE(self, to_convert, directive):
Expand All @@ -236,6 +236,26 @@ def __seqToRE(self, to_convert, directive):
regex = '(?P<%s>%s' % (directive, regex)
return '%s)' % regex

def __pattern_with_lax_year(self, format):
    """Return the regex for *format* with relaxed year-digit counts.

    The result is what pattern() produces, except that the %Y group
    accepts one to four digits and the %y group accepts one or two.

    This is needed so that strptime() can parse strftime()'s output
    for %c and %x: for some locales and platforms (e.g., 'C.UTF-8' on
    Linux) those directives may render a small year number with fewer
    digits than the usual four or two (e.g., '999' instead of '0999',
    or '9' instead of '09').

    The regexes registered for %Y and %y themselves are not produced
    by this helper; they still match exactly four and exactly two
    digits, respectively.
    """
    regex = self.pattern(format)
    # Swap each strict year group emitted by pattern() for its lax variant.
    for directive, lax_group in (
            ('Y', r"(?P<Y>\d{1,4})"),
            ('y', r"(?P<y>\d{1,2})"),
    ):
        regex = regex.replace(self[directive], lax_group)
    return regex

def pattern(self, format):
"""Return regex pattern for the format string.
Expand Down Expand Up @@ -374,6 +394,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
# U, W
# worthless without day of the week
if group_key == 'y':
# 1 or 2 digits (1 only for directive c or x; see TimeRE.__init__)
year = int(found_dict['y'])
# Open Group specification for strptime() states that a %y
#value in the range of [00, 68] is in the century 2000, while
Expand All @@ -383,6 +404,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
else:
year += 1900
elif group_key == 'Y':
# 1-4 digits (1-3 only for directive c or x; see TimeRE.__init__)
year = int(found_dict['Y'])
elif group_key == 'G':
iso_year = int(found_dict['G'])
Expand Down
131 changes: 131 additions & 0 deletions Lib/test/datetimetester.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,40 @@ def test_strptime_leap_year(self):
date.strptime('20-03-14', '%y-%m-%d')
date.strptime('02-29,2024', '%m-%d,%Y')

def test_strftime_strptime_roundtrip(self):
    # Format a date with strftime(), parse the text back with
    # date.strptime() using the same format, and expect an equal date.
    # Which years are exercised depends on whether a formatted sample
    # shows the year as '1999' or only as '99'.
    years_for_4digit_output = (
        1, 9, 10, 99, 100, 999,  # <- gh-124529 (ad %c/%x)
        1000, 1410, 1989, 2024, 2095, 9999)
    years_for_2digit_output = (
        1969, 1999,
        2000, 2001, 2009,  # <- gh-124529 (ad %c/%x)
        2068)
    month_day_pairs = ((1, 1), (6, 4), (12, 31))
    formats = (
        '%c',
        '%x',
        '%Y%m%d',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    for fmt in formats:
        with self.subTest(fmt=fmt):
            probe = date(1999, 3, 17).strftime(fmt)
            if '1999' in probe:
                years = years_for_4digit_output
            elif '99' in probe:
                years = years_for_2digit_output
            else:
                self.skipTest(f"these subtests need locale for which "
                              f"{fmt!r} includes year in some variant")
            for year in years:
                for month, day in month_day_pairs:
                    instance = date(year, month, day)
                    reason = (f'strftime/strptime roundtrip '
                              f'for {fmt=} and {year=}')
                    with self.subTest(reason=reason, instance=instance):
                        text = instance.strftime(fmt)
                        self.assertEqual(date.strptime(text, fmt),
                                         instance, msg=reason)

class SubclassDate(date):
sub_var = 1

Expand Down Expand Up @@ -2124,6 +2158,35 @@ def test_fromisocalendar_type_errors(self):
with self.assertRaises(TypeError):
self.theclass.fromisocalendar(*isocal)

def test_strptime_accepting_year_with_fewer_digits(self):  # gh-124529
    # For %c and %x, take a strftime() sample for year 1999, substitute
    # a shorter year representation into it, and check that strptime()
    # parses the result to the prototype instance with that year.
    prototype_inst = self.theclass.strptime('1999', '%Y')

    def check(fmt, input_string, year):
        reason = (f'strptime accepting year with fewer '
                  f'digits for {fmt=} and {input_string=}')
        with self.subTest(reason=reason):
            expected = prototype_inst.replace(year=year)
            parsed = self.theclass.strptime(input_string, fmt)
            self.assertEqual(parsed, expected, msg=reason)

    for fmt in ('%c', '%x'):
        with self.subTest(fmt=fmt):
            sample = prototype_inst.strftime(fmt)
            if '1999' in sample:
                # The sample shows the full year number.
                placeholder = '1999'
                cases = [(year, str(year))
                         for year in (1, 9, 10, 99, 100, 999)]
            elif '99' in sample:
                # The sample shows only the last two digits of the year.
                placeholder = '99'
                cases = [(year, str(year - 2000))
                         for year in (2000, 2001, 2009)]
            else:
                self.skipTest(f"these subtests need locale for which "
                              f"{fmt!r} includes year in some variant")
            for year, y_digits in cases:
                check(fmt, sample.replace(placeholder, y_digits), year)


#############################################################################
# datetime tests
Expand Down Expand Up @@ -2955,6 +3018,48 @@ def test_more_strftime(self):
except UnicodeEncodeError:
pass

def test_strftime_strptime_roundtrip(self):
    # Format an instance with strftime(), parse the text back with
    # strptime() using the same format, and expect an equal instance.
    # Both naive and aware instances are covered; when a tzinfo is
    # used, ' %z' is appended to the format.
    years_for_4digit_output = (
        1, 9, 10, 99, 100, 999,  # <- gh-124529 (ad %c/%x)
        1000, 1410, 1989, 2024, 2095, 9999)
    years_for_2digit_output = (
        1969, 1999,
        2000, 2001, 2009,  # <- gh-124529 (ad %c/%x)
        2068)
    # Positional arguments passed to self.theclass after the year.
    args_after_year = (
        (1, 1, 0, 0, 0),
        (6, 4, 1, 42, 7, 99),
        (12, 31, 23, 59, 59),
    )
    timezones = (
        None,
        UTC,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=-7)),
    )
    base_formats = (
        '%c %f',
        '%x %X %f',
        '%Y%m%d%H%M%S%f',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    for tz in timezones:
        fmt_suffix = ' %z' if tz is not None else ''
        for base_fmt in base_formats:
            fmt = base_fmt + fmt_suffix
            with self.subTest(fmt=fmt):
                probe = self.theclass(1999, 3, 17, 0, 0).strftime(fmt)
                if '1999' in probe:
                    years = years_for_4digit_output
                elif '99' in probe:
                    years = years_for_2digit_output
                else:
                    self.skipTest(f"these subtests need locale for which "
                                  f"{fmt!r} includes year in some variant")
                for year in years:
                    for rest in args_after_year:
                        instance = self.theclass(year, *rest, tzinfo=tz)
                        reason = (f'strftime/strptime roundtrip '
                                  f'for {fmt=} and {year=}')
                        with self.subTest(reason=reason, instance=instance):
                            text = instance.strftime(fmt)
                            parsed = self.theclass.strptime(text, fmt)
                            self.assertEqual(parsed, instance, msg=reason)

def test_extract(self):
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
self.assertEqual(dt.date(), date(2002, 3, 4))
Expand Down Expand Up @@ -3901,6 +4006,32 @@ def test_strptime_single_digit(self):
newdate = self.theclass.strptime(string, format)
self.assertEqual(newdate, target, msg=reason)

def test_strftime_strptime_roundtrip(self):
    # Format an instance with strftime(), parse the text back with
    # strptime() using the same format, and expect an equal instance.
    # When a tzinfo is used, ' %z' is appended to the format.
    timezones = (
        None,
        UTC,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=-7)),
    )
    base_formats = (
        '%c %f',
        '%X %f',
        '%H%M%S%f',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    # Positional arguments passed to self.theclass for each instance.
    positional_args = (
        (0, 0, 0),
        (1, 42, 7),
        (23, 59, 59, 654321),
    )
    for tz in timezones:
        fmt_suffix = ' %z' if tz is not None else ''
        for base_fmt in base_formats:
            fmt = base_fmt + fmt_suffix
            for args in positional_args:
                instance = self.theclass(*args, tzinfo=tz)
                reason = f'strftime/strptime round trip for {fmt=}'
                with self.subTest(reason=reason, instance=instance):
                    text = instance.strftime(fmt)
                    parsed = self.theclass.strptime(text, fmt)
                    self.assertEqual(parsed, instance, msg=reason)

def test_bool(self):
# time is always True.
cls = self.theclass
Expand Down
129 changes: 125 additions & 4 deletions Lib/test/test_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,42 @@ def test_compile(self):
for directive in ('a','A','b','B','c','d','G','H','I','j','m','M','p',
'S','u','U','V','w','W','x','X','y','Y','Z','%'):
fmt = "%d %Y" if directive == 'd' else "%" + directive
input_string = time.strftime(fmt)
compiled = self.time_re.compile(fmt)
found = compiled.match(time.strftime(fmt))
self.assertTrue(found, "Matching failed on '%s' using '%s' regex" %
(time.strftime(fmt),
compiled.pattern))
found = compiled.match(input_string)
self.assertTrue(found,
(f"Matching failed on '{input_string}' "
f"using '{compiled.pattern}' regex"))
for directive in ('c', 'x'):
fmt = "%" + directive
with self.subTest(f"{fmt!r} should match input containing "
f"year with fewer digits than usual"):
# gh-124529
params = _input_str_and_expected_year_for_few_digits_year(fmt)
if params is None:
self.skipTest(f"this subtest needs locale for which "
f"{fmt!r} includes year in some variant")
input_string, _ = params
compiled = self.time_re.compile(fmt)
found = compiled.match(input_string)
self.assertTrue(found,
(f"Matching failed on '{input_string}' "
f"using '{compiled.pattern}' regex"))
for directive in ('y', 'Y'):
fmt = "%" + directive
with self.subTest(f"{fmt!r} should not match input containing "
f"year with fewer digits than usual"):
params = _input_str_and_expected_year_for_few_digits_year(fmt)
if params is None:
self.skipTest(f"this subtest needs locale for which "
f"{fmt!r} includes year in some variant")
input_string, _ = params
compiled = self.time_re.compile(fmt)
found = compiled.match(input_string)
self.assertFalse(found,
(f"Matching unexpectedly succeeded "
f"on '{input_string}' using "
f"'{compiled.pattern}' regex"))

def test_blankpattern(self):
# Make sure when tuple or something has no values no regex is generated.
Expand Down Expand Up @@ -299,6 +330,25 @@ def helper(self, directive, position):
(directive, strf_output, strp_output[position],
self.time_tuple[position]))

def helper_for_directives_accepting_few_digits_year(self, directive):
    # Check that _strptime._strptime() parses, for "%" + directive, an
    # input whose year has fewer digits than usual, and that it yields
    # the expected year.  The test is skipped when the formatted output
    # for the current locale does not include a year at all.
    fmt = "%" + directive
    input_string, expected_year = (
        _input_str_and_expected_year_for_few_digits_year(fmt))
    if expected_year is None:
        # The module-level helper always returns a 2-tuple and reports
        # "no year in the output" through a None expected year, so that
        # is the value to test here (the tuple itself is never None).
        self.skipTest(f"test needs locale for which {fmt!r} "
                      f"includes year in some variant")
    try:
        output_year = _strptime._strptime(input_string, fmt)[0][0]
    except ValueError as exc:
        # See: gh-124529
        self.fail(f"testing of {directive!r} directive failed; "
                  f"{input_string!r} -> exception: {exc!r}")
    else:
        self.assertEqual(output_year, expected_year,
                         (f"testing of {directive!r} directive failed; "
                          f"{input_string!r} -> output including year "
                          f"{output_year!r} != {expected_year!r}"))

def test_year(self):
# Test that the year is handled properly
for directive in ('y', 'Y'):
Expand All @@ -312,6 +362,17 @@ def test_year(self):
"'y' test failed; passed in '%s' "
"and returned '%s'" % (bound, strp_output[0]))

def test_bad_year(self):
    # Each listed input must make _strptime_time() raise ValueError
    # when parsed with the corresponding year directive.
    bad_inputs_by_directive = {
        'y': ('9', '100', 'ni'),
        'Y': ('7', '42', '999', '10000', 'SPAM'),
    }
    for directive, bad_inputs in bad_inputs_by_directive.items():
        fmt = "%" + directive
        for input_val in bad_inputs:
            case = self.subTest(directive=directive, input_val=input_val)
            with case, self.assertRaises(ValueError):
                _strptime._strptime_time(input_val, fmt)

def test_month(self):
# Test for month directives
for directive in ('B', 'b', 'm'):
Expand Down Expand Up @@ -454,11 +515,21 @@ def test_date_time(self):
for position in range(6):
self.helper('c', position)

def test_date_time_accepting_few_digits_year(self):  # gh-124529
    # %c must accept input whose year number has fewer digits than
    # usual; the shared helper performs the actual check.
    self.helper_for_directives_accepting_few_digits_year(directive='c')

def test_date(self):
# Test %x directive
for position in range(0,3):
self.helper('x', position)

def test_date_accepting_few_digits_year(self):  # gh-124529
    # %x must accept input whose year number has fewer digits than
    # usual; the shared helper performs the actual check.
    self.helper_for_directives_accepting_few_digits_year(directive='x')

def test_time(self):
# Test %X directive
for position in range(3,6):
Expand Down Expand Up @@ -769,5 +840,55 @@ def test_TimeRE_recreation_timezone(self):
_strptime._strptime_time(oldtzname[1], '%Z')


def _input_str_and_expected_year_for_few_digits_year(fmt):
    # For the given format string (fmt), return a 2-tuple:
    #   (<strptime input string>, <expected year>)
    # where:
    # * <strptime input string> is a `strftime(fmt)`-result-like str in
    #   which the year number is *shorter* than the usual four or two
    #   digits (it consists of just one digit: 7; the choice of this
    #   particular digit is arbitrary);
    # * <expected year> is the int year expected from a
    #   `strptime(<strptime input string>, fmt)` call (either 7 or 2007,
    #   depending on the format string and current locale), or None if
    #   <strptime input string> does *not* contain a year part.

    # Reference time tuple.  Its values are chosen to be accepted by
    # `time.strftime()` and easy to tell apart in the formatted text
    # (few repeated numbers, as in
    # `_strptime.LocaleTime.__calc_date_time()`).
    reference_year = 1999
    reference_tt = (reference_year, 3, 17, 22, 44, 55, 2, 76, 0)
    four_digit_form = str(reference_year)
    two_digit_form = four_digit_form[-2:]

    # The short year representation substituted into the output.
    short_year = '7'

    text = time.strftime(fmt, reference_tt)

    # Replace the four-digit form first; the two-digit form is searched
    # for only in what remains after that replacement.
    pos_four = text.find(four_digit_form)
    if pos_four != -1:
        text = text.replace(four_digit_form, short_year)
    pos_two = text.find(two_digit_form)
    if pos_two != -1:
        text = text.replace(two_digit_form, short_year)

    if pos_four == pos_two:
        # Neither form was present: the output has no year part.
        assert pos_four == pos_two == -1
        return text, None
    if pos_four > pos_two:
        # Up-to-4-digit year representation: the year is taken literally.
        return text, int(short_year)
    # Up-to-2-digit year representation: the year falls in the 2000s.
    return text, 2000 + int(short_year)


if __name__ == '__main__':
unittest.main()

0 comments on commit 827db45

Please sign in to comment.