Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
7635fe1
TEMP: Add isoformatter test
pganssle Oct 21, 2021
b9b7a03
Add support for YYYYMMDD
pganssle Oct 21, 2021
c746b96
Expand support for ISO 8601 times
pganssle Oct 21, 2021
00978f9
Add support for ISO calendar-style strings
pganssle Oct 22, 2021
c36e306
Rework how string sanitization works
pganssle Oct 22, 2021
0234cae
WIP
pganssle Nov 16, 2021
ee1a7e3
Move Isoformatter into test helper, add date/time tests
pganssle Apr 27, 2022
7d2fd33
Final location for isoformatter and strategies
pganssle Apr 27, 2022
72266c4
Working version of date.isoformat
pganssle Apr 27, 2022
8067af1
Fix failure to set an error
pganssle May 1, 2022
7b9bca5
First version with time parsing allowed
pganssle May 1, 2022
328e781
Add support for leading T in time formatters
pganssle May 1, 2022
4d0e3a9
Fix pure python separator detection in YYYYWwwd
pganssle May 1, 2022
3e600f2
Version with all tests passing
pganssle May 1, 2022
e26f06f
Migrate fromisoformat tests to their own file
pganssle May 2, 2022
1ea0cd1
Fix bug in time parsing logic
pganssle May 2, 2022
1e3577f
s/ssize_t/size_t
pganssle May 2, 2022
6422799
Add fromisoformat example tests
pganssle May 2, 2022
3d24a15
Try to be consistent about use of double quotes in error messages
pganssle May 2, 2022
661b1b0
Update documentation
pganssle May 3, 2022
1defa1d
Remove isoformatter
pganssle May 3, 2022
75de7a4
Update out-of-date comment
pganssle May 3, 2022
07ee419
Only one space
pganssle May 3, 2022
3d0fb7a
Explicitly handle 0-length tzstr
pganssle May 3, 2022
cc8c737
Raise exceptions from None
pganssle May 3, 2022
31bf63e
Add test cases around week 53
pganssle May 3, 2022
5bfb3fc
Add examples around week 53
pganssle May 3, 2022
4879a47
Update docstrings
pganssle May 3, 2022
3cd657f
Add news entry
pganssle May 3, 2022
763d5bb
Add what's new entry
pganssle May 3, 2022
3a06505
Be consistent about ISO 8601
pganssle May 3, 2022
e643f02
Change name of isoformat separator detection function
pganssle May 5, 2022
5046809
Remove 'mode' logic and update comments
pganssle May 5, 2022
90093bf
Fix segfault case
pganssle May 5, 2022
6fc8157
Explicitly cast signed to unsigned
pganssle May 5, 2022
d9a766b
Document that ordinal dates are not supported
pganssle May 5, 2022
04ed787
Remove dead code
pganssle May 5, 2022
6da3e90
Various fixes
pganssle May 5, 2022
92cc0be
Fix example
pganssle May 5, 2022
bec0bee
Add example for time.fromisoformat
pganssle May 5, 2022
aad6011
Fix trailing colon
pganssle May 5, 2022
a33d776
Remove fromisoformat property test
pganssle May 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
First version with time parsing allowed
  • Loading branch information
pganssle committed May 5, 2022
commit 7b9bca528fda5f6db29569e92224e7b144db79fd
123 changes: 102 additions & 21 deletions Lib/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,60 @@ def _wrap_strftime(object, format, timetuple):
return _time.strftime(newformat, timetuple)

# Helpers for parsing the result of isoformat()
def _find_isoformat_separator(dtstr):
# See the comment in _datetimemodule.c:_findisoformat_separator
len_dtstr = len(dtstr)
if len_dtstr == 7:
return 7

assert len_dtstr > 7
date_separator = "-"
week_indicator = "W"

if dtstr[4] == date_separator:
if dtstr[5] == week_indicator:
if len_dtstr < 8:
raise ValueError("Invalid ISO string")
if len_dtstr > 8 and dtstr[8] == date_separator:
if len_dtstr == 9:
raise ValueError("Invalid ISO string")
if len_dtstr > 10 and dtstr[10].isdigit():
# This is as far as we need to resolve the ambiguity for
# the moment - if we have YYYY-Www-##, the separator is
# either a hyphen at 8 or a number at 10.
#
# We'll assume it's a hyphen at 8 because it's way more
# likely that someone will use a hyphen as a separator than
# a number, but at this point it's really best effort
# because this is an extension of the spec anyway.
# TODO(pganssle): Document this
return 8
return 10
else:
# YYYY-Www (8)
return 8
else:
# YYYY-MM-DD (10)
return 10
else:
if dtstr[4] == week_indicator:
# YYYYWww (7) or YYYYWwwd (8)
for idx in range(7, len_dtstr):
if not dtstr[idx].isdigit():
break
if idx < 9:
return idx

if idx % 2 == 0:
# If the index of the last number is even, it's YYYYWwwd
return 7
else:
return 8
else:
# YYYYMMDD (8)
return 8


def _parse_isoformat_date(dtstr):
# It is assumed that this function will only be called with a
# string of length exactly 10, and (though this is not used) ASCII-only
Expand Down Expand Up @@ -295,11 +349,14 @@ def _parse_isoformat_date(dtstr):
pos += has_sep
day = int(dtstr[pos:pos + 2])

return year, month, day
return [year, month, day]


_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10]


def _parse_hh_mm_ss_ff(tstr):
# Parses things of the form HH[:MM[:SS[.fff[fff]]]]
# Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]]
len_str = len(tstr)

time_comps = [0, 0, 0, 0]
Expand All @@ -313,27 +370,36 @@ def _parse_hh_mm_ss_ff(tstr):
pos += 2
next_char = tstr[pos:pos+1]

if comp == 0:
has_sep = next_char == ':'

if not next_char or comp >= 2:
break

if next_char != ':':
if has_sep and next_char != ':':
raise ValueError('Invalid time separator: %c' % next_char)

pos += 1
pos += has_sep

if pos < len_str:
if tstr[pos] != '.':
if tstr[pos] not in '.,':
raise ValueError('Invalid microsecond component')
else:
pos += 1

len_remainder = len_str - pos
if len_remainder not in (3, 6):
raise ValueError('Invalid microsecond component')

time_comps[3] = int(tstr[pos:])
if len_remainder == 3:
time_comps[3] *= 1000
if len_remainder >= 6:
to_parse = 6
else:
to_parse = len_remainder

time_comps[3] = int(tstr[pos:(pos+to_parse)])
if to_parse < 6:
time_comps[3] *= _FRACTION_CORRECTION[to_parse-1]
if (len_remainder > to_parse
and not tstr[(pos+to_parse):].isdigit()):
raise ValueError('Non-digit values in unparsed fraction')

return time_comps

Expand All @@ -343,25 +409,35 @@ def _parse_isoformat_time(tstr):
if len_str < 2:
raise ValueError('Isoformat time too short')

# This is equivalent to re.search('[+-]', tstr), but faster
tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
# This is equivalent to re.search('[-+Z]', tstr), but faster
tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1)
timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr

time_comps = _parse_hh_mm_ss_ff(timestr)

tzi = None
if tz_pos > 0:
if tz_pos == len_str and tstr[-1] == 'Z':
tzi = timezone.utc
elif tz_pos > 0:
tzstr = tstr[tz_pos:]

# Valid time zone strings are:
# HH len: 2
# HHMM len: 4
# HH:MM len: 5
# HHMMSS len: 6
# HH:MM:SS len: 8
# HH:MM:SS.ffffff len: 15
# HH:MM:SS.f+ len: 10+

if len(tzstr) not in (5, 8, 15):
if (len_tzstr := len(tzstr)) < 10 and (len_tzstr % 2) and len_tzstr != 5:
raise ValueError('Malformed time zone string')

tz_comps = _parse_hh_mm_ss_ff(tzstr)

if tzstr == 'Z':
tz_comps = (0, 0, 0, 0)
else:
tz_comps = _parse_hh_mm_ss_ff(tzstr)

if all(x == 0 for x in tz_comps):
tzi = timezone.utc
else:
Expand Down Expand Up @@ -406,7 +482,7 @@ def _isoweek_to_gregorian(year, week, day):
day_1 = _isoweek1monday(year)
ord_day = day_1 + day_offset

return _ord2ymd(ord_day)
return list(_ord2ymd(ord_day))


# Just raise TypeError if the arg isn't None or a string.
Expand Down Expand Up @@ -1743,11 +1819,15 @@ def fromisoformat(cls, date_string):
if not isinstance(date_string, str):
raise TypeError('fromisoformat: argument must be str')

# Split this at the separator
dstr = date_string[0:10]
tstr = date_string[11:]
if len(date_string) < 7:
raise ValueError(f'Invalid isoformat string: {date_string!r}')

# Split this at the separator
try:
separator_location = _find_isoformat_separator(date_string)
dstr = date_string[0:separator_location]
tstr = date_string[(separator_location+1):]

date_components = _parse_isoformat_date(dstr)
except ValueError:
raise ValueError(f'Invalid isoformat string: {date_string!r}')
Expand Down Expand Up @@ -2537,7 +2617,8 @@ def _name_from_offset(delta):
_format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math,
_ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
_divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
_parse_hh_mm_ss_ff, _IsoCalendarDate)
_parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian,
_find_isoformat_separator, _FRACTION_CORRECTION)
# XXX Since import * above excludes names that start with _,
# docstring does not get overwritten. In the future, it may be
# appropriate to maintain a single module level docstring and
Expand Down
94 changes: 90 additions & 4 deletions Lib/test/datetimetester.py
Original file line number Diff line number Diff line change
Expand Up @@ -3073,6 +3073,18 @@ def test_fromisoformat_timespecs(self):
dt_rt = self.theclass.fromisoformat(dtstr)
self.assertEqual(dt, dt_rt)

def test_fromisoformat_examples_datetime(self):
    """Check fromisoformat() against hand-written datetime examples.

    Each example pairs an input string with the instance of the class
    under test that parsing it is expected to produce.
    """
    examples = (
        ('2009-04-19T03:15:45.2345',
         self.theclass(2009, 4, 19, 3, 15, 45, 234500)),
        ('2009-04-19T03:15:45.1234567',
         self.theclass(2009, 4, 19, 3, 15, 45, 123456)),
    )

    for input_str, expected in examples:
        # One sub-test per example so a failure names the offending input
        with self.subTest(input_str=input_str):
            parsed = self.theclass.fromisoformat(input_str)
            self.assertEqual(parsed, expected)

def test_fromisoformat_fails_datetime(self):
# Test that fromisoformat() fails on invalid values
bad_strs = [
Expand All @@ -3086,8 +3098,6 @@ def test_fromisoformat_fails_datetime(self):
'2009-04-19T03;15:45', # Bad first time separator
'2009-04-19T03:15;45', # Bad second time separator
'2009-04-19T03:15:4500:00', # Bad time zone separator
'2009-04-19T03:15:45.2345', # Too many digits for milliseconds
'2009-04-19T03:15:45.1234567', # Too many digits for microseconds
'2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset
'2009-04-19T03:15:45.123456-24:30', # Invalid negative offset
'2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators
Expand Down Expand Up @@ -4032,6 +4042,24 @@ def test_fromisoformat_timespecs(self):
t_rt = self.theclass.fromisoformat(tstr)
self.assertEqual(t, t_rt)

def test_fromisoformat_fractions(self):
    """Check fromisoformat() with one to eight fractional-second digits.

    Fewer than six digits are scaled up to microseconds; digits past the
    sixth are dropped rather than rounded (``.1234567`` -> 123456).
    """
    strs = [
        ('12:30:45.1', (12, 30, 45, 100000)),
        ('12:30:45.12', (12, 30, 45, 120000)),
        ('12:30:45.123', (12, 30, 45, 123000)),
        ('12:30:45.1234', (12, 30, 45, 123400)),
        ('12:30:45.12345', (12, 30, 45, 123450)),
        ('12:30:45.123456', (12, 30, 45, 123456)),
        ('12:30:45.1234567', (12, 30, 45, 123456)),
        ('12:30:45.12345678', (12, 30, 45, 123456)),
    ]

    for time_str, time_comps in strs:
        # Run each case as its own sub-test so one failure neither hides
        # the remaining cases nor leaves the failing input unreported.
        with self.subTest(time_str=time_str):
            expected = self.theclass(*time_comps)
            actual = self.theclass.fromisoformat(time_str)

            self.assertEqual(actual, expected)

def test_fromisoformat_fails(self):
bad_strs = [
'', # Empty string
Expand All @@ -4045,15 +4073,17 @@ def test_fromisoformat_fails(self):
'1a:30:45.334034', # Invalid character in hours
'12:a0:45.334034', # Invalid character in minutes
'12:30:a5.334034', # Invalid character in seconds
'12:30:45.1234', # Too many digits for milliseconds
'12:30:45.1234567', # Too many digits for microseconds
'12:30:45.123456+24:30', # Invalid time zone offset
'12:30:45.123456-24:30', # Invalid negative offset
'12:30:45', # Uses full-width unicode colons
'12:30:45.123456a', # Non-numeric data after 6 components
'12:30:45.123456789a', # Non-numeric data after 9 components
'12:30:45․123456', # Uses \u2024 in place of decimal point
'12:30:45a', # Extra at end of basic time
'12:30:45.123a', # Extra at end of millisecond time
'12:30:45.123456a', # Extra at end of microsecond time
'12:30:45.123456-', # Extra at end of microsecond time
'12:30:45.123456+', # Extra at end of microsecond time
'12:30:45.123456+12:00:30a', # Extra at end of full time
]

Expand All @@ -4080,6 +4110,62 @@ class TimeSubclass(self.theclass):
self.assertEqual(tsc, tsc_rt)
self.assertIsInstance(tsc_rt, TimeSubclass)

# Property test: times are drawn by hypothesis (optionally with a fixed-offset
# timezone) together with one of the time formatters from iso_strategies.
@hypothesis.given(
    t=hypothesis.strategies.times(
        timezones=iso_strategies.FIXED_TIMEZONES | hypothesis.strategies.none()
    ),
    iso_formatter=iso_strategies.TIME_ISOFORMATTERS,
)
# Explicit examples; presumably every time below is paired with every
# formatter below -- confirm against _cross_product_examples.
@_cross_product_examples(
    t=[
        time(0, 0),
        time(12, 0),
        time(23, 59, 59, 999999),
        time(12, 0, tzinfo=timezone.utc),
        time(12, 0, tzinfo=timezone(timedelta(hours=-5))),
    ],
    iso_formatter=map(
        IsoFormatter,
        [
            "%H:%M:%S",
            "%H%M%S",
            "%H:%M:%S.%(f6)",
            "%H%M%S.%(f6)",
            "%H:%M:%S.%(f3)",
            "%H%M%S.%(f3)",
            "%H:%M:%S[TZ:%H:%M]",
            "%H:%M:%S[TZ:%H%M]",
        ],
    ),
)
# A UTC time rendered with the "[TZ:Z]" formatter.
@hypothesis.example(
    t=time(0, 0, tzinfo=timezone.utc),
    iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"),
)
# A +05:30 offset rendered with hours-only and hours:minutes offset formats.
@_cross_product_examples(
    t=[
        time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))),
    ],
    iso_formatter=map(
        IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]")
    ),
)
def test_isoformat_times(self, t, iso_formatter):
    """Format *t* with *iso_formatter*, parse it back, and compare.

    The string produced by ``iso_formatter.format(t)`` is parsed with
    ``type(t).fromisoformat`` and the result must equal
    ``iso_formatter.truncate(t)``.
    """
    input_str = iso_formatter.format(t)
    actual = type(t).fromisoformat(input_str)
    # NOTE(review): truncate() presumably drops whatever the format cannot
    # represent (e.g. sub-millisecond digits) -- confirm against IsoFormatter.
    expected = iso_formatter.truncate(t)

    # The failure message carries both reprs, the input string and the
    # formatter so that a failing generated case can be reproduced.
    self.assertEqual(
        actual,
        expected,
        f"\n{actual} != {expected}\n"
        + f"actual = {actual!r}\n"
        + f"expected = {expected!r} \n"
        + f"input_str = {input_str}\n"
        + f"formatter = {iso_formatter!r}",
    )


def test_subclass_timetz(self):

class C(self.theclass):
Expand Down
4 changes: 3 additions & 1 deletion Lib/test/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

def load_tests(loader, tests, pattern):
try:
pure_tests = import_fresh_module(TESTS, fresh=['datetime', '_strptime'],
pure_tests = import_fresh_module(TESTS, fresh=[
'datetime', '_strptime', 'test.isoformat_helpers.isoformatter',
'test.isoformat_helpers.strategies'],
blocked=['_datetime'])
fast_tests = import_fresh_module(TESTS, fresh=['datetime',
'_datetime', '_strptime'])
Expand Down