First version with time parsing allowed

python · pganssle · May 6, 2022 · Oct 21, 2021 · Oct 21, 2021 · Oct 21, 2021
commit 7b9bca528fda5f6db29569e92224e7b144db79fd
@@ -262,6 +262,60 @@ def _wrap_strftime(object, format, timetuple):
     return _time.strftime(newformat, timetuple)
 
 # Helpers for parsing the result of isoformat()
+def _find_isoformat_separator(dtstr):
+    """Return the index of the date/time separator in ISO string *dtstr*.
+
+    The date layout is inferred from characters 4 and 5.  For a well-formed
+    date-only string the result equals len(dtstr).  Raises ValueError when
+    *dtstr* has fewer than 7 characters or stops right after "YYYY-Www-".
+    """
+    len_dtstr = len(dtstr)
+    if len_dtstr < 7:
+        # Raise instead of assert so the check survives "python -O".
+        raise ValueError("Invalid ISO string")
+    if len_dtstr == 7:
+        return 7
+
+    # Only ASCII digits count: str.isdigit() also accepts characters such
+    # as superscripts, which int() rejects.
+    ascii_digits = "0123456789"
+
+    if dtstr[4] == "-":
+        if dtstr[5] != "W":
+            # YYYY-MM-DD (10)
+            return 10
+        if len_dtstr == 8 or dtstr[8] != "-":
+            # YYYY-Www (8)
+            return 8
+        if len_dtstr == 9:
+            raise ValueError("Invalid ISO string")
+        if len_dtstr > 10 and dtstr[10] in ascii_digits:
+            # YYYY-Www-## is ambiguous: the separator is either the hyphen
+            # at 8 or a digit at 10.  Assume the hyphen, since a hyphen is
+            # a far more likely separator than a digit; this is best
+            # effort because it extends the spec anyway.
+            # TODO(pganssle): Document this
+            return 8
+        # YYYY-Www-d (10)
+        return 10
+
+    if dtstr[4] != "W":
+        # YYYYMMDD (8)
+        return 8
+
+    # YYYYWww (7) or YYYYWwwd (8): advance idx to the first non-digit at or
+    # after index 7, or to len_dtstr when digits run to the end.
+    idx = 7
+    while idx < len_dtstr and dtstr[idx] in ascii_digits:
+        idx += 1
+    if idx < 9:
+        return idx
+
+    # Time fields come in pairs, so an even idx (odd digit count after the
+    # week) means YYYYWww plus a digit separator; otherwise YYYYWwwd.
+    return 7 if idx % 2 == 0 else 8
+
+
 def _parse_isoformat_date(dtstr):
     # It is assumed that this function will only be called with a string of
     # length 7, 8 or 10 (see _find_isoformat_separator), and (though this is
     # not used) ASCII-only
@@ -295,11 +349,14 @@ def _parse_isoformat_date(dtstr):
         pos += has_sep
         day = int(dtstr[pos:pos + 2])
 
-        return year, month, day
+        return [year, month, day]
+
+
+_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10]
 
 
 def _parse_hh_mm_ss_ff(tstr):
-    # Parses things of the form HH[:MM[:SS[.fff[fff]]]]
+    # Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]]
     len_str = len(tstr)
 
     time_comps = [0, 0, 0, 0]
@@ -313,27 +370,36 @@ def _parse_hh_mm_ss_ff(tstr):
         pos += 2
         next_char = tstr[pos:pos+1]
 
+        if comp == 0:
+            has_sep = next_char == ':'
+
         if not next_char or comp >= 2:
             break
 
-        if next_char != ':':
+        if has_sep and next_char != ':':
             raise ValueError('Invalid time separator: %c' % next_char)
 
-        pos += 1
+        pos += has_sep
 
     if pos < len_str:
-        if tstr[pos] != '.':
+        if tstr[pos] not in '.,':
             raise ValueError('Invalid microsecond component')
         else:
             pos += 1
 
             len_remainder = len_str - pos
-            if len_remainder not in (3, 6):
-                raise ValueError('Invalid microsecond component')
 
-            time_comps[3] = int(tstr[pos:])
-            if len_remainder == 3:
-                time_comps[3] *= 1000
+            if len_remainder >= 6:
+                to_parse = 6
+            else:
+                to_parse = len_remainder
+
+            time_comps[3] = int(tstr[pos:(pos+to_parse)])
+            if to_parse < 6:
+                time_comps[3] *= _FRACTION_CORRECTION[to_parse-1]
+            if (len_remainder > to_parse
+                    and not tstr[(pos+to_parse):].isdigit()):
+                raise ValueError('Non-digit values in unparsed fraction')
 
     return time_comps
 
@@ -343,25 +409,35 @@ def _parse_isoformat_time(tstr):
     if len_str < 2:
         raise ValueError('Isoformat time too short')
 
-    # This is equivalent to re.search('[+-]', tstr), but faster
-    tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
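+    # Locate the start of the UTC offset: the first '-' if there is one, else
+    # the first '+', else the first 'Z'.  tz_pos is that index plus one, or 0
+    # when none is present.  (This is a priority order, not the earliest
+    # match, and not re.search('[+-Z]'), where '+-Z' would be a range.)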
+    tz_pos = (tstr.find('-') + 1  or tstr.find('+') + 1 or tstr.find('Z') + 1)
     timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr
 
     time_comps = _parse_hh_mm_ss_ff(timestr)
 
     tzi = None
-    if tz_pos > 0:
+    if tz_pos == len_str and tstr[-1] == 'Z':
+        tzi = timezone.utc
+    elif tz_pos > 0:
         tzstr = tstr[tz_pos:]
 
         # Valid time zone strings are:
+        # HH                  len: 2
+        # HHMM                len: 4
         # HH:MM               len: 5
+        # HHMMSS              len: 6
         # HH:MM:SS            len: 8
-        # HH:MM:SS.ffffff     len: 15
+        # HH:MM:SS.f+         len: 10+
 
-        if len(tzstr) not in (5, 8, 15):
+        if (len_tzstr := len(tzstr)) < 10 and (len_tzstr % 2) and len_tzstr != 5:
             raise ValueError('Malformed time zone string')
 
-        tz_comps = _parse_hh_mm_ss_ff(tzstr)
+
+        if tzstr == 'Z':
+            tz_comps = (0, 0, 0, 0)
+        else:
+            tz_comps = _parse_hh_mm_ss_ff(tzstr)
+
         if all(x == 0 for x in tz_comps):
             tzi = timezone.utc
         else:
@@ -406,7 +482,7 @@ def _isoweek_to_gregorian(year, week, day):
     day_1 = _isoweek1monday(year)
     ord_day = day_1 + day_offset
 
-    return _ord2ymd(ord_day)
+    return list(_ord2ymd(ord_day))
 
 
 # Just raise TypeError if the arg isn't None or a string.
@@ -1743,11 +1819,15 @@ def fromisoformat(cls, date_string):
         if not isinstance(date_string, str):
             raise TypeError('fromisoformat: argument must be str')
 
-        # Split this at the separator
-        dstr = date_string[0:10]
-        tstr = date_string[11:]
+        if len(date_string) < 7:
+            raise ValueError(f'Invalid isoformat string: {date_string!r}')
 
+        # Split this at the separator
         try:
+            separator_location = _find_isoformat_separator(date_string)
+            dstr = date_string[0:separator_location]
+            tstr = date_string[(separator_location+1):]
+
             date_components = _parse_isoformat_date(dstr)
         except ValueError:
             raise ValueError(f'Invalid isoformat string: {date_string!r}')
@@ -2537,7 +2617,8 @@ def _name_from_offset(delta):
          _format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math,
          _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
          _divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
-         _parse_hh_mm_ss_ff, _IsoCalendarDate)
+         _parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian,
+         _find_isoformat_separator, _FRACTION_CORRECTION)
     # XXX Since import * above excludes names that start with _,
     # docstring does not get overwritten. In the future, it may be
     # appropriate to maintain a single module level docstring and

@@ -3073,6 +3073,18 @@ def test_fromisoformat_timespecs(self):
                         dt_rt = self.theclass.fromisoformat(dtstr)
                         self.assertEqual(dt, dt_rt)
 
+    def test_fromisoformat_examples_datetime(self):
+        # Fraction length is free-form; digits past microseconds are dropped.
+        make = self.theclass
+        examples = {
+            '2009-04-19T03:15:45.2345': make(2009, 4, 19, 3, 15, 45, 234500),
+            '2009-04-19T03:15:45.1234567': make(2009, 4, 19, 3, 15, 45, 123456),
+        }
+
+        for input_str, expected in examples.items():
+            with self.subTest(input_str=input_str):
+                self.assertEqual(make.fromisoformat(input_str), expected)
+
     def test_fromisoformat_fails_datetime(self):
         # Test that fromisoformat() fails on invalid values
         bad_strs = [
@@ -3086,8 +3098,6 @@ def test_fromisoformat_fails_datetime(self):
             '2009-04-19T03;15:45',          # Bad first time separator
             '2009-04-19T03:15;45',          # Bad second time separator
             '2009-04-19T03:15:4500:00',     # Bad time zone separator
-            '2009-04-19T03:15:45.2345',     # Too many digits for milliseconds
-            '2009-04-19T03:15:45.1234567',  # Too many digits for microseconds
             '2009-04-19T03:15:45.123456+24:30',    # Invalid time zone offset
             '2009-04-19T03:15:45.123456-24:30',    # Invalid negative offset
             '2009-04-10ᛇᛇᛇᛇᛇ12:15',         # Too many unicode separators
@@ -4032,6 +4042,24 @@ def test_fromisoformat_timespecs(self):
                         t_rt = self.theclass.fromisoformat(tstr)
                         self.assertEqual(t, t_rt)
 
+    def test_fromisoformat_fractions(self):
+        # Short fractions scale up to microseconds; digits past the sixth drop.
+        strs = [
+            ('12:30:45.1', (12, 30, 45, 100000)),
+            ('12:30:45.12', (12, 30, 45, 120000)),
+            ('12:30:45.123', (12, 30, 45, 123000)),
+            ('12:30:45.1234', (12, 30, 45, 123400)),
+            ('12:30:45.12345', (12, 30, 45, 123450)),
+            ('12:30:45.123456', (12, 30, 45, 123456)),
+            ('12:30:45.1234567', (12, 30, 45, 123456)),
+            ('12:30:45.12345678', (12, 30, 45, 123456)),
+        ]
+
+        for time_str, time_comps in strs:
+            with self.subTest(time_str=time_str):
+                actual = self.theclass.fromisoformat(time_str)
+                self.assertEqual(actual, self.theclass(*time_comps))
+
     def test_fromisoformat_fails(self):
         bad_strs = [
             '',                         # Empty string
@@ -4045,15 +4073,17 @@ def test_fromisoformat_fails(self):
             '1a:30:45.334034',          # Invalid character in hours
             '12:a0:45.334034',          # Invalid character in minutes
             '12:30:a5.334034',          # Invalid character in seconds
-            '12:30:45.1234',            # Too many digits for milliseconds
-            '12:30:45.1234567',         # Too many digits for microseconds
             '12:30:45.123456+24:30',    # Invalid time zone offset
             '12:30:45.123456-24:30',    # Invalid negative offset
             '12：30：45',                 # Uses full-width unicode colons
+            '12:30:45.123456a',         # Non-numeric data after 6 components
+            '12:30:45.123456789a',      # Non-numeric data after 9 components
             '12:30:45․123456',          # Uses \u2024 in place of decimal point
             '12:30:45a',                # Extra at end of basic time
             '12:30:45.123a',            # Extra at end of millisecond time
             '12:30:45.123456a',         # Extra at end of microsecond time
+            '12:30:45.123456-',         # Extra at end of microsecond time
+            '12:30:45.123456+',         # Extra at end of microsecond time
             '12:30:45.123456+12:00:30a',    # Extra at end of full time
         ]
 
@@ -4080,6 +4110,62 @@ class TimeSubclass(self.theclass):
         self.assertEqual(tsc, tsc_rt)
         self.assertIsInstance(tsc_rt, TimeSubclass)
 
+    # Round-trip test: render a time with an ISO formatter, parse the text
+    # back with fromisoformat(), and compare against iso_formatter.truncate(t).
+    # Inputs are drawn by hypothesis (tzinfo taken from
+    # iso_strategies.FIXED_TIMEZONES or None) in addition to the explicit
+    # examples pinned by the decorators below.
+    @hypothesis.given(
+        t=hypothesis.strategies.times(
+            timezones=iso_strategies.FIXED_TIMEZONES | hypothesis.strategies.none()
+        ),
+        iso_formatter=iso_strategies.TIME_ISOFORMATTERS,
+    )
+    # NOTE(review): _cross_product_examples presumably registers every
+    # (t, iso_formatter) combination as an explicit example -- confirm against
+    # its definition, which is not visible here.
+    @_cross_product_examples(
+        t=[
+            time(0, 0),
+            time(12, 0),
+            time(23, 59, 59, 999999),
+            time(12, 0, tzinfo=timezone.utc),
+            time(12, 0, tzinfo=timezone(timedelta(hours=-5))),
+        ],
+        iso_formatter=map(
+            IsoFormatter,
+            [
+                "%H:%M:%S",
+                "%H%M%S",
+                "%H:%M:%S.%(f6)",
+                "%H%M%S.%(f6)",
+                "%H:%M:%S.%(f3)",
+                "%H%M%S.%(f3)",
+                "%H:%M:%S[TZ:%H:%M]",
+                "%H:%M:%S[TZ:%H%M]",
+            ],
+        ),
+    )
+    # A UTC time paired with a "[TZ:Z]" formatter (presumably the "Z" suffix
+    # form -- confirm in IsoFormatter).
+    @hypothesis.example(
+        t=time(0, 0, tzinfo=timezone.utc),
+        iso_formatter=IsoFormatter("%H:%M:%S[TZ:%H]".replace("%H]", "Z]")),
+    )
+    # A +05:30 offset paired with two offset formats; "[TZ:%H]" presumably
+    # renders hours only, which is why the expected value is the formatter's
+    # truncation of t rather than t itself -- confirm in IsoFormatter.
+    @_cross_product_examples(
+        t=[
+            time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))),
+        ],
+        iso_formatter=map(
+            IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]")
+        ),
+    )
+    def test_isoformat_times(self, t, iso_formatter):
+        input_str = iso_formatter.format(t)
+        # Parse with t's own class rather than self.theclass.
+        actual = type(t).fromisoformat(input_str)
+        expected = iso_formatter.truncate(t)
+
+        # The third argument is a custom failure message that shows both
+        # values, the parsed input string and the formatter.
+        self.assertEqual(
+            actual,
+            expected,
+            f"\n{actual} != {expected}\n"
+            + f"actual = {actual!r}\n"
+            + f"expected = {expected!r} \n"
+            + f"input_str = {input_str}\n"
+            + f"formatter = {iso_formatter!r}",
+        )
+
+
     def test_subclass_timetz(self):
 
         class C(self.theclass):

diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py
@@ -8,7 +8,9 @@
 
 def load_tests(loader, tests, pattern):
     try:
-        pure_tests = import_fresh_module(TESTS, fresh=['datetime', '_strptime'],
+        pure_tests = import_fresh_module(TESTS, fresh=[
+            'datetime', '_strptime', 'test.isoformat_helpers.isoformatter',
+            'test.isoformat_helpers.strategies'],
                                         blocked=['_datetime'])
         fast_tests = import_fresh_module(TESTS, fresh=['datetime',
                                                     '_datetime', '_strptime'])