Skip to content

Commit

Permalink
low-code: Allow formatting datetimes as milliseconds since unix epoch (
Browse files Browse the repository at this point in the history
…#29504)

Co-authored-by: girarda <girarda@users.noreply.github.com>
  • Loading branch information
girarda and girarda authored Aug 18, 2023
1 parent 2af100f commit b4ce532
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ class DatetimeParser:
Instead of using the directive directly, we can use datetime.fromtimestamp and dt.timestamp()
"""

def parse(self, date: Union[str, int], format: str):
_UNIX_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)

def parse(self, date: Union[str, int], format: str) -> datetime.datetime:
# "%s" is a valid (but unreliable) directive for formatting, but not for parsing
# It is defined as
# The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html
Expand All @@ -25,6 +27,8 @@ def parse(self, date: Union[str, int], format: str):
# See https://stackoverflow.com/a/4974930
if format == "%s":
return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
elif format == "%ms":
return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))

parsed_datetime = datetime.datetime.strptime(str(date), format)
if self._is_naive(parsed_datetime):
Expand All @@ -37,6 +41,9 @@ def format(self, dt: datetime.datetime, format: str) -> str:
# See https://stackoverflow.com/a/4974930
if format == "%s":
return str(int(dt.timestamp()))
if format == "%ms":
# timstamp() returns a float representing the number of seconds since the unix epoch
return str(int(dt.timestamp() * 1000))
else:
return dt.strftime(format)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ definitions:
description: |
The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
* **%s**: Epoch unix timestamp - `1686218963`
* **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`
* **%a**: Weekday (abbreviated) - `Sun`
* **%A**: Weekday (full) - `Sunday`
* **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)
Expand Down Expand Up @@ -613,6 +614,7 @@ definitions:
- "%Y-%m-%dT%H:%M:%S.%f%z"
- "%Y-%m-%d"
- "%s"
- "%ms"
start_datetime:
title: Start Datetime
description: The datetime that determines the earliest record that should be synced.
Expand Down Expand Up @@ -1413,6 +1415,7 @@ definitions:
description: |
Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
* **%s**: Epoch unix timestamp - `1686218963`
* **%ms**: Epoch unix timestamp - `1686218963123`
* **%a**: Weekday (abbreviated) - `Sun`
* **%A**: Weekday (full) - `Sunday`
* **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ class MinMaxDatetime(BaseModel):
)
datetime_format: Optional[str] = Field(
"",
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
title="Datetime Format",
)
Expand Down Expand Up @@ -810,8 +810,8 @@ class DatetimeBasedCursor(BaseModel):
)
datetime_format: str = Field(
...,
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms"],
title="Outgoing Datetime Format",
)
start_datetime: Union[str, MinMaxDatetime] = Field(
Expand Down
17 changes: 11 additions & 6 deletions airbyte-cdk/python/airbyte_cdk/utils/datetime_format_inferrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,23 @@ def __init__(self) -> None:
"%Y-%m-%d %H:%M:%S.%f%z",
"%Y-%m-%dT%H:%M:%S.%f%z",
"%s",
"%ms",
"%d/%m/%Y %H:%M",
"%Y-%m",
"%d-%m-%Y",
]
self._timestamp_heuristic_range = range(1_000_000_000, 2_000_000_000)
self._timestamp_heuristic_ranges = [range(1_000_000_000, 2_000_000_000), range(1_000_000_000_000, 2_000_000_000_000)]

def _can_be_datetime(self, value: Any) -> bool:
"""Checks if the value can be a datetime. This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000. This is separate from the format check for performance reasons"""
if isinstance(value, str) and (not value.isdecimal() or int(value) in self._timestamp_heuristic_range):
return True
if isinstance(value, int) and value in self._timestamp_heuristic_range:
return True
"""Checks if the value can be a datetime.
This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds
or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds.
This is separate from the format check for performance reasons"""
for timestamp_range in self._timestamp_heuristic_ranges:
if isinstance(value, str) and (not value.isdecimal() or int(value) in timestamp_range):
return True
if isinstance(value, int) and value in timestamp_range:
return True
return False

def _matches_format(self, value: Any, format: str) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,24 @@
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
(
"test_parse_date_iso_with_timezone_not_utc",
"2021-01-01T00:00:00.000000+0400",
"%Y-%m-%dT%H:%M:%S.%f%z",
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400))),
"test_parse_date_iso_with_timezone_not_utc",
"2021-01-01T00:00:00.000000+0400",
"%Y-%m-%dT%H:%M:%S.%f%z",
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400))),
),
(
"test_parse_timestamp",
"1609459200",
"%s",
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
),
("test_parse_date_number", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)),
(
"test_parse_timestamp",
"1609459200001",
"%ms",
datetime.datetime(2021, 1, 1, 0, 0, 0, 1000, tzinfo=datetime.timezone.utc),
),
("test_parse_date_ms", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)),
],
)
def test_parse_date(test_name, input_date, date_format, expected_output_date):
Expand All @@ -42,6 +48,7 @@ def test_parse_date(test_name, input_date, date_format, expected_output_date):
"test_name, input_dt, datetimeformat, expected_output",
[
("test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "1609459200"),
("test_format_timestamp_ms", datetime.datetime(2021, 1, 1, 0, 0, 0, 1000, tzinfo=datetime.timezone.utc), "%ms", "1609459200001"),
("test_format_string", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "2021-01-01"),
("test_format_to_number", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y%m%d", "20210101"),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
("simple_match", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
("timestamp_match_integer", [{"d": 1686058051}], {"d": "%s"}),
("timestamp_match_string", [{"d": "1686058051"}], {"d": "%s"}),
("timestamp_ms_match_integer", [{"d": 1686058051000}], {"d": "%ms"}),
("timestamp_ms_match_string", [{"d": "1686058051000"}], {"d": "%ms"}),
("timestamp_no_match_integer", [{"d": 99}], {}),
("timestamp_no_match_string", [{"d": "99999999999999999999"}], {}),
("simple_no_match", [{"d": "20220203"}], {}),
Expand Down

0 comments on commit b4ce532

Please sign in to comment.