Skip to content

Commit

Permalink
Isodate durations.
Browse files — browse the repository at this point in the history
  • Loading branch information
coady committed Oct 12, 2024
1 parent 5ffe5a5 commit 6fe9f9e
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 48 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## Unreleased
### Changed
* `isodate` dependency for durations
* Acero engine used for scanning
* Grouping defaults to parallelized but unordered
* Partitioning supports arbitrary functions
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ If index columns are detected in the schema metadata, then an initial `filter` w
## Dependencies
* pyarrow
* strawberry-graphql[asgi,cli]
* isodate
* uvicorn (or other [ASGI server](https://asgi.readthedocs.io/en/latest/implementations.html))

## Tests
Expand Down
50 changes: 13 additions & 37 deletions graphique/scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
"""

import functools
import re
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from typing import Union, no_type_check
from typing import Union
import isodate
import pyarrow as pa
import strawberry

Expand All @@ -17,47 +17,23 @@ def parse_long(value) -> int:
raise TypeError(f"Long cannot represent value: {value}")


@no_type_check
def parse_duration(value: str):
months = days = seconds = 0
d_val, _, t_val = value.partition('T')
parts = re.split(r'(-?\d+\.?\d*)', d_val.lower() + t_val)
if parts.pop(0) != 'p':
raise ValueError("Duration format must start with `P`")
multipliers = {'y': 12, 'w': 7, 'H': 3600, 'M': 60}
for num, key in zip(parts[::2], parts[1::2]):
value = (float if '.' in num else int)(num) * multipliers.get(key, 1)
if key in 'ym':
months += value
elif key in 'wd':
days += value
elif key in 'HMS':
seconds += value
else:
raise ValueError(f"Invalid duration field: {key.upper()}")
if set(d_val).isdisjoint('YM'):
return timedelta(days, seconds)
return pa.MonthDayNano([months, days, int(seconds * 1_000_000_000)])
def parse_duration(value):
duration = isodate.parse_duration(value)
if isinstance(duration, timedelta) and set(value.partition('T')[0]).isdisjoint('YM'):
return duration
months = getattr(duration, 'years', 0) * 12 + getattr(duration, 'months', 0)
nanoseconds = duration.seconds * 1_000_000_000 + duration.microseconds * 1_000
return pa.MonthDayNano([months, duration.days, nanoseconds])


@functools.singledispatch
def duration_isoformat(months: int, days: int, seconds: int, fraction: str = '.') -> str:
minutes, seconds = divmod(seconds, 60)
items = zip('YMDHM', divmod(months, 12) + (days,) + divmod(minutes, 60))
year, month, day, hour, minute = (f'{value}{key}' if value else '' for key, value in items)
fraction = fraction.rstrip('0').rstrip('.')
return f'P{year}{month}{day}T{hour}{minute}{seconds}{fraction}S'


@duration_isoformat.register
def _(td: timedelta) -> str: # type: ignore
return duration_isoformat(0, td.days, td.seconds, f'.{td.microseconds:06}')
duration_isoformat = functools.singledispatch(isodate.duration_isoformat)


@duration_isoformat.register
def _(mdn: pa.MonthDayNano) -> str:
seconds, nanoseconds = divmod(mdn.nanoseconds, 1_000_000_000)
value = duration_isoformat(mdn.months, mdn.days, seconds, f'.{nanoseconds:09}')
value = isodate.duration_isoformat(
isodate.Duration(months=mdn.months, days=mdn.days, microseconds=mdn.nanoseconds // 1_000)
)
return value if mdn.months else value.replace('P', 'P0M')


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ line-length = 100
quote-style = "preserve"

[[tool.mypy.overrides]]
module = ["numpy.*", "pyarrow.*", "strawberry.*", "starlette.*"]
module = ["numpy.*", "pyarrow.*", "strawberry.*", "starlette.*", "isodate.*"]
ignore_missing_imports = true

[tool.coverage.run]
Expand Down
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pyarrow>=17
strawberry-graphql[asgi,cli]>=0.236
isodate>=0.7
11 changes: 5 additions & 6 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,17 @@


def test_duration():
assert duration_isoformat(parse_duration('P1Y1M1DT1H1M1.1S')) == 'P1Y1M1DT1H1M1.1S'
assert duration_isoformat(parse_duration('P1Y1M1DT1H1M1.1S')) == 'P13M1DT1H1M1.1S'
assert duration_isoformat(parse_duration('P1M1DT1H1M1.1S')) == 'P1M1DT1H1M1.1S'
assert duration_isoformat(parse_duration('P1DT1H1M1.1S')) == 'P1DT1H1M1.1S'
assert duration_isoformat(parse_duration('PT1H1M1.1S')) == 'PT1H1M1.1S'
assert duration_isoformat(parse_duration('PT1M1.1S')) == 'PT1M1.1S'
assert duration_isoformat(parse_duration('PT1.1S')) == 'PT1.1S'
assert duration_isoformat(parse_duration('PT1S')) == 'PT1S'
assert duration_isoformat(parse_duration('P0D')) == 'PT0S'
assert duration_isoformat(parse_duration('PT0S')) == 'PT0S'
assert duration_isoformat(parse_duration('P-1DT-1H')) == 'P-2DT23H0S'
assert duration_isoformat(parse_duration('P0MT')) == 'P0MT0S'
assert duration_isoformat(parse_duration('P0YT')) == 'P0MT0S'
assert duration_isoformat(parse_duration('P0D')) == 'P0D'
assert duration_isoformat(parse_duration('PT0S')) == 'P0D'
assert duration_isoformat(parse_duration('P0MT')) == 'P0M0D'
assert duration_isoformat(parse_duration('P0YT')) == 'P0M0D'
with pytest.raises(ValueError):
duration_isoformat(parse_duration('T1H'))
with pytest.raises(ValueError):
Expand Down
10 changes: 6 additions & 4 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,15 @@ def test_duration(executor):
"""{ scan(columns: {alias: "diff", checked: true, subtract: [{name: "timestamp"}, {name: "timestamp"}]})
{ column(name: "diff") { ... on DurationColumn { unique { values } } } } }"""
)
assert data == {'scan': {'column': {'unique': {'values': ['PT0S', None]}}}}
assert data == {'scan': {'column': {'unique': {'values': ['P0D', None]}}}}
data = executor('{ runs(split: [{name: "timestamp", gt: 0.0}]) { length } }')
assert data == {'runs': {'length': 1}}
data = executor("""{ scan(columns: {alias: "diff", temporal:
data = executor(
"""{ scan(columns: {alias: "diff", temporal:
{monthDayNanoIntervalBetween: [{name: "timestamp"}, {name: "timestamp"}]}})
{ column(name: "diff") { ... on DurationColumn { values } } } }""")
assert data == {'scan': {'column': {'values': ['P0MT0S', None]}}}
{ column(name: "diff") { ... on DurationColumn { values } } } }"""
)
assert data == {'scan': {'column': {'values': ['P0M0D', None]}}}


def test_list(executor):
Expand Down

0 comments on commit 6fe9f9e

Please sign in to comment.