Skip to content

Commit

Permalink
Merge pull request #108 from dh-tech/feature/convert-hebrew
Browse files Browse the repository at this point in the history
Add converter for hebrew calendar based on hijri calendar
  • Loading branch information
rlskoeser authored Dec 6, 2024
2 parents 5660fa2 + 867e018 commit 333e740
Show file tree
Hide file tree
Showing 14 changed files with 494 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ on:
- 'undate/**'
- 'tests/**'
pull_request:
branches:
- "**"

env:
# python version used to calculate and submit code coverage
Expand Down
15 changes: 14 additions & 1 deletion src/undate/converters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
formatter methods as desired/appropriate for your converter as well as the
additional methods for ``max_month``, ``max_day``, and conversion ``to_gregorian``
calendar.
- Import your calendar in ``undate/converters/calendars/__init__.py`` and include in ``__all__``
- Add unit tests for the new calendar logic under ``tests/test_converters/calendars/``
- Add the new calendar to the ``Calendar`` enum of supported calendars in
``undate/undate.py`` and confirm that the `get_converter` method loads your
Expand Down Expand Up @@ -136,10 +137,22 @@ class BaseCalendarConverter(BaseDateConverter):
#: Converter name. Subclasses must define a unique name.
name: str = "Base Calendar Converter"

def min_month(self) -> int:
"""Smallest numeric month for this calendar."""
raise NotImplementedError

def max_month(self, year: int) -> int:
"""Maximum month for this calendar for this year"""
"""Maximum numeric month for this calendar"""
raise NotImplementedError

def first_month(self) -> int:
"""first month in this calendar; by default, returns :meth:`min_month`."""
return self.min_month()

def last_month(self, year: int) -> int:
"""last month in this calendar; by default, returns :meth:`max_month`."""
return self.max_month(year)

def max_day(self, year: int, month: int) -> int:
"""maximum numeric day for the specified year and month in this calendar"""
raise NotImplementedError
Expand Down
3 changes: 2 additions & 1 deletion src/undate/converters/calendars/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from undate.converters.calendars.gregorian import GregorianDateConverter
from undate.converters.calendars.hijri import HijriDateConverter
from undate.converters.calendars.hebrew import HebrewDateConverter

__all__ = ["HijriDateConverter", "GregorianDateConverter"]
__all__ = ["HijriDateConverter", "GregorianDateConverter", "HebrewDateConverter"]
6 changes: 5 additions & 1 deletion src/undate/converters/calendars/gregorian.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ class GregorianDateConverter(BaseCalendarConverter):
#: known non-leap year
NON_LEAP_YEAR: int = 2022

def min_month(self) -> int:
"""First month for the Gregorian calendar."""
return 1

def max_month(self, year: int) -> int:
"""Maximum month for this calendar for this year"""
"""maximum numeric month for the specified year in the Gregorian calendar"""
return 12

def max_day(self, year: int, month: int) -> int:
Expand Down
3 changes: 3 additions & 0 deletions src/undate/converters/calendars/hebrew/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from undate.converters.calendars.hebrew.converter import HebrewDateConverter

__all__ = ["HebrewDateConverter"]
78 changes: 78 additions & 0 deletions src/undate/converters/calendars/hebrew/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from typing import Union

from convertdate import hebrew  # type: ignore
from lark.exceptions import UnexpectedCharacters, UnexpectedEOF

from undate.converters.base import BaseCalendarConverter
from undate.converters.calendars.hebrew.parser import hebrew_parser
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
from undate.undate import Undate, UndateInterval


class HebrewDateConverter(BaseCalendarConverter):
    """
    Converter for Hebrew Anno Mundi calendar.

    Support for parsing Anno Mundi dates and converting to Undate and UndateInterval
    objects in the Gregorian calendar.
    """

    #: converter name: Hebrew
    name: str = "Hebrew"
    #: calendar name; appended to the original date string in parsed labels
    calendar_name: str = "Anno Mundi"

    def __init__(self):
        self.transformer = HebrewDateTransformer()

    def min_month(self) -> int:
        """Smallest numeric month for this calendar."""
        return 1

    def max_month(self, year: int) -> int:
        """Maximum numeric month for this calendar. In Hebrew calendar, this is 12 or 13
        depending on whether it is a leap year."""
        return hebrew.year_months(year)

    def first_month(self) -> int:
        """First month in this calendar. The Hebrew civil year starts in Tishri."""
        return hebrew.TISHRI

    def last_month(self, year: int) -> int:
        """Last month in this calendar. Hebrew civil year starts in Tishri,
        Elul is the month before Tishri."""
        return hebrew.ELUL

    def max_day(self, year: int, month: int) -> int:
        """maximum numeric day for the specified year and month in this calendar"""
        # NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length
        return hebrew.month_days(year, month)

    def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
        """Convert a Hebrew date, specified by year, month, and day,
        to the Gregorian equivalent date. Returns a tuple of year, month, day.
        """
        return hebrew.to_gregorian(year, month, day)

    def parse(self, value: str) -> Union[Undate, UndateInterval]:
        """
        Parse a Hebrew date string and return an :class:`~undate.undate.Undate` or
        :class:`~undate.undate.UndateInterval`.
        The Hebrew date string is preserved in the undate label.

        :raises ValueError: if the string is empty or cannot be parsed
            as a Hebrew date
        """
        if not value:
            raise ValueError("Parsing empty string is not supported")

        # parse the string with our Hebrew date parser; only the parse step
        # is expected to raise these errors, so keep the try body minimal
        try:
            parsetree = hebrew_parser.parse(value)
        except (UnexpectedCharacters, UnexpectedEOF) as err:
            # UnexpectedEOF is raised for incomplete input (e.g. a day and
            # month with no year); report it the same way as any other
            # unparseable string instead of leaking a parser exception
            raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err

        # transform the parse tree into an undate or undate interval
        undate_obj = self.transformer.transform(parsetree)
        # set the original date as a label, with the calendar name
        undate_obj.label = f"{value} {self.calendar_name}"
        return undate_obj

    # do we need to support conversion the other direction?
    # i.e., generate a Hebrew date from an arbitrary undate or undate interval?
56 changes: 56 additions & 0 deletions src/undate/converters/calendars/hebrew/hebrew.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Lark grammar for Hebrew calendar date strings; start rule is hebrew_date.

// whitespace between tokens is ignored
%import common.WS
%ignore WS

// only support day month year format for now
// parser requires numeric day and year to be distinguished based on order
hebrew_date: day month year | month year | year

// TODO: handle date ranges?

// TODO: add support for qualifiers?
// PGP dates use qualifiers like "first decade of" (for beginning of month)
// "first third of", seasons (can look for more examples)

// Hebrew calendar starts with year 1 in 3761 BCE
year: /\d+/

// months
// NOTE: the numeric suffix of each month_N rule name is the month number;
// the transformer reads it from the rule name, so rules must not be renamed
month: month_1
| month_2
| month_3
| month_4
| month_5
| month_6
| month_7
| month_8
| month_9
| month_10
| month_11
| month_12
| month_13
// months have 29 or 30 days; we do not expect leading zeroes
day: /[1-9]/ | /[12][0-9]/ | /30/

// months, in order; from convertdate list
// with variants from Princeton Geniza Project
// support matching with and without accents
month_1: "Nisan"
// Iyar or Iyyar
month_2: /Iyy?ar/
month_3: "Sivan"
month_4: "Tammuz"
month_5: "Av"
month_6: "Elul"
// Tishrei or Tishri
month_7: /Tishre?i/
month_8: "Heshvan"
month_9: "Kislev"
// Tevet or Teveth
month_10: /[ṬT]eveth?/
month_11: "Shevat"
// Adar I or Adar
month_12: /Adar( I)?/
// Adar II or Adar Bet
month_13: /Adar (II|Bet)/

9 changes: 9 additions & 0 deletions src/undate/converters/calendars/hebrew/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import pathlib

from lark import Lark

# the grammar file lives alongside this module
grammar_path = pathlib.Path(__file__).parent / "hebrew.lark"

# The grammar includes non-ASCII characters in month-name patterns, so read it
# explicitly as UTF-8 instead of relying on the platform default encoding.
with open(grammar_path, encoding="utf-8") as grammar:
    # NOTE: LALR parser is faster but can't be used because of the ambiguity
    # between numeric days and years
    hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True)
40 changes: 40 additions & 0 deletions src/undate/converters/calendars/hebrew/transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from lark import Transformer, Tree

from undate.undate import Undate, Calendar


class HebrewUndate(Undate):
    """Undate convenience subclass; sets default calendar to Hebrew."""

    calendar = Calendar.HEBREW


class HebrewDateTransformer(Transformer):
    """Turn a parsed Hebrew date tree into an Undate or UndateInterval."""

    def hebrew_date(self, items):
        """Collect year, month, and day subtrees into a :class:`HebrewUndate`."""
        # each recognized subtree holds a single value that casts to int;
        # anonymous tokens convert to their value
        date_fields = {
            str(node.data): int(node.children[0])
            for node in items
            if node.data in ["year", "month", "day"]
        }
        # undate with Hebrew year, month, day and the Hebrew calendar
        return HebrewUndate(**date_fields)

    # No ``year`` handler is defined: the default behavior already yields
    # a tree named "year", which is what ``hebrew_date`` expects.

    def month(self, items):
        """Replace the nested month rule with a ``month`` tree holding its number."""
        # the nested rule is named month_1, month_2, etc.; the suffix after
        # the last underscore is the month number needed for conversion
        rule_name = items[0].data
        month_number = rule_name.rsplit("_", 1)[-1]
        return Tree(data="month", children=[month_number])
16 changes: 10 additions & 6 deletions src/undate/converters/calendars/hijri/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,18 @@ class HijriDateConverter(BaseCalendarConverter):
def __init__(self):
self.transformer = HijriDateTransformer()

def max_month(self, year: int) -> int:
"""maximum numeric month for the specified year in this calendar"""
return 12

def max_day(self, year: int, month: int) -> int:
"""maximum numeric day for the specified year and month in this calendar"""
return islamic.month_length(year, month)

def min_month(self) -> int:
"""smallest numeric month for this calendar."""
return 1

def max_month(self, year: int) -> int:
"""maximum numeric month for this calendar"""
return 12

def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
"""Convert a Hijri date, specified by year, month, and day,
to the Gregorian equivalent date. Returns a tuple of year, month, day.
Expand All @@ -41,8 +45,8 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
def parse(self, value: str) -> Union[Undate, UndateInterval]:
"""
Parse a Hijri date string and return an :class:`~undate.undate.Undate` or
:class:`~undate.undate.UndateInterval` in Gregorian calendar.
The Hijri date string is preserved in the undate label
:class:`~undate.undate.UndateInterval`.
The Hijri date string is preserved in the undate label.
"""
if not value:
raise ValueError("Parsing empty string is not supported")
Expand Down
20 changes: 12 additions & 8 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class Calendar(StrEnum):

GREGORIAN = auto()
HIJRI = auto()
HEBREW = auto()

@staticmethod
def get_converter(calendar):
Expand Down Expand Up @@ -123,20 +124,23 @@ def calculate_earliest_latest(self, year, month, day):
if month == "XX":
month = None

min_month = 1 # is min month ever anything other than 1 ?
# get max month from the calendar, since it depends on the
# calendar and potentially the year (e.g. leap years in Hebrew Anno Mundi)
# get first and last month from the calendar (not always 1 and 12)
# as well as min/max months
earliest_month = self.calendar_converter.first_month()
latest_month = self.calendar_converter.last_month(max_year)

min_month = self.calendar_converter.min_month()
max_month = self.calendar_converter.max_month(max_year)
if month is not None:
try:
# treat as an integer if we can
month = int(month)
# update initial value
self.initial_values["month"] = month
min_month = max_month = month
earliest_month = latest_month = month
except ValueError:
# if not, calculate min/max for missing digits
min_month, max_month = self._missing_digit_minmax(
earliest_month, latest_month = self._missing_digit_minmax(
str(month), min_month, max_month
)
# similar to month above — unknown day, but day-level granularity
Expand All @@ -155,7 +159,7 @@ def calculate_earliest_latest(self, year, month, day):
rel_year = year if year and isinstance(year, int) else None
# use month if it is an integer; otherwise use previously determined
# latest month (which may not be 12 depending if partially unknown)
rel_month = month if month and isinstance(month, int) else max_month
rel_month = month if month and isinstance(month, int) else latest_month

max_day = self.calendar_converter.max_day(rel_year, rel_month)

Expand All @@ -171,10 +175,10 @@ def calculate_earliest_latest(self, year, month, day):
# convert to Gregorian calendar so earliest/latest can always
# be used for comparison
self.earliest = Date(
*self.calendar_converter.to_gregorian(min_year, min_month, min_day)
*self.calendar_converter.to_gregorian(min_year, earliest_month, min_day)
)
self.latest = Date(
*self.calendar_converter.to_gregorian(max_year, max_month, max_day)
*self.calendar_converter.to_gregorian(max_year, latest_month, max_day)
)

def set_calendar(self, calendar: Union[str, Calendar]):
Expand Down
Loading

0 comments on commit 333e740

Please sign in to comment.