-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #108 from dh-tech/feature/convert-hebrew
Add converter for hebrew calendar based on hijri calendar
- Loading branch information
Showing
14 changed files
with
494 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from undate.converters.calendars.gregorian import GregorianDateConverter | ||
from undate.converters.calendars.hijri import HijriDateConverter | ||
from undate.converters.calendars.hebrew import HebrewDateConverter | ||
|
||
__all__ = ["HijriDateConverter", "GregorianDateConverter"] | ||
__all__ = ["HijriDateConverter", "GregorianDateConverter", "HebrewDateConverter"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from undate.converters.calendars.hebrew.converter import HebrewDateConverter | ||
|
||
__all__ = ["HebrewDateConverter"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from typing import Union | ||
|
||
from convertdate import hebrew # type: ignore | ||
from lark.exceptions import UnexpectedCharacters | ||
|
||
from undate.converters.base import BaseCalendarConverter | ||
from undate.converters.calendars.hebrew.parser import hebrew_parser | ||
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer | ||
from undate.undate import Undate, UndateInterval | ||
|
||
|
||
class HebrewDateConverter(BaseCalendarConverter):
    """
    Converter for the Hebrew Anno Mundi calendar.

    Support for parsing Anno Mundi dates and converting to Undate and UndateInterval
    objects in the Gregorian calendar.
    """

    #: converter name: Hebrew
    name: str = "Hebrew"
    #: calendar name; appended to the original date string in parsed labels
    calendar_name: str = "Anno Mundi"

    def __init__(self):
        # transformer that turns a parse tree into an undate object
        self.transformer = HebrewDateTransformer()

    def min_month(self) -> int:
        """Smallest numeric month for this calendar."""
        return 1

    def max_month(self, year: int) -> int:
        """Maximum numeric month for this calendar. In Hebrew calendar, this is 12 or 13
        depending on whether it is a leap year."""
        return hebrew.year_months(year)

    def first_month(self) -> int:
        """First month in this calendar. The Hebrew civil year starts in Tishri."""
        return hebrew.TISHRI

    def last_month(self, year: int) -> int:
        """Last month in this calendar. Hebrew civil year starts in Tishri,
        Elul is the month before Tishri. The ``year`` argument is accepted
        but not used: Elul is returned for every year."""
        return hebrew.ELUL

    def max_day(self, year: int, month: int) -> int:
        """Maximum numeric day for the specified year and month in this calendar."""
        # NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length
        return hebrew.month_days(year, month)

    def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
        """Convert a Hebrew date, specified by year, month, and day,
        to the Gregorian equivalent date. Returns a tuple of year, month, day.
        """
        return hebrew.to_gregorian(year, month, day)

    def parse(self, value: str) -> Union[Undate, UndateInterval]:
        """
        Parse a Hebrew date string and return an :class:`~undate.undate.Undate` or
        :class:`~undate.undate.UndateInterval`.
        The Hebrew date string is preserved in the undate label.

        :param value: Hebrew date string to parse
        :raises ValueError: if the string is empty or cannot be parsed
            as a Hebrew date
        """
        if not value:
            raise ValueError("Parsing empty string is not supported")

        # UnexpectedInput is the common base class of lark's input errors
        # (UnexpectedCharacters, UnexpectedEOF, ...); catching it ensures that
        # truncated input (e.g. a month with no year) is also reported as a
        # ValueError instead of leaking a lark exception to the caller.
        # Imported here so the module-level imports do not need to change.
        from lark.exceptions import UnexpectedInput

        # parse the input string, then transform to undate object
        try:
            # parse the string with our Hebrew date parser
            parsetree = hebrew_parser.parse(value)
            # transform the parse tree into an undate or undate interval
            undate_obj = self.transformer.transform(parsetree)
            # set the original date as a label, with the calendar name
            undate_obj.label = f"{value} {self.calendar_name}"
            return undate_obj
        except UnexpectedInput as err:
            raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err

    # do we need to support conversion the other direction?
    # i.e., generate a Hebrew date from an arbitrary undate or undate interval?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// Grammar for parsing Hebrew (Anno Mundi) calendar dates.
// Whitespace between tokens is ignored.
%import common.WS
%ignore WS

// only support day month year format for now
// parser requires numeric day and year to be distinguished based on order
// (both are plain digits, so position relative to the month decides)
hebrew_date: day month year | month year | year

// TODO: handle date ranges?

// TODO: add support for qualifiers?
// PGP dates use qualifiers like "first decade of" (for beginning of month)
// "first third of", seasons (can look for more examples)

// Hebrew calendar starts with year 1 in 3761 BCE
year: /\d+/

// months
// NOTE: the numeric suffix of each month_N rule name is significant;
// the transformer uses it as the numeric month
month: month_1
| month_2
| month_3
| month_4
| month_5
| month_6
| month_7
| month_8
| month_9
| month_10
| month_11
| month_12
| month_13
// months have 29 or 30 days; we do not expect leading zeroes
day: /[1-9]/ | /[12][0-9]/ | /30/

// months, in order; from convertdate list
// with variants from Princeton Geniza Project
// support matching with and without accents
month_1: "Nisan"
// Iyar or Iyyar
month_2: /Iyy?ar/
month_3: "Sivan"
month_4: "Tammuz"
month_5: "Av"
month_6: "Elul"
// Tishrei or Tishri
month_7: /Tishre?i/
month_8: "Heshvan"
month_9: "Kislev"
// Tevet or Teveth, with or without the dot under the initial T
month_10: /[ṬT]eveth?/
month_11: "Shevat"
// Adar I or Adar
month_12: /Adar( I)?/
// Adar II or Adar Bet
month_13: /Adar (II|Bet)/
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import pathlib | ||
|
||
from lark import Lark | ||
|
||
# the grammar file lives in the same directory as this module
grammar_path = pathlib.Path(__file__).parent / "hebrew.lark"

# Read the grammar explicitly as UTF-8: it contains non-ASCII characters
# (e.g. "Ṭ" in the Tevet pattern), so relying on the platform default
# encoding could fail or silently corrupt the pattern.
with open(grammar_path, encoding="utf-8") as grammar:
    # NOTE: LALR parser is faster but can't be used due to ambiguity
    # between years and days (both are plain numbers)
    hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from lark import Transformer, Tree | ||
|
||
from undate.undate import Undate, Calendar | ||
|
||
|
||
class HebrewUndate(Undate):
    """Convenience :class:`Undate` subclass; sets the default calendar to Hebrew."""

    calendar = Calendar.HEBREW
|
||
|
||
class HebrewDateTransformer(Transformer):
    """Transform a Hebrew date parse tree and return an undate object
    in the Hebrew calendar."""

    def hebrew_date(self, items):
        """Combine the year, month, and day subtrees into a HebrewUndate."""
        # each relevant subtree holds exactly one value; anonymous tokens
        # convert to their value, which is cast to int and keyed by the
        # name of the subtree (year, month, or day)
        date_parts = {
            str(node.data): int(node.children[0])
            for node in items
            if node.data in ("year", "month", "day")
        }
        # initialize and return an undate with Hebrew year, month, day
        # and the Hebrew calendar
        return HebrewUndate(**date_parts)

    # No year method is defined: the parser already yields a tree named
    # "year", which is what hebrew_date expects, so a translation step
    # would be a no-op.

    def month(self, items):
        """Replace the nested month rule with a tree holding the month number."""
        # the single child is a tree named for the matched rule
        # (month_1, month_2, etc); the suffix after the underscore is the
        # number of the month needed for converting the date
        rule_name = items[0].data
        return Tree(data="month", children=[rule_name.split("_")[-1]])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.