From 588984453924aa70a24fa6f5ab5729aee1b179a9 Mon Sep 17 00:00:00 2001 From: Lorenzo Stella Date: Tue, 5 Nov 2024 00:17:15 +0100 Subject: [PATCH 1/2] fix freq string issues in datasets --- src/gluonts/dataset/repository/_ercot.py | 2 +- src/gluonts/dataset/repository/_gp_copula_2019.py | 6 +++--- src/gluonts/dataset/repository/_lstnet.py | 6 +++--- src/gluonts/dataset/repository/_tsf_datasets.py | 4 ++++ src/gluonts/dataset/repository/_tsf_reader.py | 8 ++++---- src/gluonts/dataset/repository/_uber_tlc.py | 2 +- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/gluonts/dataset/repository/_ercot.py b/src/gluonts/dataset/repository/_ercot.py index d013a09c9f..7e144d988e 100644 --- a/src/gluonts/dataset/repository/_ercot.py +++ b/src/gluonts/dataset/repository/_ercot.py @@ -26,7 +26,7 @@ def generate_ercot_dataset(dataset_path: Path, dataset_writer: DatasetWriter): df.ffill(inplace=True) regions = [col for col in df.columns if col not in ["ds", "y"]] - freq = "1H" + freq = "1h" prediction_length = 24 start = pd.Period(df["ds"][0], freq=freq) diff --git a/src/gluonts/dataset/repository/_gp_copula_2019.py b/src/gluonts/dataset/repository/_gp_copula_2019.py index 88c6becea2..41d5da9ba6 100644 --- a/src/gluonts/dataset/repository/_gp_copula_2019.py +++ b/src/gluonts/dataset/repository/_gp_copula_2019.py @@ -63,7 +63,7 @@ class GPCopulaDataset(NamedTuple): # original dataset can be found at https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014# num_series=370, prediction_length=24, - freq="H", + freq="h", rolling_evaluations=7, max_target_dim=None, ), @@ -73,7 +73,7 @@ class GPCopulaDataset(NamedTuple): # note there are 963 in the original dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF num_series=963, prediction_length=24, - freq="H", + freq="h", rolling_evaluations=7, max_target_dim=None, ), @@ -82,7 +82,7 @@ class GPCopulaDataset(NamedTuple): url=root + "solar_nips.tar.gz", num_series=137, prediction_length=24, - freq="H", + freq="h", rolling_evaluations=7, max_target_dim=None, ), diff --git a/src/gluonts/dataset/repository/_lstnet.py b/src/gluonts/dataset/repository/_lstnet.py index e933666c77..0253cffd95 100644 --- a/src/gluonts/dataset/repository/_lstnet.py +++ b/src/gluonts/dataset/repository/_lstnet.py @@ -91,7 +91,7 @@ class LstnetDataset(NamedTuple): prediction_length=24, rolling_evaluations=7, start_date="2012-01-01", - freq="1H", + freq="1h", agg_freq=None, ), "traffic": LstnetDataset( @@ -105,7 +105,7 @@ class LstnetDataset(NamedTuple): prediction_length=24, rolling_evaluations=7, start_date="2015-01-01", - freq="H", + freq="h", agg_freq=None, ), "solar-energy": LstnetDataset( @@ -117,7 +117,7 @@ class LstnetDataset(NamedTuple): rolling_evaluations=7, start_date="2006-01-01", freq="10min", - agg_freq="1H", + agg_freq="1h", ), } diff --git a/src/gluonts/dataset/repository/_tsf_datasets.py b/src/gluonts/dataset/repository/_tsf_datasets.py index ba073cdf4c..b6ff0340b2 100644 --- a/src/gluonts/dataset/repository/_tsf_datasets.py +++ b/src/gluonts/dataset/repository/_tsf_datasets.py @@ -278,11 +278,15 @@ def generate_forecasting_dataset( def default_prediction_length_from_frequency(freq: str) -> int: prediction_length_map = { "T": 60, + "min": 60, "H": 48, + "h": 48, "D": 30, "W-SUN": 8, "M": 12, + "ME": 12, "Y": 4, + "YE": 4, } try: freq = to_offset(freq).name diff --git a/src/gluonts/dataset/repository/_tsf_reader.py b/src/gluonts/dataset/repository/_tsf_reader.py index 45386864a7..ca87bb27f3 100644 --- a/src/gluonts/dataset/repository/_tsf_reader.py +++ b/src/gluonts/dataset/repository/_tsf_reader.py @@ -49,10 +49,10 @@ def frequency_converter(freq: str): BASE_FREQ_TO_PANDAS_OFFSET: Dict[str, str] = { "seconds": "S", - "minutely": "T", - "minutes": "T", - "hourly": "H", - "hours": "H", + "minutely": "min", + "minutes": "min", + "hourly": "h", + "hours": "h", "daily": "D", "days": "D", "weekly": "W", diff --git a/src/gluonts/dataset/repository/_uber_tlc.py b/src/gluonts/dataset/repository/_uber_tlc.py index 73aabd0701..26a086db43 100644 --- a/src/gluonts/dataset/repository/_uber_tlc.py +++ b/src/gluonts/dataset/repository/_uber_tlc.py @@ -28,7 +28,7 @@ def generate_uber_dataset( prediction_length: int, dataset_writer: DatasetWriter, ): - subsets = {"daily": "1D", "hourly": "1H"} + subsets = {"daily": "1D", "hourly": "1h"} assert ( uber_freq.lower() in subsets ), f"invalid uber_freq='{uber_freq}'. Allowed values: {subsets.keys()}" From eba268b8fb6b9fcecf6f960d67eaf5e01db43bce Mon Sep 17 00:00:00 2001 From: Lorenzo Stella Date: Tue, 5 Nov 2024 00:36:11 +0100 Subject: [PATCH 2/2] update test --- test/dataset/test_tsf_reader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/dataset/test_tsf_reader.py b/test/dataset/test_tsf_reader.py index 2261d7ebfc..686b3f4d2c 100644 --- a/test/dataset/test_tsf_reader.py +++ b/test/dataset/test_tsf_reader.py @@ -20,10 +20,10 @@ "input_freq_str, output_freq_str", [ ("30_seconds", "30S"), - ("minutely", "T"), - ("10_minutes", "10T"), - ("hourly", "H"), - ("half_hourly", "0.5H"), + ("minutely", "min"), + ("10_minutes", "10min"), + ("hourly", "h"), + ("half_hourly", "0.5h"), ("daily", "D"), ("7_days", "7D"), ("weekly", "W"),