Skip to content

Commit

Permalink
Fix freq string issues in datasets (awslabs#3232)
Browse files Browse the repository at this point in the history
*Issue #, if available:* Fixes awslabs#3229: changes to frequency
strings in pandas broke some of our logic.

*Description of changes:* Add the missing frequency strings in
`_tsf_datasets.py`, and get rid of the frequency-related warnings raised
by the other datasets. I tested the change by running the following script:

```python
from gluonts.dataset.repository import get_dataset, dataset_names

# Dataset names left out of the regeneration run below.
excluded = {
    "m3_monthly",
    "m3_yearly",
    "m3_quarterly",
    "m3_other",
    "m5",
}

# Regenerate every remaining dataset, printing each name first so the
# dataset being processed is visible in the output.
for dataset_name in dataset_names:
    if dataset_name not in excluded:
        print(dataset_name)
        dataset = get_dataset(dataset_name, regenerate=True)
```


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.


**Please tag this PR with at least one of these labels to make our
release process faster:** BREAKING, new feature, bug fix, other change,
dev setup
  • Loading branch information
lostella committed Nov 7, 2024
1 parent 4388656 commit b5a70f7
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/gluonts/dataset/repository/_ercot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def generate_ercot_dataset(dataset_path: Path, dataset_writer: DatasetWriter):
df.ffill(inplace=True)
regions = [col for col in df.columns if col not in ["ds", "y"]]

freq = "1H"
freq = "1h"
prediction_length = 24

start = pd.Period(df["ds"][0], freq=freq)
Expand Down
6 changes: 3 additions & 3 deletions src/gluonts/dataset/repository/_gp_copula_2019.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class GPCopulaDataset(NamedTuple):
# original dataset can be found at https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014#
num_series=370,
prediction_length=24,
freq="H",
freq="h",
rolling_evaluations=7,
max_target_dim=None,
),
Expand All @@ -73,7 +73,7 @@ class GPCopulaDataset(NamedTuple):
# note there are 963 in the original dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF
num_series=963,
prediction_length=24,
freq="H",
freq="h",
rolling_evaluations=7,
max_target_dim=None,
),
Expand All @@ -82,7 +82,7 @@ class GPCopulaDataset(NamedTuple):
url=root + "solar_nips.tar.gz",
num_series=137,
prediction_length=24,
freq="H",
freq="h",
rolling_evaluations=7,
max_target_dim=None,
),
Expand Down
6 changes: 3 additions & 3 deletions src/gluonts/dataset/repository/_lstnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class LstnetDataset(NamedTuple):
prediction_length=24,
rolling_evaluations=7,
start_date="2012-01-01",
freq="1H",
freq="1h",
agg_freq=None,
),
"traffic": LstnetDataset(
Expand All @@ -105,7 +105,7 @@ class LstnetDataset(NamedTuple):
prediction_length=24,
rolling_evaluations=7,
start_date="2015-01-01",
freq="H",
freq="h",
agg_freq=None,
),
"solar-energy": LstnetDataset(
Expand All @@ -117,7 +117,7 @@ class LstnetDataset(NamedTuple):
rolling_evaluations=7,
start_date="2006-01-01",
freq="10min",
agg_freq="1H",
agg_freq="1h",
),
}

Expand Down
4 changes: 4 additions & 0 deletions src/gluonts/dataset/repository/_tsf_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,15 @@ def generate_forecasting_dataset(
def default_prediction_length_from_frequency(freq: str) -> int:
prediction_length_map = {
"T": 60,
"min": 60,
"H": 48,
"h": 48,
"D": 30,
"W-SUN": 8,
"M": 12,
"ME": 12,
"Y": 4,
"YE": 4,
}
try:
freq = to_offset(freq).name
Expand Down
8 changes: 4 additions & 4 deletions src/gluonts/dataset/repository/_tsf_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def frequency_converter(freq: str):

BASE_FREQ_TO_PANDAS_OFFSET: Dict[str, str] = {
"seconds": "S",
"minutely": "T",
"minutes": "T",
"hourly": "H",
"hours": "H",
"minutely": "min",
"minutes": "min",
"hourly": "h",
"hours": "h",
"daily": "D",
"days": "D",
"weekly": "W",
Expand Down
2 changes: 1 addition & 1 deletion src/gluonts/dataset/repository/_uber_tlc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def generate_uber_dataset(
prediction_length: int,
dataset_writer: DatasetWriter,
):
subsets = {"daily": "1D", "hourly": "1H"}
subsets = {"daily": "1D", "hourly": "1h"}
assert (
uber_freq.lower() in subsets
), f"invalid uber_freq='{uber_freq}'. Allowed values: {subsets.keys()}"
Expand Down
8 changes: 4 additions & 4 deletions test/dataset/test_tsf_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
"input_freq_str, output_freq_str",
[
("30_seconds", "30S"),
("minutely", "T"),
("10_minutes", "10T"),
("hourly", "H"),
("half_hourly", "0.5H"),
("minutely", "min"),
("10_minutes", "10min"),
("hourly", "h"),
("half_hourly", "0.5h"),
("daily", "D"),
("7_days", "7D"),
("weekly", "W"),
Expand Down

0 comments on commit b5a70f7

Please sign in to comment.