Skip to content

Commit

Permalink
Restructure library into record and batch
Browse files Browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Dec 22, 2023
1 parent db9e892 commit 477669d
Show file tree
Hide file tree
Showing 12 changed files with 223 additions and 407 deletions.
53 changes: 53 additions & 0 deletions src/isd/batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import gzip
from io import BytesIO
from pathlib import Path
from dataclasses import dataclass
from typing import List, TYPE_CHECKING, Union, Optional
import datetime as dt

from src.isd.record import Record

if TYPE_CHECKING:
import pandas as pd


@dataclass
class Batch:
    """An in-memory collection of ISD records."""

    # Parsed records, kept in the order they were read from the source.
    records: List[Record]

    @classmethod
    def from_path(cls, path: Union[str, Path]) -> "Batch":
        """Read a local ISD file and return a batch holding all of its records.

        If the path has a .gz extension, the file is assumed to be gzip
        compressed and is opened with `gzip.open`. Both the compressed and the
        uncompressed branch decode the content as UTF-8, so the result does
        not depend on the platform's locale encoding.
        """
        path = Path(path)
        if path.suffix == ".gz":
            # gzip.open defaults to binary mode, so each line is decoded here.
            with gzip.open(path) as gzip_file:
                return cls(
                    [
                        Record.from_string(gzip_line.decode("utf-8"))
                        for gzip_line in gzip_file
                    ]
                )
        # Explicit encoding keeps this branch consistent with the gzip branch.
        with open(path, encoding="utf-8") as uncompressed_file:
            return cls(
                [
                    Record.from_string(uncompressed_line)
                    for uncompressed_line in uncompressed_file
                ]
            )

    @classmethod
    def from_string(cls, string: Union[str, BytesIO]) -> "Batch":
        """Build a batch from ISD text, one record per line.

        `string` is either the text itself or a `BytesIO` whose remaining
        content is read and decoded as UTF-8 before being split into lines.
        """
        if isinstance(string, BytesIO):
            string = string.read().decode("utf-8")
        return cls([Record.from_string(line) for line in string.splitlines()])

    def filter_by_datetime(
        self,
        start_date: Optional[dt.datetime] = None,
        end_date: Optional[dt.datetime] = None,
    ) -> List[Record]:
        """Return the records whose datetime lies in [start_date, end_date).

        Both bounds are optional; a bound that is None is not applied. The
        start bound is inclusive and the end bound is exclusive. The original
        record order is preserved.
        """
        if start_date is None and end_date is None:
            # Nothing to compare against: return a copy without touching the
            # records' datetimes at all.
            return list(self.records)

        selected = []
        for record in self.records:
            # Compute the record's datetime once and reuse it for both bounds.
            timestamp = record.datetime()
            if start_date is not None and timestamp < start_date:
                continue
            if end_date is not None and timestamp >= end_date:
                continue
            selected.append(record)
        return selected

    def to_df(self) -> "pd.DataFrame":
        """Convert the batch into a DataFrame with one row per record.

        Each row is built from the record's `to_dict()` output.
        """
        # Imported lazily so pandas is only required when this method is used.
        import pandas as pd

        return pd.DataFrame([record.to_dict() for record in self.records])
18 changes: 7 additions & 11 deletions src/isd/cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
# type: ignore

import dataclasses
import itertools
import json

import click
from click import ClickException

import isd.io
from src.isd.batch import Batch


@click.group()
Expand All @@ -20,9 +16,9 @@ def main() -> None:
@click.option("-i", "--index", default=0)
def record(infile: str, index: int) -> None:
"""Prints a single record to standard output in JSON format."""
with isd.io.open(infile) as records:
record = next(itertools.islice(records, index, None), None)
if record:
print(json.dumps(dataclasses.asdict(record), indent=4))
else:
raise ClickException(f"No record with index {index}")
batch = Batch.from_path(infile)
try:
record_ = batch.records[index]
print(record_.to_json())
except IndexError:
raise ClickException(f"No record with index {index}")
48 changes: 0 additions & 48 deletions src/isd/io.py

This file was deleted.

165 changes: 0 additions & 165 deletions src/isd/pandas.py

This file was deleted.

Loading

0 comments on commit 477669d

Please sign in to comment.