Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup hyfeature_network_utilities.py #599

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 39 additions & 50 deletions src/troute-network/troute/hyfeature_network_utilities.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
import json
import pathlib
from functools import partial
from datetime import datetime, timedelta
import logging
import os

import pandas as pd
import numpy as np
import netCDF4
from joblib import delayed, Parallel
import pyarrow as pa
import pyarrow.parquet as pq

Expand All @@ -29,13 +24,13 @@ def build_forcing_sets(
max_loop_size = forcing_parameters.get("max_loop_size", 12)
dt = forcing_parameters.get("dt", None)

try:
if nexus_input_folder is not None:
nexus_input_folder = pathlib.Path(nexus_input_folder)
assert nexus_input_folder.is_dir() == True
except TypeError:
raise TypeError("Aborting simulation because no nexus_input_folder is specified in the forcing_parameters section of the .yaml control file.") from None
except AssertionError:
raise AssertionError("Aborting simulation because the nexus_input_folder:", qlat_input_folder,"does not exist. Please check the the nexus_input_folder variable is correctly entered in the .yaml control file") from None
if not nexus_input_folder.is_dir():
raise ValueError(f"Aborting simulation because the nexus_input_folder: {nexus_input_folder} does not exist. Please check the the nexus_input_folder variable is correctly entered in the .yaml control file")
else:
raise TypeError("Aborting simulation because no nexus_input_folder is specified in the forcing_parameters section of the .yaml control file.")


forcing_glob_filter = forcing_parameters.get("nexus_file_pattern_filter", "*.NEXOUT")

Expand All @@ -46,11 +41,16 @@ def build_forcing_sets(

#Check that directory/files specified will work
if not binary_folder:
raise(RuntimeError("No output binary qlat folder supplied in config"))
raise RuntimeError("No output binary qlat folder supplied in config")
elif not os.path.exists(binary_folder):
raise(RuntimeError("Output binary qlat folder supplied in config does not exist"))
elif len(list(pathlib.Path(binary_folder).glob('*.parquet'))) != 0:
raise(RuntimeError("Output binary qlat folder supplied in config is not empty (already contains '.parquet' files)"))
raise RuntimeError("Output binary qlat folder supplied in config does not exist")

try:
next(pathlib.Path(binary_folder).glob('*.parquet'))
raise RuntimeError("Output binary qlat folder supplied in config is not empty (already contains '.parquet' files)")
except StopIteration:
# Directory is empty
pass

#Add tnx for backwards compatibility
nexus_files_list = list(nexus_files) + list(nexus_input_folder.glob('tnx*.csv'))
Expand All @@ -73,6 +73,7 @@ def build_forcing_sets(
run_sets[s]['final_timestamp'] = \
datetime.strptime(final_timestamp_str, '%Y-%m-%d_%H:%M:%S')
'''
pass
elif nexus_input_folder:
# Construct run_set dictionary from user-specified parameters

Expand All @@ -84,12 +85,10 @@ def build_forcing_sets(

# Deduce the time interval of the forcing data from the output timestamps of the first
# two ordered CHRTOUT files
df = read_file(first_file)
t1_str = pd.to_datetime(df.columns[1]).strftime("%Y-%m-%d_%H:%M:%S")
t1 = datetime.strptime(t1_str,"%Y-%m-%d_%H:%M:%S")
df = read_file(second_file)
t2_str = pd.to_datetime(df.columns[1]).strftime("%Y-%m-%d_%H:%M:%S")
t2 = datetime.strptime(t2_str,"%Y-%m-%d_%H:%M:%S")
df = read_file(first_file)
t1 = pd.to_datetime(df.columns[1])
df = read_file(second_file)
t2 = pd.to_datetime(df.columns[1])
dt_qlat_timedelta = t2 - t1
dt_qlat = dt_qlat_timedelta.seconds

Expand All @@ -108,37 +107,30 @@ def build_forcing_sets(
# range(nfiles)]
# ** Correction ** Because qlat file at time t is constantly applied throughout [t, t+1],
# ** n + 1 should be replaced by n
datetime_list = [t0 + dt_qlat_timedelta * (n) for n in
range(nfiles)]
datetime_list_str = [datetime.strftime(d, '%Y%m%d%H%M') for d in
datetime_list]

# list of forcing files
forcing_filename_list = [d_str + forcing_glob_filter[1:] for d_str in
datetime_list_str]

# check that all forcing files exist
for f in forcing_filename_list:
try:
J = pathlib.Path(nexus_input_folder.joinpath(f))
assert J.is_file() == True
except AssertionError:
raise AssertionError("Aborting simulation because forcing file", J, "cannot be not found.") from None

# Existence of each file is checked, raising an error if an expected file is missing.
forcing_filenames = []
for n in range(nfiles):
fn_dt = (t0 + dt_qlat_timedelta * n).strftime("%Y%m%d%H%M")
fn = nexus_input_folder.joinpath(fn_dt + forcing_glob_filter[1:])
if fn.is_file():
forcing_filenames.append(fn)
else:
raise FileNotFoundError(f"Forcing file {fn} is missing. Aborting simulation.")

# build run sets list
run_sets = []
k = 0
j = 0
nts_accum = 0
nts_last = 0
while k < len(forcing_filename_list):
while k < len(forcing_filenames):
run_sets.append({})

if k + max_loop_size < len(forcing_filename_list):
run_sets[j]['nexus_files'] = forcing_filename_list[k:k
if k + max_loop_size < len(forcing_filenames):
run_sets[j]['nexus_files'] = forcing_filenames[k:k
+ max_loop_size]
else:
run_sets[j]['nexus_files'] = forcing_filename_list[k:]
run_sets[j]['nexus_files'] = forcing_filenames[k:]

nts_accum += len(run_sets[j]['nexus_files']) * qts_subdivisions
if nts_accum <= nts:
Expand All @@ -147,15 +139,11 @@ def build_forcing_sets(
else:
run_sets[j]['nts'] = int(nts - nts_last)

final_nexout = nexus_input_folder.joinpath(run_sets[j]['nexus_files'
][-1])
final_nexout = nexus_input_folder.joinpath(run_sets[j]['nexus_files'][-1])
#final_timestamp_str = nhd_io.get_param_str(final_nexout,
# 'model_output_valid_time')
df = read_file(final_nexout)
final_timestamp_str = pd.to_datetime(df.columns[1]).strftime("%Y-%m-%d_%H:%M:%S")

run_sets[j]['final_timestamp'] = \
datetime.strptime(final_timestamp_str, '%Y-%m-%d_%H:%M:%S')
df = read_file(final_nexout)
run_sets[j]['final_timestamp'] = pd.to_datetime(df.columns[1])

nts_last = nts_accum
k += max_loop_size
Expand Down Expand Up @@ -296,5 +284,6 @@ def read_file(file_name):
elif extension=='.parquet':
df = pq.read_table(file_name).to_pandas().reset_index()
df.index.name = None

else:
raise ValueError(f"Unknown file suffix: {extension}")
return df