From 29f37ee0b6f74b754217f3b2217d07c7564b919c Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Wed, 7 Dec 2022 12:12:12 -0600 Subject: [PATCH 1/9] Raise an error for unknown extensions. --- src/troute-network/troute/hyfeature_network_utilities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index e1fc65860..386aa6c8e 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -296,5 +296,6 @@ def read_file(file_name): elif extension=='.parquet': df = pq.read_table(file_name).to_pandas().reset_index() df.index.name = None - + else: + raise ValueError(f"Unknown file suffix: {extension}") return df \ No newline at end of file From 6edf7721dabbe38136b39556696a9a2fb670b9cf Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 8 Dec 2022 15:18:09 -0600 Subject: [PATCH 2/9] Reduce parsing overhead. --- .../troute/hyfeature_network_utilities.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index 386aa6c8e..7b1ae94cf 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -84,12 +84,10 @@ def build_forcing_sets( # Deduce the timeinterval of the forcing data from the output timestamps of the first # two ordered CHRTOUT files - df = read_file(first_file) - t1_str = pd.to_datetime(df.columns[1]).strftime("%Y-%m-%d_%H:%M:%S") - t1 = datetime.strptime(t1_str,"%Y-%m-%d_%H:%M:%S") - df = read_file(second_file) - t2_str = pd.to_datetime(df.columns[1]).strftime("%Y-%m-%d_%H:%M:%S") - t2 = datetime.strptime(t2_str,"%Y-%m-%d_%H:%M:%S") + df = read_file(first_file) + t1 = pd.to_datetime(df.columns[1]) + df = read_file(second_file) + t2 = pd.to_datetime(df.columns[1]) dt_qlat_timedelta = t2 - t1 dt_qlat = dt_qlat_timedelta.seconds From 092b2711f2a97cf0a553e9acfd054aa94dc5f5c0 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:08:57 -0600 Subject: [PATCH 3/9] Remove assert. --- .../troute/hyfeature_network_utilities.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index 7b1ae94cf..99a0c15f9 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -29,13 +29,13 @@ def build_forcing_sets( max_loop_size = forcing_parameters.get("max_loop_size", 12) dt = forcing_parameters.get("dt", None) - try: + if nexus_input_folder is not None: nexus_input_folder = pathlib.Path(nexus_input_folder) - assert nexus_input_folder.is_dir() == True - except TypeError: - raise TypeError("Aborting simulation because no nexus_input_folder is specified in the forcing_parameters section of the .yaml control file.") from None - except AssertionError: - raise AssertionError("Aborting simulation because the nexus_input_folder:", qlat_input_folder,"does not exist. Please check the the nexus_input_folder variable is correctly entered in the .yaml control file") from None + if not nexus_input_folder.is_dir(): + raise ValueError(f"Aborting simulation because the nexus_input_folder: {nexus_input_folder} does not exist. Please check the the nexus_input_folder variable is correctly entered in the .yaml control file") + else: + raise TypeError("Aborting simulation because no nexus_input_folder is specified in the forcing_parameters section of the .yaml control file.") + forcing_glob_filter = forcing_parameters.get("nexus_file_pattern_filter", "*.NEXOUT") From 6f0c7d154a508d74616dff90e8ecc278c46c30f1 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:09:47 -0600 Subject: [PATCH 4/9] Add pass statement to empty block. --- src/troute-network/troute/hyfeature_network_utilities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index 99a0c15f9..9051fc0c6 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -73,6 +73,7 @@ def build_forcing_sets( run_sets[s]['final_timestamp'] = \ datetime.strptime(final_timestamp_str, '%Y-%m-%d_%H:%M:%S') ''' + pass elif nexus_input_folder: # Construct run_set dictionary from user-specified parameters From 15562148eac484ffef20d009381b49b876b75f93 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:11:32 -0600 Subject: [PATCH 5/9] Generate forcing filenames in one pass. Remove assert. --- .../troute/hyfeature_network_utilities.py | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index 9051fc0c6..4b2bab158 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -107,37 +107,30 @@ def build_forcing_sets( # range(nfiles)] # ** Correction ** Because qlat file at time t is constantly applied throughout [t, t+1], # ** n + 1 should be replaced by n - datetime_list = [t0 + dt_qlat_timedelta * (n) for n in - range(nfiles)] - datetime_list_str = [datetime.strftime(d, '%Y%m%d%H%M') for d in - datetime_list] - - # list of forcing files - forcing_filename_list = [d_str + forcing_glob_filter[1:] for d_str in - datetime_list_str] - - # check that all forcing files exist - for f in forcing_filename_list: - try: - J = pathlib.Path(nexus_input_folder.joinpath(f)) - assert J.is_file() == True - except AssertionError: - raise AssertionError("Aborting simulation because forcing file", J, "cannot be not found.") from None - + # Existence of each file is checked, raising an error if an expected file is missing. + forcing_filenames = [] + for n in range(nfiles): + fn_dt = (t0 + dt_qlat_timedelta * n).strftime("%Y%m%d%H%M") + fn = nexus_input_folder.joinpath(fn_dt + forcing_glob_filter[1:]) + if fn.is_file(): + forcing_filenames.append(fn) + else: + raise FileNotFoundError(f"Forcing file {fn} is missing. Aborting simulation.") + # build run sets list run_sets = [] k = 0 j = 0 nts_accum = 0 nts_last = 0 - while k < len(forcing_filename_list): + while k < len(forcing_filenames): run_sets.append({}) - if k + max_loop_size < len(forcing_filename_list): - run_sets[j]['nexus_files'] = forcing_filename_list[k:k + if k + max_loop_size < len(forcing_filenames): + run_sets[j]['nexus_files'] = forcing_filenames[k:k + max_loop_size] else: - run_sets[j]['nexus_files'] = forcing_filename_list[k:] + run_sets[j]['nexus_files'] = forcing_filenames[k:] nts_accum += len(run_sets[j]['nexus_files']) * qts_subdivisions if nts_accum <= nts: From 7aee393fbb29f61e42a53b972ff200565a9c4bcb Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:14:46 -0600 Subject: [PATCH 6/9] Avoid roundtrip parsing with datetimes. --- .../troute/hyfeature_network_utilities.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index 4b2bab158..a8eb43b23 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -139,15 +139,11 @@ def build_forcing_sets( else: run_sets[j]['nts'] = int(nts - nts_last) - final_nexout = nexus_input_folder.joinpath(run_sets[j]['nexus_files' - ][-1]) + final_nexout = nexus_input_folder.joinpath(run_sets[j]['nexus_files'][-1]) #final_timestamp_str = nhd_io.get_param_str(final_nexout, # 'model_output_valid_time') - df = read_file(final_nexout) - final_timestamp_str = pd.to_datetime(df.columns[1]).strftime("%Y-%m-%d_%H:%M:%S") - - run_sets[j]['final_timestamp'] = \ - datetime.strptime(final_timestamp_str, '%Y-%m-%d_%H:%M:%S') + df = read_file(final_nexout) + run_sets[j]['final_timestamp'] = pd.to_datetime(df.columns[1]) nts_last = nts_accum k += max_loop_size From b0e9800f9b2fa3af6e32fef8e9f6f167bddd7f9d Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:17:54 -0600 Subject: [PATCH 7/9] More efficient check for empty directory. --- .../troute/hyfeature_network_utilities.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index a8eb43b23..e17cd9812 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -46,11 +46,16 @@ def build_forcing_sets( #Check that directory/files specified will work if not binary_folder: - raise(RuntimeError("No output binary qlat folder supplied in config")) + raise RuntimeError("No output binary qlat folder supplied in config") elif not os.path.exists(binary_folder): - raise(RuntimeError("Output binary qlat folder supplied in config does not exist")) - elif len(list(pathlib.Path(binary_folder).glob('*.parquet'))) != 0: - raise(RuntimeError("Output binary qlat folder supplied in config is not empty (already contains '.parquet' files)")) + raise RuntimeError("Output binary qlat folder supplied in config does not exist") + else: + try: + next(pathlib.Path(binary_folder).glob('*.parquet')) + raise RuntimeError("Output binary qlat folder supplied in config is not empty (already contains '.parquet' files)") + except StopIteration: + # Directory is empty + pass #Add tnx for backwards compatability nexus_files_list = list(nexus_files) + list(nexus_input_folder.glob('tnx*.csv')) From 293ce691be4675533149a8fa3eac0b819c8963a2 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:20:13 -0600 Subject: [PATCH 8/9] Cleanup imports. --- src/troute-network/troute/hyfeature_network_utilities.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index e17cd9812..b13fdb68f 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -1,14 +1,9 @@ -import json import pathlib -from functools import partial -from datetime import datetime, timedelta import logging import os import pandas as pd import numpy as np -import netCDF4 -from joblib import delayed, Parallel import pyarrow as pa import pyarrow.parquet as pq From 479d2b4c5329fbfb55624bc583c4ab03a898dc2d Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Fri, 9 Dec 2022 10:34:16 -0600 Subject: [PATCH 9/9] Remove else. --- .../troute/hyfeature_network_utilities.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/troute-network/troute/hyfeature_network_utilities.py b/src/troute-network/troute/hyfeature_network_utilities.py index b13fdb68f..eb65c0092 100644 --- a/src/troute-network/troute/hyfeature_network_utilities.py +++ b/src/troute-network/troute/hyfeature_network_utilities.py @@ -44,13 +44,13 @@ def build_forcing_sets( raise RuntimeError("No output binary qlat folder supplied in config") elif not os.path.exists(binary_folder): raise RuntimeError("Output binary qlat folder supplied in config does not exist") - else: - try: - next(pathlib.Path(binary_folder).glob('*.parquet')) - raise RuntimeError("Output binary qlat folder supplied in config is not empty (already contains '.parquet' files)") - except StopIteration: - # Directory is empty - pass + + try: + next(pathlib.Path(binary_folder).glob('*.parquet')) + raise RuntimeError("Output binary qlat folder supplied in config is not empty (already contains '.parquet' files)") + except StopIteration: + # Directory is empty + pass #Add tnx for backwards compatability nexus_files_list = list(nexus_files) + list(nexus_input_folder.glob('tnx*.csv'))