diff --git a/landbosse/landbosse_omdao/OpenMDAODataframeCache.py b/landbosse/landbosse_omdao/OpenMDAODataframeCache.py index d3ac2bf8..46632834 100644 --- a/landbosse/landbosse_omdao/OpenMDAODataframeCache.py +++ b/landbosse/landbosse_omdao/OpenMDAODataframeCache.py @@ -82,10 +82,10 @@ def read_all_sheets_from_xlsx(cls, xlsx_basename, xlsx_path=None): else: xlsx_filename = os.path.join(xlsx_path, f"{xlsx_basename}.xlsx") - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=PendingDeprecationWarning) - xlsx = pd.ExcelFile(xlsx_filename) + xlsx = pd.ExcelFile(xlsx_filename, engine='openpyxl') sheets_dict = {sheet_name: xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names} + for sheet_name in xlsx.sheet_names: + sheets_dict[sheet_name].dropna(inplace=True, how='all') cls._cache[xlsx_basename] = sheets_dict return cls.copy_dataframes(sheets_dict) diff --git a/landbosse/landbosse_omdao/XlsxValidator.py b/landbosse/landbosse_omdao/XlsxValidator.py index e2360ea4..5d4fc220 100644 --- a/landbosse/landbosse_omdao/XlsxValidator.py +++ b/landbosse/landbosse_omdao/XlsxValidator.py @@ -1,6 +1,5 @@ import pandas as pd - class XlsxValidator: """ XlsxValidator is for comparing the results of a previous model run @@ -36,65 +35,54 @@ def compare_expected_to_actual(self, expected_xlsx, actual_module_type_operation # First, make the list of dictionaries into a dataframe, and drop # the raw_cost and raw_cost_total_or_per_turbine columns. actual_df = pd.DataFrame(actual_module_type_operation_list) - actual_df.drop(["raw_cost", "raw_cost_total_or_per_turbine"], axis=1, inplace=True) - expected_df = pd.read_excel(expected_xlsx, "costs_by_module_type_operation") - expected_df.rename( - columns={ - "Project ID with serial": "project_id_with_serial", - "Number of turbines": "num_turbines", - "Turbine rating MW": "turbine_rating_MW", - "Module": "module", - "Operation ID": "operation_id", - "Type of cost": "type_of_cost", - "Cost per turbine": "cost_per_turbine", - "Cost per project": "cost_per_project", - "USD/kW per project": "usd_per_kw_per_project", - }, - inplace=True, - ) + actual_df.drop(['raw_cost', 'raw_cost_total_or_per_turbine'], axis=1, inplace=True) + expected_df = pd.read_excel(expected_xlsx, 'costs_by_module_type_operation', engine='openpyxl') + #expected_df = expected_df.dropna(inplace=True, how='all') + expected_df.rename(columns={ + 'Project ID with serial': 'project_id_with_serial', + 'Number of turbines': 'num_turbines', + 'Turbine rating MW': 'turbine_rating_MW', + 'Module': 'module', + 'Operation ID': 'operation_id', + 'Type of cost': 'type_of_cost', + 'Cost per turbine': 'cost_per_turbine', + 'Cost per project': 'cost_per_project', + 'USD/kW per project': 'usd_per_kw_per_project' + }, inplace=True) cost_per_project_actual = actual_df[ - ["cost_per_project", "project_id_with_serial", "module", "operation_id", "type_of_cost"] - ] + ['cost_per_project', 'project_id_with_serial', 'module', 'operation_id', 'type_of_cost']] cost_per_project_expected = expected_df[ - ["cost_per_project", "project_id_with_serial", "module", "operation_id", "type_of_cost"] - ] + ['cost_per_project', 'project_id_with_serial', 'module', 'operation_id', 'type_of_cost']] comparison = cost_per_project_actual.merge( - cost_per_project_expected, on=["project_id_with_serial", "module", "operation_id", "type_of_cost"] - ) + cost_per_project_expected, + on=['project_id_with_serial', 'module', 'operation_id', 'type_of_cost']) - comparison.rename( - columns={ - "cost_per_project_x": "cost_per_project_actual", - "cost_per_project_y": "cost_per_project_expected", - }, - inplace=True, - ) + comparison.rename(columns={'cost_per_project_x': 'cost_per_project_actual', + 'cost_per_project_y': 'cost_per_project_expected'}, inplace=True) - comparison["difference_validation"] = ( - comparison["cost_per_project_actual"] - comparison["cost_per_project_expected"] - ) + comparison['difference_validation'] = comparison['cost_per_project_actual'] - comparison['cost_per_project_expected'] # Regardless of the outcome, write the end result of the comparison # to the validation output file. columns_for_comparison_output = [ - "project_id_with_serial", - "module", - "operation_id", - "type_of_cost", - "cost_per_project_actual", - "cost_per_project_expected", - "difference_validation", + 'project_id_with_serial', + 'module', + 'operation_id', + 'type_of_cost', + 'cost_per_project_actual', + 'cost_per_project_expected', + 'difference_validation' ] comparison.to_excel(validation_output_xlsx, index=False, columns=columns_for_comparison_output) # If the comparison dataframe is empty, that means there are no common # projects in the expected data that match the actual data. if len(comparison) < 1: - print("=" * 80) - print("Validation error: There are no common projects between actual and expected data.") - print("=" * 80) + print('=' * 80) + print('Validation error: There are no common projects between actual and expected data.') + print('=' * 80) return False # Find all rows where the difference is unequal to 0. These are rows @@ -102,13 +90,13 @@ def compare_expected_to_actual(self, expected_xlsx, actual_module_type_operation # in a different order than the originals. # # Round the difference to a given number of decimal places. - failed_rows = comparison[comparison["difference_validation"].round(decimals=4) != 0] + failed_rows = comparison[comparison['difference_validation'].round(decimals=4) != 0] if len(failed_rows) > 0: - print("=" * 80) - print("The following rows failed validation:") + print('=' * 80) + print('The following rows failed validation:') print(failed_rows) - print("=" * 80) + print('=' * 80) return False else: return True