Skip to content

Commit

Permalink
syncing with other PR and removing xlrd engine
Browse files Browse the repository at this point in the history
  • Loading branch information
gbarter committed Dec 24, 2020
1 parent f64d7d8 commit 02ada47
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 50 deletions.
6 changes: 3 additions & 3 deletions landbosse/landbosse_omdao/OpenMDAODataframeCache.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ def read_all_sheets_from_xlsx(cls, xlsx_basename, xlsx_path=None):
else:
xlsx_filename = os.path.join(xlsx_path, f"{xlsx_basename}.xlsx")

with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
xlsx = pd.ExcelFile(xlsx_filename)
xlsx = pd.ExcelFile(xlsx_filename, engine='openpyxl')
sheets_dict = {sheet_name: xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names}
for sheet_name in xlsx.sheet_names:
sheets_dict[sheet_name].dropna(inplace=True, how='all')
cls._cache[xlsx_basename] = sheets_dict
return cls.copy_dataframes(sheets_dict)

Expand Down
82 changes: 35 additions & 47 deletions landbosse/landbosse_omdao/XlsxValidator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pandas as pd


class XlsxValidator:
"""
XlsxValidator is for comparing the results of a previous model run
Expand Down Expand Up @@ -36,79 +35,68 @@ def compare_expected_to_actual(self, expected_xlsx, actual_module_type_operation
# First, make the list of dictionaries into a dataframe, and drop
# the raw_cost and raw_cost_total_or_per_turbine columns.
actual_df = pd.DataFrame(actual_module_type_operation_list)
actual_df.drop(["raw_cost", "raw_cost_total_or_per_turbine"], axis=1, inplace=True)
expected_df = pd.read_excel(expected_xlsx, "costs_by_module_type_operation")
expected_df.rename(
columns={
"Project ID with serial": "project_id_with_serial",
"Number of turbines": "num_turbines",
"Turbine rating MW": "turbine_rating_MW",
"Module": "module",
"Operation ID": "operation_id",
"Type of cost": "type_of_cost",
"Cost per turbine": "cost_per_turbine",
"Cost per project": "cost_per_project",
"USD/kW per project": "usd_per_kw_per_project",
},
inplace=True,
)
actual_df.drop(['raw_cost', 'raw_cost_total_or_per_turbine'], axis=1, inplace=True)
expected_df = pd.read_excel(expected_xlsx, 'costs_by_module_type_operation', engine='openpyxl')
#expected_df = expected_df.dropna(inplace=True, how='all')
expected_df.rename(columns={
'Project ID with serial': 'project_id_with_serial',
'Number of turbines': 'num_turbines',
'Turbine rating MW': 'turbine_rating_MW',
'Module': 'module',
'Operation ID': 'operation_id',
'Type of cost': 'type_of_cost',
'Cost per turbine': 'cost_per_turbine',
'Cost per project': 'cost_per_project',
'USD/kW per project': 'usd_per_kw_per_project'
}, inplace=True)

cost_per_project_actual = actual_df[
["cost_per_project", "project_id_with_serial", "module", "operation_id", "type_of_cost"]
]
['cost_per_project', 'project_id_with_serial', 'module', 'operation_id', 'type_of_cost']]
cost_per_project_expected = expected_df[
["cost_per_project", "project_id_with_serial", "module", "operation_id", "type_of_cost"]
]
['cost_per_project', 'project_id_with_serial', 'module', 'operation_id', 'type_of_cost']]

comparison = cost_per_project_actual.merge(
cost_per_project_expected, on=["project_id_with_serial", "module", "operation_id", "type_of_cost"]
)
cost_per_project_expected,
on=['project_id_with_serial', 'module', 'operation_id', 'type_of_cost'])

comparison.rename(
columns={
"cost_per_project_x": "cost_per_project_actual",
"cost_per_project_y": "cost_per_project_expected",
},
inplace=True,
)
comparison.rename(columns={'cost_per_project_x': 'cost_per_project_actual',
'cost_per_project_y': 'cost_per_project_expected'}, inplace=True)

comparison["difference_validation"] = (
comparison["cost_per_project_actual"] - comparison["cost_per_project_expected"]
)
comparison['difference_validation'] = comparison['cost_per_project_actual'] - comparison['cost_per_project_expected']

# Regardless of the outcome, write the end result of the comparison
# to the validation output file.
columns_for_comparison_output = [
"project_id_with_serial",
"module",
"operation_id",
"type_of_cost",
"cost_per_project_actual",
"cost_per_project_expected",
"difference_validation",
'project_id_with_serial',
'module',
'operation_id',
'type_of_cost',
'cost_per_project_actual',
'cost_per_project_expected',
'difference_validation'
]
comparison.to_excel(validation_output_xlsx, index=False, columns=columns_for_comparison_output)

# If the comparison dataframe is empty, that means there are no common
# projects in the expected data that match the actual data.
if len(comparison) < 1:
print("=" * 80)
print("Validation error: There are no common projects between actual and expected data.")
print("=" * 80)
print('=' * 80)
print('Validation error: There are no common projects between actual and expected data.')
print('=' * 80)
return False

# Find all rows where the difference is unequal to 0. These are rows
# that failed validation. Note that, after the join, the rows may be
# in a different order than the originals.
#
# Round the difference to 4 decimal places before comparing it to 0, so
# differences smaller than that precision are not reported as failures.
failed_rows = comparison[comparison["difference_validation"].round(decimals=4) != 0]
failed_rows = comparison[comparison['difference_validation'].round(decimals=4) != 0]

if len(failed_rows) > 0:
print("=" * 80)
print("The following rows failed validation:")
print('=' * 80)
print('The following rows failed validation:')
print(failed_rows)
print("=" * 80)
print('=' * 80)
return False
else:
return True

0 comments on commit 02ada47

Please sign in to comment.