Sync with other PR and replace the xlrd Excel engine with openpyxl

WISDEM · Dec 24, 2020 · 02ada47 · 02ada47
1 parent f64d7d8
commit 02ada47
Show file tree

Hide file tree

Showing 2 changed files with 38 additions and 50 deletions.
diff --git a/landbosse/landbosse_omdao/OpenMDAODataframeCache.py b/landbosse/landbosse_omdao/OpenMDAODataframeCache.py
@@ -82,10 +82,10 @@ def read_all_sheets_from_xlsx(cls, xlsx_basename, xlsx_path=None):
         else:
             xlsx_filename = os.path.join(xlsx_path, f"{xlsx_basename}.xlsx")
 
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
-            xlsx = pd.ExcelFile(xlsx_filename)
+        xlsx = pd.ExcelFile(xlsx_filename, engine='openpyxl')
         sheets_dict = {sheet_name: xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names}
+        for sheet_name in xlsx.sheet_names:
+            sheets_dict[sheet_name].dropna(inplace=True, how='all')
         cls._cache[xlsx_basename] = sheets_dict
         return cls.copy_dataframes(sheets_dict)
 

diff --git a/landbosse/landbosse_omdao/XlsxValidator.py b/landbosse/landbosse_omdao/XlsxValidator.py
@@ -1,6 +1,5 @@
 import pandas as pd
 
-
 class XlsxValidator:
     """
     XlsxValidator is for comparing the results of a previous model run
@@ -36,79 +35,68 @@ def compare_expected_to_actual(self, expected_xlsx, actual_module_type_operation
         # First, make the list of dictionaries into a dataframe, and drop
         # the raw_cost and raw_cost_total_or_per_turbine columns.
         actual_df = pd.DataFrame(actual_module_type_operation_list)
-        actual_df.drop(["raw_cost", "raw_cost_total_or_per_turbine"], axis=1, inplace=True)
-        expected_df = pd.read_excel(expected_xlsx, "costs_by_module_type_operation")
-        expected_df.rename(
-            columns={
-                "Project ID with serial": "project_id_with_serial",
-                "Number of turbines": "num_turbines",
-                "Turbine rating MW": "turbine_rating_MW",
-                "Module": "module",
-                "Operation ID": "operation_id",
-                "Type of cost": "type_of_cost",
-                "Cost per turbine": "cost_per_turbine",
-                "Cost per project": "cost_per_project",
-                "USD/kW per project": "usd_per_kw_per_project",
-            },
-            inplace=True,
-        )
+        actual_df.drop(['raw_cost', 'raw_cost_total_or_per_turbine'], axis=1, inplace=True)
+        expected_df = pd.read_excel(expected_xlsx, 'costs_by_module_type_operation', engine='openpyxl')
+        # expected_df.dropna(inplace=True, how='all')
+        expected_df.rename(columns={
+            'Project ID with serial': 'project_id_with_serial',
+            'Number of turbines': 'num_turbines',
+            'Turbine rating MW': 'turbine_rating_MW',
+            'Module': 'module',
+            'Operation ID': 'operation_id',
+            'Type of cost': 'type_of_cost',
+            'Cost per turbine': 'cost_per_turbine',
+            'Cost per project': 'cost_per_project',
+            'USD/kW per project': 'usd_per_kw_per_project'
+        }, inplace=True)
 
         cost_per_project_actual = actual_df[
-            ["cost_per_project", "project_id_with_serial", "module", "operation_id", "type_of_cost"]
-        ]
+            ['cost_per_project', 'project_id_with_serial', 'module', 'operation_id', 'type_of_cost']]
         cost_per_project_expected = expected_df[
-            ["cost_per_project", "project_id_with_serial", "module", "operation_id", "type_of_cost"]
-        ]
+            ['cost_per_project', 'project_id_with_serial', 'module', 'operation_id', 'type_of_cost']]
 
         comparison = cost_per_project_actual.merge(
-            cost_per_project_expected, on=["project_id_with_serial", "module", "operation_id", "type_of_cost"]
-        )
+            cost_per_project_expected,
+            on=['project_id_with_serial', 'module', 'operation_id', 'type_of_cost'])
 
-        comparison.rename(
-            columns={
-                "cost_per_project_x": "cost_per_project_actual",
-                "cost_per_project_y": "cost_per_project_expected",
-            },
-            inplace=True,
-        )
+        comparison.rename(columns={'cost_per_project_x': 'cost_per_project_actual',
+                                    'cost_per_project_y': 'cost_per_project_expected'}, inplace=True)
 
-        comparison["difference_validation"] = (
-            comparison["cost_per_project_actual"] - comparison["cost_per_project_expected"]
-        )
+        comparison['difference_validation'] = comparison['cost_per_project_actual'] - comparison['cost_per_project_expected']
 
         # Regardless of the outcome, write the end result of the comparison
         # to the validation output file.
         columns_for_comparison_output = [
-            "project_id_with_serial",
-            "module",
-            "operation_id",
-            "type_of_cost",
-            "cost_per_project_actual",
-            "cost_per_project_expected",
-            "difference_validation",
+            'project_id_with_serial',
+            'module',
+            'operation_id',
+            'type_of_cost',
+            'cost_per_project_actual',
+            'cost_per_project_expected',
+            'difference_validation'
         ]
         comparison.to_excel(validation_output_xlsx, index=False, columns=columns_for_comparison_output)
 
         # If the comparison dataframe is empty, that means there are no common
         # projects in the expected data that match the actual data.
         if len(comparison) < 1:
-            print("=" * 80)
-            print("Validation error: There are no common projects between actual and expected data.")
-            print("=" * 80)
+            print('=' * 80)
+            print('Validation error: There are no common projects between actual and expected data.')
+            print('=' * 80)
             return False
 
         # Find all rows where the difference is nonzero. These are rows
         # that failed validation. Note that, after the join, the rows may be
         # in a different order than the originals.
         #
         # The difference is rounded to 4 decimal places before the comparison.
-        failed_rows = comparison[comparison["difference_validation"].round(decimals=4) != 0]
+        failed_rows = comparison[comparison['difference_validation'].round(decimals=4) != 0]
 
         if len(failed_rows) > 0:
-            print("=" * 80)
-            print("The following rows failed validation:")
+            print('=' * 80)
+            print('The following rows failed validation:')
             print(failed_rows)
-            print("=" * 80)
+            print('=' * 80)
             return False
         else:
             return True