Skip to content

Commit

Permalink
Merge pull request #165 from WISDEM/omdao
Browse files Browse the repository at this point in the history
Omdao
  • Loading branch information
akey7 authored Jan 7, 2021
2 parents e523636 + 02ada47 commit a07ecce
Show file tree
Hide file tree
Showing 12 changed files with 1,579 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ dmypy.json
# VSCode
.vscode/

# Emacs
*~

# Ignore Pandas _libs files
pandas/_libs/

Expand Down
113 changes: 113 additions & 0 deletions landbosse/landbosse_omdao/CsvGenerator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import pandas as pd


class CsvGenerator:
    """
    Generates Pandas dataframes that are ready to be written out as the
    details and costs .csv files.
    """

    def __init__(self, file_ops):
        """
        Parameters
        ----------
        file_ops : XlsxFileOperations
            An instance of XlsxFileOperations to manage file names.
        """
        self.file_ops = file_ops

    def create_details_dataframe(self, details):
        """
        Build the dataframe for the details .csv.

        Each input dictionary is mapped onto the output column names. Its
        value lands in the "Numeric value" column when it parses as a
        float, otherwise in the "Non-numeric value" column. An explicit
        "last_number" key overrides that automatic routing.

        Parameters
        ----------
        details : list[dict]
            A list of dictionaries to be converted into a Pandas dataframe

        Returns
        -------
        pd.DataFrame
            The dataframe that can be written to a .csv file.
        """

        # This is the list of rows to write to the .csv
        details_to_write_to_csv = []
        for row in details:
            new_row = {}
            new_row["Project ID with serial"] = row["project_id_with_serial"]
            new_row["Module"] = row["module"]
            new_row["Variable name"] = row["variable_df_key_col_name"]
            new_row["Unit"] = row["unit"]

            value = row["value"]
            if self._is_numeric(value):
                new_row["Numeric value"] = value
            else:
                new_row["Non-numeric value"] = value

            # If there is a last_number, which means this is a dataframe row that has a number
            # at the end, write this into the numeric value column. This overrides automatic
            # type detection.

            if "last_number" in row:
                new_row["Numeric value"] = row["last_number"]

            details_to_write_to_csv.append(new_row)

        # Use a new name rather than rebinding the `details` parameter.
        details_df = pd.DataFrame(details_to_write_to_csv)

        return details_df

    def create_costs_dataframe(self, costs):
        """
        Build the dataframe for the costs .csv.

        Parameters
        ----------
        costs : list[dict]
            The list of dictionaries of costs.

        Returns
        -------
        pd.DataFrame
            A dataframe to be written as a .csv
        """
        new_rows = []
        for row in costs:
            new_row = {
                "Project ID with serial": row["project_id_with_serial"],
                "Number of turbines": row["num_turbines"],
                "Turbine rating MW": row["turbine_rating_MW"],
                "Rotor diameter m": row["rotor_diameter_m"],
                "Module": row["module"],
                "Type of cost": row["type_of_cost"],
                "Cost per turbine": row["cost_per_turbine"],
                "Cost per project": row["cost_per_project"],
                "Cost per kW": row["usd_per_kw_per_project"],
            }
            new_rows.append(new_row)
        costs_df = pd.DataFrame(new_rows)
        return costs_df

    def _is_numeric(self, value):
        """
        This method tests if a value is numeric (that is, can be parsed
        by float()) or non numeric (which cannot be parsed).

        The decision from this method determines whether values go into
        the numeric or non-numeric columns.

        Parameters
        ----------
        value
            The value to be tested.

        Returns
        -------
        bool
            True if the value is numeric, False otherwise.
        """
        try:
            float(value)
        except (ValueError, TypeError):
            # ValueError: unparseable strings such as "abc".
            # TypeError: values float() cannot accept at all (e.g. None or
            # a list); previously this propagated and crashed the caller.
            return False
        return True
165 changes: 165 additions & 0 deletions landbosse/landbosse_omdao/GridSearchTree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import numpy as np
import pandas as pd

"""
This module contains the logic to handle a tree to compute
points in an N-dimensional parametric search space.
"""


class GridSearchTreeNode:
    """
    A single node in the grid search tree.

    Attributes
    ----------
    cell_specification : str or None
        The "dataframe/row/column" path this node's value applies to;
        None for the root node.
    value : float or None
        The parametric value held by this node; None for the root node.
    children : list[GridSearchTreeNode]
        Child nodes, one per value at the next level of the tree.
    """

    def __init__(self):
        # A node starts out empty; GridSearchTree.build_tree() fills in
        # the specification, the value, and the children.
        self.value = None
        self.cell_specification = None
        self.children = []


class GridSearchTree:
    """
    This class implements a k-ary tree to compute possible
    combinations of points in a N-dimensional parametric
    search space.
    """

    def __init__(self, parametric_list):
        """
        This simply sets the parametric_list. See the first dataframe
        described in the docstring of XlsxReader.create_parametric_value_list()

        Parameters
        ----------
        parametric_list : pandas.DataFrame
            The dataframe of the parametrics list.
        """
        self.parametric_list = parametric_list

    def build_grid_tree_and_return_grid(self):
        """
        See the dataframes in XlsxReader.create_parametric_value_list()
        for context.

        This builds a tree of points in the search space and traverses
        it to find points on the grid.

        Returns
        -------
        list
            A list of grid points. Each point is a list of dictionaries,
            one per row of the parametric list, with "cell_specification"
            and "value" keys.
        """
        # An empty parametric list yields an empty grid rather than an
        # IndexError from .iloc[0] inside build_tree().
        if len(self.parametric_list) == 0:
            return []

        # Build the tree. Its leaf nodes contain the values for each
        # point in the grid.
        root = self.build_tree()

        # Recursions of the traversal method need to start with an empty
        # list.
        grid = self.dfs_search_tree(root, traversal=[])
        return grid

    def build_tree(self, depth=0, root=None):
        """
        This method builds a k-ary tree to contain cell_specifications and
        their values.

        Callers from outside this method shouldn't override the defaults
        for the parameters. These parameters are to manage the recursion,
        and are supplied by this method when it invokes itself.

        Parameters
        ----------
        root : GridSearchTreeNode
            The root of the subtree. At the start of iteration, at the
            root of the whole tree, this should be None.

        depth : int
            The level of the tree currently being built. This is
            also the row number in the dataframe from which the tree
            is being built.

        Returns
        -------
        GridSearchTreeNode
            The root of the tree just built.
        """
        row = self.parametric_list.iloc[depth]
        cell_specification = f"{row['Dataframe name']}/{row['Row name']}/{row['Column name']}"

        # First, make an iterable of the range we are going to be using.
        # An explicit "Value list" column overrides the Min/Max/Step range.
        if "Value list" in row and not pd.isnull(row["Value list"]):
            values = [float(value) for value in row["Value list"].split(",")]
        else:
            start = row["Min"]
            end = row["Max"]
            step = row["Step"]
            # Putting the stop at end + step ensures the end value is in
            # the sequence.
            values = np.arange(start, end + step, step)

        # Identity comparison with None, per PEP 8.
        if root is None:
            root = GridSearchTreeNode()

        # Append children for each value in the parametric step sequence.
        for value in values:
            child = GridSearchTreeNode()
            child.value = value
            child.cell_specification = cell_specification
            root.children.append(child)

            # If there are more levels of variables to add, recurse
            # down 1 level.
            if len(self.parametric_list) > depth + 1:
                self.build_tree(depth + 1, child)

        return root

    def dfs_search_tree(self, root, traversal, path=None):
        """
        This does a depth first search traversal of the GridSearchTree
        specified by the root parameter. It stores the nodes it encounters
        in the list referenced by traversal.

        There is a distinction from normal DFS traversals: Only leaf nodes
        are recorded in the traversal. This means that only nodes that have
        a complete list of cell specifications and values are returned.

        Parameters
        ----------
        root : GridSearchTreeNode
            The root of the subtree to traverse.

        traversal : list
            The nodes traversed on the tree. When this method is called
            by an external caller, this should be an empty list ([])

        path : list
            This shouldn't be manipulated except by this method itself.
            It is for storing the paths to the leaf nodes.

        Returns
        -------
        list
            A list of dictionaries that hold the cell specifications and
            values of each leaf node.
        """

        # Copy the incoming path so siblings do not share (and mutate)
        # the same list.
        path = [] if path is None else path[:]

        # The root of the whole tree carries no specification; skip it.
        if root.cell_specification is not None:
            path.append(
                {
                    "cell_specification": root.cell_specification,
                    "value": root.value,
                }
            )

        # Leaf node: the accumulated path is one complete grid point.
        if len(root.children) == 0:
            traversal.append(path)

        for child in root.children:
            self.dfs_search_tree(child, traversal, path)

        return traversal
109 changes: 109 additions & 0 deletions landbosse/landbosse_omdao/OpenMDAODataframeCache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os

import warnings

with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
import pandas as pd


# The library path is where to find the default input data for LandBOSSE.
# ROOT is two directory levels above this module. When LandBOSSE is vendored
# inside WISDEM (the checkout directory ends with "wisdem" — NOTE(review):
# this comparison is case-sensitive; confirm the checkout uses lowercase),
# the default project data lives under library/landbosse. Otherwise the
# standalone project_input_template/project_data layout is assumed.
ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
if ROOT.endswith('wisdem'):
    library_path = os.path.join(ROOT, "library", "landbosse")
else:
    library_path = os.path.join(ROOT, "project_input_template", "project_data")


class OpenMDAODataframeCache:
    """
    This class does not need to be instantiated. This means that the
    cache is shared throughout all parts of the code that need access
    to any part of the project_data .xlsx files.

    This class is made to read all sheets from .xlsx files and store those
    sheets as dictionaries. This is so .xlsx files only need to be parsed
    once.

    One of the use cases for this dataframe cache is in parallel process
    execution using ProcessPoolExecutor. Alternatively, code could use
    the ThreadPoolExecutor (though that wouldn't give the same advantages
    of parallelization).

    Regardless of which executor is used, care must be taken that one thread
    or process cannot mutate the dataframes of another process. So, this
    class makes copies of dataframes so the callables running from the
    executor cannot overwrite each other's data.
    """

    # _cache is a class attribute that holds the cache of sheets and their
    # dataframes. Keys are (xlsx_basename, xlsx_path) tuples; values are
    # dictionaries mapping sheet names to dataframes.
    _cache = {}

    @classmethod
    def read_all_sheets_from_xlsx(cls, xlsx_basename, xlsx_path=None):
        """
        If the .xlsx file specified by xlsx_basename has been read before
        (meaning it is stored as a key on cls._cache), a copy of all the
        dataframes stored under that sheet name is returned. See the note
        about copying in the class docstring for why copies are being made.

        If the xlsx_basename has not been read before, all the sheets are
        read and copies are returned. The sheets are stored on the dictionary
        cache.

        Parameters
        ----------
        xlsx_basename : str
            The base name of the xlsx file to read. This name should
            not include the .xlsx at the end of the filename. This class
            uses XlsxFileOperations to find the dataframes in the
            project_data directory. The xlsx_basename becomes part of the
            key in the dictionary used to access all the sheets in the
            named .xlsx file.

        xlsx_path : str
            The path from which to read the .xlsx file. This parameter
            has the default value of the library path variable above.

        Returns
        -------
        dict
            A dictionary of dataframes. Keys on the dictionary are names of
            sheets and values in the dictionary are dataframes in that
            .xlsx file.
        """
        # Key on both the basename AND the path. Keying on the basename
        # alone made two files with the same basename in different
        # directories collide, silently returning the wrong sheets.
        key = (xlsx_basename, xlsx_path)
        if key in cls._cache:
            return cls.copy_dataframes(cls._cache[key])

        if xlsx_path is None:
            xlsx_filename = os.path.join(library_path, f"{xlsx_basename}.xlsx")
        else:
            xlsx_filename = os.path.join(xlsx_path, f"{xlsx_basename}.xlsx")

        xlsx = pd.ExcelFile(xlsx_filename, engine='openpyxl')
        sheets_dict = {sheet_name: xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names}
        # Drop rows that are entirely empty before caching, so every
        # consumer sees the same cleaned sheets.
        for sheet_df in sheets_dict.values():
            sheet_df.dropna(inplace=True, how='all')
        cls._cache[key] = sheets_dict
        return cls.copy_dataframes(sheets_dict)

    @classmethod
    def copy_dataframes(cls, dict_of_dataframes):
        """
        This copies a dictionary of dataframes. See the class docstring for an
        explanation of why this copying is taking place.

        Parameters
        ----------
        dict_of_dataframes : dict
            The dictionary of dataframes to copy. Keys are sheet names,
            values are dataframes.

        Returns
        -------
        dict
            Keys are the same as the original dictionary of dataframes.
            Values are copies of the original dataframes.
        """
        # The keys here are sheet names (not xlsx basenames, as a previous
        # version of this comprehension misleadingly named them).
        return {sheet_name: df.copy() for sheet_name, df in dict_of_dataframes.items()}
Loading

0 comments on commit a07ecce

Please sign in to comment.