Skip to content

Commit

Permalink
Merge pull request #165 from WISDEM/omdao
Browse files Browse the repository at this point in the history
Omdao
  • Loading branch information
akey7 authored Jan 7, 2021
2 parents e523636 + 02ada47 commit a07ecce
Show file tree
Hide file tree
Showing 12 changed files with 1,579 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ dmypy.json
# VSCode
.vscode/

# Emacs
*~

# Ignore Pandas _libs files
pandas/_libs/

Expand Down
113 changes: 113 additions & 0 deletions landbosse/landbosse_omdao/CsvGenerator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import pandas as pd


class CsvGenerator:
    """
    Generates Pandas dataframes that are ready to be written out as the
    details and costs .csv files.
    """

    def __init__(self, file_ops):
        """
        Parameters
        ----------
        file_ops : XlsxFileOperations
            An instance of XlsxFileOperations to manage file names.
        """
        self.file_ops = file_ops

    def create_details_dataframe(self, details):
        """
        Build the dataframe for the details .csv.

        Each input dictionary is mapped onto the output column names. Its
        value lands in the "Numeric value" column when it parses as a
        float, otherwise in the "Non-numeric value" column. An explicit
        "last_number" key overrides that automatic routing.

        Parameters
        ----------
        details : list[dict]
            A list of dictionaries to be converted into a Pandas dataframe

        Returns
        -------
        pd.DataFrame
            The dataframe that can be written to a .csv file.
        """

        # This is the list of rows to write to the .csv
        details_to_write_to_csv = []
        for row in details:
            new_row = {}
            new_row["Project ID with serial"] = row["project_id_with_serial"]
            new_row["Module"] = row["module"]
            new_row["Variable name"] = row["variable_df_key_col_name"]
            new_row["Unit"] = row["unit"]

            value = row["value"]
            if self._is_numeric(value):
                new_row["Numeric value"] = value
            else:
                new_row["Non-numeric value"] = value

            # If there is a last_number, which means this is a dataframe row that has a number
            # at the end, write this into the numeric value column. This overrides automatic
            # type detection.

            if "last_number" in row:
                new_row["Numeric value"] = row["last_number"]

            details_to_write_to_csv.append(new_row)

        # Use a new name rather than rebinding the `details` parameter.
        details_df = pd.DataFrame(details_to_write_to_csv)

        return details_df

    def create_costs_dataframe(self, costs):
        """
        Build the dataframe for the costs .csv.

        Parameters
        ----------
        costs : list[dict]
            The list of dictionaries of costs.

        Returns
        -------
        pd.DataFrame
            A dataframe to be written as a .csv
        """
        new_rows = []
        for row in costs:
            new_row = {
                "Project ID with serial": row["project_id_with_serial"],
                "Number of turbines": row["num_turbines"],
                "Turbine rating MW": row["turbine_rating_MW"],
                "Rotor diameter m": row["rotor_diameter_m"],
                "Module": row["module"],
                "Type of cost": row["type_of_cost"],
                "Cost per turbine": row["cost_per_turbine"],
                "Cost per project": row["cost_per_project"],
                "Cost per kW": row["usd_per_kw_per_project"],
            }
            new_rows.append(new_row)
        costs_df = pd.DataFrame(new_rows)
        return costs_df

    def _is_numeric(self, value):
        """
        This method tests if a value is numeric (that is, can be parsed
        by float()) or non numeric (which cannot be parsed).

        The decision from this method determines whether values go into
        the numeric or non-numeric columns.

        Parameters
        ----------
        value
            The value to be tested.

        Returns
        -------
        bool
            True if the value is numeric, False otherwise.
        """
        try:
            float(value)
        except (ValueError, TypeError):
            # ValueError: unparseable strings such as "abc".
            # TypeError: values float() cannot accept at all (e.g. None or
            # a list); previously this propagated and crashed the caller.
            return False
        return True
165 changes: 165 additions & 0 deletions landbosse/landbosse_omdao/GridSearchTree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import numpy as np
import pandas as pd

"""
This module contains the logic to handle a tree to compute
points in an N-dimensional parametric search space.
"""


class GridSearchTreeNode:
    """
    A single node in the grid search tree.

    Attributes
    ----------
    cell_specification : str or None
        The "dataframe/row/column" path this node's value applies to;
        None for the root node.
    value : float or None
        The parametric value held by this node; None for the root node.
    children : list[GridSearchTreeNode]
        Child nodes, one per value at the next level of the tree.
    """

    def __init__(self):
        # A node starts out empty; GridSearchTree.build_tree() fills in
        # the specification, the value, and the children.
        self.value = None
        self.cell_specification = None
        self.children = []


class GridSearchTree:
    """
    This class implements a k-ary tree to compute possible
    combinations of points in a N-dimensional parametric
    search space.
    """

    def __init__(self, parametric_list):
        """
        This simply sets the parametric_list. See the first dataframe
        described in the docstring of XlsxReader.create_parametric_value_list()

        Parameters
        ----------
        parametric_list : pandas.DataFrame
            The dataframe of the parametrics list.
        """
        self.parametric_list = parametric_list

    def build_grid_tree_and_return_grid(self):
        """
        See the dataframes in XlsxReader.create_parametric_value_list()
        for context.

        This builds a tree of points in the search space and traverses
        it to find points on the grid.

        Returns
        -------
        list
            A list of grid points. Each point is a list of dictionaries,
            one per row of the parametric list, with "cell_specification"
            and "value" keys.
        """
        # An empty parametric list yields an empty grid rather than an
        # IndexError from .iloc[0] inside build_tree().
        if len(self.parametric_list) == 0:
            return []

        # Build the tree. Its leaf nodes contain the values for each
        # point in the grid.
        root = self.build_tree()

        # Recursions of the traversal method need to start with an empty
        # list.
        grid = self.dfs_search_tree(root, traversal=[])
        return grid

    def build_tree(self, depth=0, root=None):
        """
        This method builds a k-ary tree to contain cell_specifications and
        their values.

        Callers from outside this method shouldn't override the defaults
        for the parameters. These parameters are to manage the recursion,
        and are supplied by this method when it invokes itself.

        Parameters
        ----------
        root : GridSearchTreeNode
            The root of the subtree. At the start of iteration, at the
            root of the whole tree, this should be None.

        depth : int
            The level of the tree currently being built. This is
            also the row number in the dataframe from which the tree
            is being built.

        Returns
        -------
        GridSearchTreeNode
            The root of the tree just built.
        """
        row = self.parametric_list.iloc[depth]
        cell_specification = f"{row['Dataframe name']}/{row['Row name']}/{row['Column name']}"

        # First, make an iterable of the range we are going to be using.
        # An explicit "Value list" column overrides the Min/Max/Step range.
        if "Value list" in row and not pd.isnull(row["Value list"]):
            values = [float(value) for value in row["Value list"].split(",")]
        else:
            start = row["Min"]
            end = row["Max"]
            step = row["Step"]
            # Putting the stop at end + step ensures the end value is in
            # the sequence.
            values = np.arange(start, end + step, step)

        # Identity comparison with None, per PEP 8.
        if root is None:
            root = GridSearchTreeNode()

        # Append children for each value in the parametric step sequence.
        for value in values:
            child = GridSearchTreeNode()
            child.value = value
            child.cell_specification = cell_specification
            root.children.append(child)

            # If there are more levels of variables to add, recurse
            # down 1 level.
            if len(self.parametric_list) > depth + 1:
                self.build_tree(depth + 1, child)

        return root

    def dfs_search_tree(self, root, traversal, path=None):
        """
        This does a depth first search traversal of the GridSearchTree
        specified by the root parameter. It stores the nodes it encounters
        in the list referenced by traversal.

        There is a distinction from normal DFS traversals: Only leaf nodes
        are recorded in the traversal. This means that only nodes that have
        a complete list of cell specifications and values are returned.

        Parameters
        ----------
        root : GridSearchTreeNode
            The root of the subtree to traverse.

        traversal : list
            The nodes traversed on the tree. When this method is called
            by an external caller, this should be an empty list ([])

        path : list
            This shouldn't be manipulated except by this method itself.
            It is for storing the paths to the leaf nodes.

        Returns
        -------
        list
            A list of dictionaries that hold the cell specifications and
            values of each leaf node.
        """

        # Copy the incoming path so siblings do not share (and mutate)
        # the same list.
        path = [] if path is None else path[:]

        # The root of the whole tree carries no specification; skip it.
        if root.cell_specification is not None:
            path.append(
                {
                    "cell_specification": root.cell_specification,
                    "value": root.value,
                }
            )

        # Leaf node: the accumulated path is one complete grid point.
        if len(root.children) == 0:
            traversal.append(path)

        for child in root.children:
            self.dfs_search_tree(child, traversal, path)

        return traversal
109 changes: 109 additions & 0 deletions landbosse/landbosse_omdao/OpenMDAODataframeCache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os

import warnings

with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
import pandas as pd


# The library path is where to find the default input data for LandBOSSE.
# ROOT is two directory levels above this module. When LandBOSSE is vendored
# inside WISDEM (the checkout directory ends with "wisdem" — NOTE(review):
# this comparison is case-sensitive; confirm the checkout uses lowercase),
# the default project data lives under library/landbosse. Otherwise the
# standalone project_input_template/project_data layout is assumed.
ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
if ROOT.endswith('wisdem'):
    library_path = os.path.join(ROOT, "library", "landbosse")
else:
    library_path = os.path.join(ROOT, "project_input_template", "project_data")


class OpenMDAODataframeCache:
    """
    This class does not need to be instantiated. This means that the
    cache is shared throughout all parts of the code that need access
    to any part of the project_data .xlsx files.

    This class is made to read all sheets from .xlsx files and store those
    sheets as dictionaries. This is so .xlsx files only need to be parsed
    once.

    One of the use cases for this dataframe cache is in parallel process
    execution using ProcessPoolExecutor. Alternatively, code could use
    the ThreadPoolExecutor (though that wouldn't give the same advantages
    of parallelization).

    Regardless of which executor is used, care must be taken that one thread
    or process cannot mutate the dataframes of another process. So, this
    class makes copies of dataframes so the callables running from the
    executor cannot overwrite each other's data.
    """

    # _cache is a class attribute that holds the cache of sheets and their
    # dataframes. Keys are (xlsx_basename, xlsx_path) tuples; values are
    # dictionaries mapping sheet names to dataframes.
    _cache = {}

    @classmethod
    def read_all_sheets_from_xlsx(cls, xlsx_basename, xlsx_path=None):
        """
        If the .xlsx file specified by xlsx_basename has been read before
        (meaning it is stored as a key on cls._cache), a copy of all the
        dataframes stored under that sheet name is returned. See the note
        about copying in the class docstring for why copies are being made.

        If the xlsx_basename has not been read before, all the sheets are
        read and copies are returned. The sheets are stored on the dictionary
        cache.

        Parameters
        ----------
        xlsx_basename : str
            The base name of the xlsx file to read. This name should
            not include the .xlsx at the end of the filename. This class
            uses XlsxFileOperations to find the dataframes in the
            project_data directory. The xlsx_basename becomes part of the
            key in the dictionary used to access all the sheets in the
            named .xlsx file.

        xlsx_path : str
            The path from which to read the .xlsx file. This parameter
            has the default value of the library path variable above.

        Returns
        -------
        dict
            A dictionary of dataframes. Keys on the dictionary are names of
            sheets and values in the dictionary are dataframes in that
            .xlsx file.
        """
        # Key on both the basename AND the path. Keying on the basename
        # alone made two files with the same basename in different
        # directories collide, silently returning the wrong sheets.
        key = (xlsx_basename, xlsx_path)
        if key in cls._cache:
            return cls.copy_dataframes(cls._cache[key])

        if xlsx_path is None:
            xlsx_filename = os.path.join(library_path, f"{xlsx_basename}.xlsx")
        else:
            xlsx_filename = os.path.join(xlsx_path, f"{xlsx_basename}.xlsx")

        xlsx = pd.ExcelFile(xlsx_filename, engine='openpyxl')
        sheets_dict = {sheet_name: xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names}
        # Drop rows that are entirely empty before caching, so every
        # consumer sees the same cleaned sheets.
        for sheet_df in sheets_dict.values():
            sheet_df.dropna(inplace=True, how='all')
        cls._cache[key] = sheets_dict
        return cls.copy_dataframes(sheets_dict)

    @classmethod
    def copy_dataframes(cls, dict_of_dataframes):
        """
        This copies a dictionary of dataframes. See the class docstring for an
        explanation of why this copying is taking place.

        Parameters
        ----------
        dict_of_dataframes : dict
            The dictionary of dataframes to copy. Keys are sheet names,
            values are dataframes.

        Returns
        -------
        dict
            Keys are the same as the original dictionary of dataframes.
            Values are copies of the original dataframes.
        """
        # The keys here are sheet names (not xlsx basenames, as a previous
        # version of this comprehension misleadingly named them).
        return {sheet_name: df.copy() for sheet_name, df in dict_of_dataframes.items()}
Loading

0 comments on commit a07ecce

Please sign in to comment.