-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #165 from WISDEM/omdao
Omdao
- Loading branch information
Showing
12 changed files
with
1,579 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -131,6 +131,9 @@ dmypy.json | |
# VSCode | ||
.vscode/ | ||
|
||
# Emacs | ||
*~ | ||
|
||
# Ignore Pandas _libs files | ||
pandas/_libs/ | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import pandas as pd | ||
|
||
|
||
class CsvGenerator:
    """
    This class generates the dataframes that are written out as CSV files.
    """

    def __init__(self, file_ops):
        """
        Parameters
        ----------
        file_ops : XlsxFileOperations
            An instance of XlsxFileOperations to manage file names.
        """
        self.file_ops = file_ops

    def create_details_dataframe(self, details):
        """
        Create the dataframe that is written to the details .csv.

        Parameters
        ----------
        details : list[dict]
            A list of dictionaries to be converted into a Pandas dataframe.
            Each dictionary must have the keys "project_id_with_serial",
            "module", "variable_df_key_col_name", "unit" and "value", and
            may optionally have "last_number".

        Returns
        -------
        pd.DataFrame
            The dataframe that can be written to a .csv file.
        """
        # This is the list of details to write to the .csv
        details_to_write_to_csv = []
        for row in details:
            new_row = {}
            new_row["Project ID with serial"] = row["project_id_with_serial"]
            new_row["Module"] = row["module"]
            new_row["Variable name"] = row["variable_df_key_col_name"]
            new_row["Unit"] = row["unit"]

            # Route the value into the numeric or non-numeric column
            # depending on whether it can be parsed as a float.
            value = row["value"]
            value_is_number = self._is_numeric(value)
            if value_is_number:
                new_row["Numeric value"] = value
            else:
                new_row["Non-numeric value"] = value

            # If there is a last_number, which means this is a dataframe row
            # that has a number at the end, write it into the numeric value
            # column. This overrides the automatic type detection above.
            if "last_number" in row:
                new_row["Numeric value"] = row["last_number"]

            details_to_write_to_csv.append(new_row)

        details = pd.DataFrame(details_to_write_to_csv)

        return details

    def create_costs_dataframe(self, costs):
        """
        Create the dataframe that is written to the costs .csv.

        Parameters
        ----------
        costs : list[dict]
            The list of dictionaries of costs.

        Returns
        -------
        pd.DataFrame
            A dataframe to be written as a .csv
        """
        new_rows = []
        for row in costs:
            new_row = {
                "Project ID with serial": row["project_id_with_serial"],
                "Number of turbines": row["num_turbines"],
                "Turbine rating MW": row["turbine_rating_MW"],
                "Rotor diameter m": row["rotor_diameter_m"],
                "Module": row["module"],
                "Type of cost": row["type_of_cost"],
                "Cost per turbine": row["cost_per_turbine"],
                "Cost per project": row["cost_per_project"],
                "Cost per kW": row["usd_per_kw_per_project"],
            }
            new_rows.append(new_row)
        costs_df = pd.DataFrame(new_rows)
        return costs_df

    def _is_numeric(self, value):
        """
        This method tests if a value is numeric (that is, can be parsed
        by float()) or non-numeric (which cannot be parsed).

        The decision from this method determines whether values go into
        the numeric or non-numeric columns.

        Parameters
        ----------
        value
            The value to be tested.

        Returns
        -------
        bool
            True if the value is numeric, False otherwise.
        """
        try:
            float(value)
        except (ValueError, TypeError):
            # ValueError covers unparseable strings ("abc"); TypeError covers
            # non-string, non-number objects such as None or lists, which
            # previously crashed this method instead of returning False.
            return False
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
""" | ||
This module contains the logic to handle a tree to compute | ||
points in an N-dimensional parametric search space. | ||
""" | ||
|
||
|
||
class GridSearchTreeNode:
    """
    This just contains information about a node in the grid
    search tree.

    Attributes
    ----------
    cell_specification : str or None
        The "dataframe/row/column" cell this node's value applies to.
        None for the root node.
    children : list
        The child GridSearchTreeNode instances.
    value : float or None
        The parametric value held at this node. None for the root node.
    """

    def __init__(self):
        self.cell_specification = None
        self.children = []
        self.value = None


class GridSearchTree:
    """
    This class implements a k-ary tree to compute possible
    combinations of points in an N-dimensional parametric
    search space.
    """

    def __init__(self, parametric_list):
        """
        This simply sets the parametric_list. See the first dataframe
        described in the docstring of XlsxReader.create_parametric_value_list()

        Parameters
        ----------
        parametric_list : pandas.DataFrame
            The dataframe of the parametrics list.
        """
        self.parametric_list = parametric_list

    def build_grid_tree_and_return_grid(self):
        """
        See the dataframes in XlsxReader.create_parametric_value_list()
        for context.

        This builds a tree of points in the search space and traverses
        it to find points on the grid.

        Returns
        -------
        list
            A list with one entry per grid point; each entry is a list of
            dictionaries holding "cell_specification" and "value" keys.
        """
        # Build the tree. Its leaf nodes contain the values for each
        # point in the grid.
        root = self.build_tree()

        # Recursions of the traversal method need to start with an empty
        # list.
        grid = self.dfs_search_tree(root, traversal=[])
        return grid

    def build_tree(self, depth=0, root=None):
        """
        This method builds a k-ary tree to contain cell_specifications and
        their values.

        Callers from outside this method shouldn't override the defaults
        for the parameters. These parameters are to manage the recursion,
        and are supplied by this method when it invokes itself.

        Parameters
        ----------
        depth : int
            The level of the tree currently being built. This is
            also the row number in the dataframe from which the tree
            is being built.
        root : GridSearchTreeNode
            The root of the subtree. At the start of the recursion, at the
            root of the whole tree, this should be None.

        Returns
        -------
        GridSearchTreeNode
            The root of the tree just built.
        """
        row = self.parametric_list.iloc[depth]
        cell_specification = f"{row['Dataframe name']}/{row['Row name']}/{row['Column name']}"

        # First, make an iterable of the range we are going to be using.
        # An explicit "Value list" overrides the Min/Max/Step range.
        if "Value list" in row and not pd.isnull(row["Value list"]):
            values = [float(value) for value in row["Value list"].split(",")]
        else:
            start = row["Min"]
            end = row["Max"]
            step = row["Step"]
            # Putting the stop at end + step ensures the end value is in
            # the sequence.
            values = np.arange(start, end + step, step)

        # Identity comparison with the None singleton (PEP 8); the original
        # used "== None", which can invoke arbitrary __eq__ semantics.
        if root is None:
            root = GridSearchTreeNode()

        # Append children for each value in the parametric step sequence.
        for value in values:
            child = GridSearchTreeNode()
            child.value = value
            child.cell_specification = cell_specification
            root.children.append(child)

            # If there are more levels of variables to add, recurse
            # down 1 level.
            if len(self.parametric_list) > depth + 1:
                self.build_tree(depth + 1, child)

        return root

    def dfs_search_tree(self, root, traversal, path=None):
        """
        This does a depth first search traversal of the GridSearchTree
        specified by the root parameter. It stores the nodes it encounters
        in the list referenced by traversal.

        There is a distinction from normal DFS traversals: Only leaf nodes
        are recorded in the traversal. This means that only nodes that have
        a complete list of cell specifications and values are returned.

        Parameters
        ----------
        root : GridSearchTreeNode
            The root of the subtree being traversed.
        traversal : list
            The nodes traversed on the tree. When this method is called
            by an external caller, this should be an empty list ([])
        path : list
            This shouldn't be manipulated except by this method itself.
            It is for storing the paths to the leaf nodes.

        Returns
        -------
        list
            A list of dictionaries that hold the cell specifications and
            values of each leaf node.
        """
        # Copy the incoming path so sibling subtrees do not share (and
        # mutate) the same list instance.
        path = [] if path is None else path[:]

        if root.cell_specification is not None:
            path.append(
                {
                    "cell_specification": root.cell_specification,
                    "value": root.value,
                }
            )

        # Only leaf nodes represent complete grid points.
        if len(root.children) == 0:
            traversal.append(path)

        for child in root.children:
            self.dfs_search_tree(child, traversal, path)

        return traversal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import os | ||
|
||
import warnings | ||
|
||
with warnings.catch_warnings(): | ||
warnings.filterwarnings("ignore", message="numpy.ufunc size changed") | ||
import pandas as pd | ||
|
||
|
||
# The library path is where to find the default input data for LandBOSSE.
_this_dir = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.abspath(os.path.join(_this_dir, "..", ".."))
if ROOT.endswith("wisdem"):
    # presumably running inside the WISDEM source tree — TODO confirm
    library_path = os.path.join(ROOT, "library", "landbosse")
else:
    library_path = os.path.join(ROOT, "project_input_template", "project_data")
|
||
|
||
class OpenMDAODataframeCache:
    """
    This class does not need to be instantiated. This means that the
    cache is shared throughout all parts of the code that needs access
    to any part of the project_data .xlsx files.

    This class is made to read all sheets from .xlsx files and store those
    sheets as dictionaries. This is so .xlsx files only need to be parsed
    once.

    One of the use cases for this dataframe cache is in parallel process
    execution using ProcessPoolExecutor. Alternatively, code could use
    the ThreadPoolExecutor (though that wouldn't give the same advantages
    of parallelization).

    Regardless of which executor is used, care must be taken that one thread
    or process cannot mutate the dataframes of another process. So, this
    class makes copies of dataframes so the callables running from the
    executor cannot overwrite each other's data.
    """

    # _cache is a class attribute that holds the cache of sheets and their
    # dataframes. Keys are xlsx basenames; values are dicts of
    # {sheet name: dataframe}.
    # NOTE(review): entries are keyed by basename only, so reading two files
    # with the same basename from different xlsx_path locations would
    # collide — confirm callers never do this.
    _cache = {}

    @classmethod
    def read_all_sheets_from_xlsx(cls, xlsx_basename, xlsx_path=None):
        """
        If the .xlsx file specified by xlsx_basename has been read before
        (meaning it is stored as a key on cls._cache), a copy of all the
        dataframes stored under that sheet name is returned. See the note
        about copying in the class docstring for why copies are being made.

        If the xlsx_basename has not been read before, all the sheets are
        read and copies are returned. The sheets are stored on the dictionary
        cache.

        Parameters
        ----------
        xlsx_basename : str
            The base name of the xlsx file to read. This name should
            not include the .xlsx at the end of the filename. This class
            uses XlsxFileOperations to find the dataframes in the
            project_data directory. The xlsx_basename becomes the key
            in the dictionary used to access all the sheets in the
            named .xlsx file.
        xlsx_path : str
            The path from which to read the .xlsx file. This parameter
            has the default value of the library path variable above.

        Returns
        -------
        dict
            A dictionary of dataframes. Keys on the dictionary are names of
            sheets and values in the dictionary are dataframes in that
            .xlsx file.
        """
        if xlsx_basename in cls._cache:
            original = cls._cache[xlsx_basename]
            return cls.copy_dataframes(original)

        if xlsx_path is None:
            xlsx_filename = os.path.join(library_path, f"{xlsx_basename}.xlsx")
        else:
            xlsx_filename = os.path.join(xlsx_path, f"{xlsx_basename}.xlsx")

        # Use a context manager so the underlying file handle is always
        # closed, even if a sheet fails to parse. The original leaked the
        # ExcelFile handle.
        with pd.ExcelFile(xlsx_filename, engine="openpyxl") as xlsx:
            sheets_dict = {sheet_name: xlsx.parse(sheet_name) for sheet_name in xlsx.sheet_names}

        # Drop rows that are entirely empty; they are artifacts of
        # spreadsheet formatting, not data.
        for sheet in sheets_dict.values():
            sheet.dropna(inplace=True, how="all")

        cls._cache[xlsx_basename] = sheets_dict
        return cls.copy_dataframes(sheets_dict)

    @classmethod
    def copy_dataframes(cls, dict_of_dataframes):
        """
        This copies a dictionary of dataframes. See the class docstring for an
        explanation of why this copying is taking place.

        Parameters
        ----------
        dict_of_dataframes : dict
            The dictionary of dataframes to copy.

        Returns
        -------
        dict
            Keys are the same as the original dictionary of dataframes.
            Values are copies of the original dataframes.
        """
        return {xlsx_basename: df.copy() for xlsx_basename, df in dict_of_dataframes.items()}
Oops, something went wrong.