Skip to content

Commit

Permalink
Add sections to README.md to comply with JOSS' community guidelines.
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Mar 2, 2023
1 parent abb67a6 commit c0669ea
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 47 deletions.
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ restrictive licensing (i.e., ecoinvent).

pip install unfold

Alternatively, you can clone the repository and install the library from the source code:

git clone https://github.com/polca/unfold.git
cd unfold
pip install -e .

It can also be installed from ``conda``:

conda install -c romainsacchi unfold
Expand Down Expand Up @@ -122,10 +128,31 @@ This outputs a superstructure database in your brightway2 project,
as well as a scenario difference file (Excel) in the current working
directory.

## Contributing

Unfold is an open-source project, and contributions are welcome.
To contribute to the project, please open a pull request on the project's
GitHub repository. Before submitting a pull request, please make sure that
your changes are properly documented and that all tests pass.

## Issues

If you encounter any problems, please open an issue on the project's
GitHub repository. Please include a minimal working example that reproduces
the problem. If you are reporting a bug, please also include the version
of the package you are using.

## Support

If you need help using Unfold, please contact the author (see below).


## Author

[Romain Sacchi](mailto:romain.sacchi@psi.ch), PSI

## License

See [License](https://github.com/romainsacchi/stunt/blob/main/LICENSE).
Unfold is released under the MIT License.
See the [License](https://github.com/polca/unfold/blob/main/LICENSE) file for more information.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def package_files(directory):

setup(
name="unfold",
version="1.0.2",
version="1.0.3",
python_requires=">=3.9",
packages=packages,
author="Romain Sacchi <romain.sacchi@psi.ch>",
Expand Down
2 changes: 1 addition & 1 deletion unfold/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__all__ = ("Unfold", "Fold")
__version__ = (1, 0, 2)
__version__ = (1, 0, 3)

from .fold import Fold
from .unfold import Unfold
86 changes: 78 additions & 8 deletions unfold/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
check_mandatory_fields,
get_biosphere_code,
get_outdated_flows,
check_commonality_between_databases
check_commonality_between_databases,
)

DIR_DATAPACKAGE_TEMP = DATA_DIR / "temp"
Expand Down Expand Up @@ -134,8 +134,29 @@ def identify_databases_to_fold(
descriptions: List[str] = None,
):
"""
Identify the source database
:return: name of the source database
Identifies the source database and the databases to be folded into it, and extracts them.
:param source_database: Name of the source database to be used. If not specified, the user is prompted to choose from the available databases.
:param source_database_system_model: System model of the source database to be used. If not specified, the user is prompted to input.
:param source_database_version: Version of the source database to be used. If not specified, the user is prompted to input.
:param databases_to_fold: List of databases to be folded into the source database. If not specified, the user is prompted to input.
:param descriptions: Short descriptions of each database to be folded. If not specified, the user is prompted to input.
:return: A tuple of (source, databases_to_fold): the source dictionary with information about the source database (its name, database, system model, and version), and the databases_to_fold list of dictionaries, each describing a database to be folded (its name, database, and description).
Functionality:
Checks whether the user has already specified a datapackage name and description. If not, prompts the user to input these details.
Lists the available databases and prompts the user to input the number of the reference database if the source_database input is not specified.
Identifies the dependencies of the source and folded databases.
Prompts the user to input the system model and version of the source database if not specified.
Prompts the user to input the list of databases to be folded and their descriptions if not specified.
Extracts the source database and ensures that mandatory fields are included.
Builds the mapping of dependencies for the source database.
Extracts each database to be folded, ensures that mandatory fields are included, and builds the mapping of dependencies.
Identifies whether any dependencies are external and, if so, extracts them.
Returns a set of the dependencies excluding the source and databases to be folded.
"""

if not self.datapackage_name:
Expand Down Expand Up @@ -226,7 +247,6 @@ def identify_databases_to_fold(
source_database_extracted, extracted_database, database
)


self.databases_to_fold.append(
{
"name": database,
Expand Down Expand Up @@ -290,9 +310,16 @@ def build_mapping_for_dependencies(self, database):

def get_list_unique_acts(self, scenarios: List[List[dict]]) -> list:
"""
Get a list of unique activities from a list of databases
:param scenarios: list of databases
:return: list of unique activities
Returns a list of unique activities from a list of databases, where each database is represented by a list of
datasets containing their respective exchanges.
:param scenarios: A list of databases, where each database is a list of datasets, with each dataset containing the
exchanges.
:type scenarios: list
:return: A list of tuples representing the unique activities in the provided databases, where each tuple contains
the activity name, reference product, location, categories, unit and type.
:rtype: list
"""

list_unique_acts = []
Expand Down Expand Up @@ -343,6 +370,25 @@ def fold(
databases_to_fold: List[str] = None,
descriptions: List[str] = None,
):
"""
Folds one or more databases into a new package.
:param package_name: Name for the new datapackage.
:type package_name: str, optional
:param package_description: Short description for the new datapackage.
:type package_description: str, optional
:param source: Name of the source database.
:type source: str, optional
:param system_model: System model of the source database.
:type system_model: str, optional
:param version: Version of the source database.
:type version: float or str, optional
:param databases_to_fold: List of names of the databases to fold.
:type databases_to_fold: List[str], optional
:param descriptions: Short description for each database to fold.
:type descriptions: List[str], optional
:raises AssertionError: When one or more databases to fold are not found.
"""
self.datapackage_name = package_name
self.datapackage_description = package_description

Expand Down Expand Up @@ -563,9 +609,33 @@ def generate_scenario_difference_file(
self, origin_db: dict, scenarios: List[dict]
) -> tuple[pd.DataFrame, list[dict], list[tuple]]:
"""
Generate a scenario difference file for a given list of databases
Generate a scenario difference file for a given list of databases.
It takes two parameters: origin_db, a dictionary representing the original database, and scenarios, a list of databases.
The function first creates a dictionary self.exc_codes to store the codes of exchanges using their attributes
like name, reference product, location, and unit. It then fetches the unique activities by calling
the get_list_unique_acts function using the list of the original database and scenarios as the argument.
It then creates a dictionary acts_ind to map indices to each activity, and another dictionary acts_ind_rev
that maps activities to their corresponding indices. It also creates a list of scenarios and a list of databases.
The function then initializes matrices using the lil_matrix function from the scipy.sparse module to store
the matrices for each scenario. It then extracts metadata from the databases, and stores them in a dictionary dict_meta.
Next, for each dataset in each database, the function retrieves the dataset_id and exc_id by using their
attributes like name, reference product, location, unit, and type. It then updates the corresponding matrix entry with the exchange amount for the current dataset and exchange.
The function then stacks the matrices into a sparse matrix and retrieves the indices of nonzero elements.
It then creates a new database by combining the metadata with exchanges corresponding to the indices retrieved.
Finally, the function creates a dataframe containing the differences between scenarios and returns it along
with the new database and a list of activities.
:param origin_db: the original database
:param scenarios: list of databases
:return: a tuple containing a dataframe, a list of dictionaries, and a list of tuples
"""

self.exc_codes.update(
Expand Down
78 changes: 42 additions & 36 deletions unfold/unfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ def check_dependencies(self, dependencies: dict) -> None:
dependencies = dependencies or {}

# Check if all dependencies are available in the project
if dependencies and all(dependency in available_databases for dependency in dependencies.values()):
if dependencies and all(
dependency in available_databases for dependency in dependencies.values()
):
# If all dependencies are available, update the "source" attribute for each dependency
for database in self.dependencies:
database["source"] = dependencies[database["name"]]
Expand Down Expand Up @@ -349,15 +351,15 @@ def store_datasets_metadata(self) -> None:
key: values
for key, values in dataset.items()
if key
not in [
"exchanges",
"code",
"name",
"reference product",
"location",
"unit",
"database",
]
not in [
"exchanges",
"code",
"name",
"reference product",
"location",
"unit",
"database",
]
}
for dataset in self.database
}
Expand Down Expand Up @@ -431,23 +433,23 @@ def get_exchange(

def populate_sparse_matrix(self) -> nsp.lil_matrix:
"""
Generate a sparse matrix representation of the product system modeled by this object.
Generate a sparse matrix representation of the product system modeled by this object.
The matrix is generated based on the data in the `database` attribute, which is assumed to be
a list of dictionaries, where each dictionary represents a dataset in the product system.
Each dataset has a list of exchanges, where each exchange is a dictionary with information
about the input or output of a process.
The matrix is generated based on the data in the `database` attribute, which is assumed to be
a list of dictionaries, where each dictionary represents a dataset in the product system.
Each dataset has a list of exchanges, where each exchange is a dictionary with information
about the input or output of a process.
The matrix is generated as a `lil_matrix` object from the `scipy.sparse` module. The rows and
columns of the matrix represent unique activities in the product system, which are determined
by the unique combinations of the following exchange attributes: name, product, categories,
location, unit, and type.
The matrix is generated as a `lil_matrix` object from the `scipy.sparse` module. The rows and
columns of the matrix represent unique activities in the product system, which are determined
by the unique combinations of the following exchange attributes: name, product, categories,
location, unit, and type.
The matrix is populated by looping over each exchange in each dataset, and adding the amount
of the exchange to the corresponding row and column in the matrix.
The matrix is populated by looping over each exchange in each dataset, and adding the amount
of the exchange to the corresponding row and column in the matrix.
:return: A `lil_matrix` object representing the product system modeled by this object.
"""
:return: A `lil_matrix` object representing the product system modeled by this object.
"""

# Generate the indices for the activities
self.generate_activities_indices()
Expand Down Expand Up @@ -477,11 +479,15 @@ def populate_sparse_matrix(self) -> nsp.lil_matrix:
"production",
)
# Add the exchange amount to the corresponding cell in the matrix
m[self.reversed_acts_indices[s], self.reversed_acts_indices[c]] += exc["amount"]
m[self.reversed_acts_indices[s], self.reversed_acts_indices[c]] += exc[
"amount"
]

return m

def write_scaling_factors_in_matrix(self, matrix: np.ndarray, scenario_name: str) -> np.ndarray:
def write_scaling_factors_in_matrix(
self, matrix: np.ndarray, scenario_name: str
) -> np.ndarray:
"""
Multiplies the elements of the given matrix with scaling factors for a given scenario.
Expand Down Expand Up @@ -868,17 +874,17 @@ def format_superstructure_dataframe(self) -> None:

# Rename columns and add new columns for database information and metadata
self.scenario_df.columns = [
"to activity name",
"to reference product",
"to location",
"to unit",
"from activity name",
"from reference product",
"from location",
"from categories",
"from unit",
"flow type",
] + [s["name"] for s in self.scenarios]
"to activity name",
"to reference product",
"to location",
"to unit",
"from activity name",
"from reference product",
"from location",
"from categories",
"from unit",
"flow type",
] + [s["name"] for s in self.scenarios]

self.scenario_df["to database"] = self.package.descriptor["name"]
self.scenario_df["to categories"] = None
Expand Down

0 comments on commit c0669ea

Please sign in to comment.