diff --git a/README.md b/README.md index e61a08f..e4d3607 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,12 @@ restrictive licensing (i.e., ecoinvent). pip install unfold +Alternatively, you can clone the repository and install the library from the source code: + + git clone https://github.com/polca/unfold.git + cd unfold + pip install -e . + It can also be installed from ``conda``: conda install -c romainsacchi unfold @@ -122,10 +128,31 @@ This outputs a superstructure database in your brightway2 project, as well as a scenario difference file (Excel) in the current working directory. +## Contributing + +Unfold is an open-source project, and contributions are welcome. +To contribute to the project, please create a pull request on the project's +GitHub page. Before submitting a pull request, please make sure that +your changes are properly documented and that all tests pass. + +## Issues + +If you encounter any problems, please open an issue on the project's +GitHub page. Please include a minimal working example that reproduces +the problem. If you are reporting a bug, please include the version +of the package you are using. + +## Support + +If you need help using Unfold, please contact the author (see below). + + ## Author [Romain Sacchi](mailto:romain.sacchi@psi.ch), PSI ## License -See [License](https://github.com/romainsacchi/stunt/blob/main/LICENSE). +Unfold is released under the MIT License. +See the [License](https://github.com/polca/unfold/blob/main/LICENSE) file for more information. 
+ diff --git a/setup.py b/setup.py index abd4245..a90b66d 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def package_files(directory): setup( name="unfold", - version="1.0.2", + version="1.0.3", python_requires=">=3.9", packages=packages, author="Romain Sacchi ", diff --git a/unfold/__init__.py b/unfold/__init__.py index 7cf2c9c..747fb93 100644 --- a/unfold/__init__.py +++ b/unfold/__init__.py @@ -1,5 +1,5 @@ __all__ = ("Unfold", "Fold") -__version__ = (1, 0, 2) +__version__ = (1, 0, 3) from .fold import Fold from .unfold import Unfold diff --git a/unfold/fold.py b/unfold/fold.py index 9d2e716..7403dfd 100644 --- a/unfold/fold.py +++ b/unfold/fold.py @@ -24,7 +24,7 @@ check_mandatory_fields, get_biosphere_code, get_outdated_flows, - check_commonality_between_databases + check_commonality_between_databases, ) DIR_DATAPACKAGE_TEMP = DATA_DIR / "temp" @@ -134,8 +134,29 @@ def identify_databases_to_fold( descriptions: List[str] = None, ): """ - Identify the source database - :return: name of the source database + The identify_databases_to_fold function identifies the source database and the databases to be folded into the source database and extracts them. + + :param source_database: Name of the source database to be used. If not specified, the user is prompted to choose from the available databases. + :param source_database_system_model: System model of the source database to be used. If not specified, the user is prompted to input. + :param source_database_version: Version of the source database to be used. If not specified, the user is prompted to input. + :param databases_to_fold: List of databases to be folded into the source database. If not specified, the user is prompted to input. + :param descriptions: Short descriptions of each database to be folded. If not specified, the user is prompted to input. + :return: The source dictionary containing information about the source database, such as its name, database, system model, and version. 
+ :return: The databases_to_fold list containing dictionaries of information about each database to be folded, including its name, database, and description. + + Functionality: + + Checks whether the user has already specified a datapackage name and description. If not, prompts the user to input these details. + Lists the available databases and prompts the user to input the number of the reference database if the source_database input is not specified. + Identifies the dependencies of the source and folded databases. + Prompts the user to input the system model and version of the source database if not specified. + Prompts the user to input the list of databases to be folded and their descriptions if not specified. + Extracts the source database and ensures that mandatory fields are included. + Builds the mapping of dependencies for the source database. + Extracts each database to be folded, ensures that mandatory fields are included, and builds the mapping of dependencies. + Identifies whether any dependencies are external and, if so, extracts them. + Returns a set of the dependencies excluding the source and databases to be folded. + """ if not self.datapackage_name: @@ -226,7 +247,6 @@ def identify_databases_to_fold( source_database_extracted, extracted_database, database ) - self.databases_to_fold.append( { "name": database, @@ -290,9 +310,16 @@ def build_mapping_for_dependencies(self, database): def get_list_unique_acts(self, scenarios: List[List[dict]]) -> list: """ - Get a list of unique activities from a list of databases - :param scenarios: list of databases - :return: list of unique activities + Returns a list of unique activities from a list of databases, where each database is represented by a list of + datasets containing their respective exchanges. + + :param scenarios: A list of databases, where each database is a list of datasets, with each dataset containing the + exchanges. 
+ :type scenarios: list + + :return: A list of tuples representing the unique activities in the provided databases, where each tuple contains + the activity name, reference product, location, categories, unit and type. + :rtype: list """ list_unique_acts = [] @@ -343,6 +370,25 @@ def fold( databases_to_fold: List[str] = None, descriptions: List[str] = None, ): + """ + Folds one or more databases into a new package. + + :param package_name: Name for the new datapackage. + :type package_name: str, optional + :param package_description: Short description for the new datapackage. + :type package_description: str, optional + :param source: Name of the source database. + :type source: str, optional + :param system_model: System model of the source database. + :type system_model: str, optional + :param version: Version of the source database. + :type version: float or str, optional + :param databases_to_fold: List of names of the databases to fold. + :type databases_to_fold: List[str], optional + :param descriptions: Short description for each database to fold. + :type descriptions: List[str], optional + :raises AssertionError: When one or more databases to fold are not found. + """ self.datapackage_name = package_name self.datapackage_description = package_description @@ -563,9 +609,33 @@ def generate_scenario_difference_file( self, origin_db: dict, scenarios: List[dict] ) -> tuple[pd.DataFrame, list[dict], list[tuple]]: """ - Generate a scenario difference file for a given list of databases + Generate a scenario difference file for a given list of databases. + The function generate_scenario_difference_file calculates the scenario difference file for a given list of databases. + This function takes in two parameters, origin_db, and scenarios. origin_db is a dictionary representing the + original database, and scenarios is a list of databases. 
+ + The function first updates the dictionary self.exc_codes, which stores the codes of exchanges using their attributes + like name, reference product, location, and unit. It then fetches the unique activities by calling + the get_list_unique_acts function with the original database and the scenarios as the argument. + + It then creates a dictionary acts_ind that maps indices to activities, and another dictionary acts_ind_rev + that maps activities to their corresponding indices. It also creates a list of scenarios and a list of databases. + + The function then initializes a matrix for each scenario using the lil_matrix function from the scipy.sparse module. + It then extracts metadata from the databases and stores it in a dictionary dict_meta. + + Next, for each dataset in each database, the function retrieves the dataset_id and exc_id by using their + attributes like name, reference product, location, unit, and type. It then updates the corresponding matrix entry with the exchange amount for the current dataset and exchange. + + The function then stacks the matrices into a sparse matrix and retrieves the indices of nonzero elements. + It then creates a new database by combining the metadata with the exchanges corresponding to the retrieved indices. + + Finally, the function creates a dataframe containing the differences between scenarios and returns it along + with the new database and a list of activities. 
+ :param origin_db: the original database :param scenarios: list of databases + :return: a tuple containing a dataframe, a list of dictionaries, and a list of tuples """ self.exc_codes.update( diff --git a/unfold/unfold.py b/unfold/unfold.py index 20e59ef..ed384aa 100644 --- a/unfold/unfold.py +++ b/unfold/unfold.py @@ -112,7 +112,9 @@ def check_dependencies(self, dependencies: dict) -> None: dependencies = dependencies or {} # Check if all dependencies are available in the project - if dependencies and all(dependency in available_databases for dependency in dependencies.values()): + if dependencies and all( + dependency in available_databases for dependency in dependencies.values() + ): # If all dependencies are available, update the "source" attribute for each dependency for database in self.dependencies: database["source"] = dependencies[database["name"]] @@ -349,15 +351,15 @@ def store_datasets_metadata(self) -> None: key: values for key, values in dataset.items() if key - not in [ - "exchanges", - "code", - "name", - "reference product", - "location", - "unit", - "database", - ] + not in [ + "exchanges", + "code", + "name", + "reference product", + "location", + "unit", + "database", + ] } for dataset in self.database } @@ -431,23 +433,23 @@ def get_exchange( def populate_sparse_matrix(self) -> nsp.lil_matrix: """ - Generate a sparse matrix representation of the product system modeled by this object. + Generate a sparse matrix representation of the product system modeled by this object. - The matrix is generated based on the data in the `database` attribute, which is assumed to be - a list of dictionaries, where each dictionary represents a dataset in the product system. - Each dataset has a list of exchanges, where each exchange is a dictionary with information - about the input or output of a process. 
+ The matrix is generated based on the data in the `database` attribute, which is assumed to be + a list of dictionaries, where each dictionary represents a dataset in the product system. + Each dataset has a list of exchanges, where each exchange is a dictionary with information + about the input or output of a process. - The matrix is generated as a `lil_matrix` object from the `scipy.sparse` module. The rows and - columns of the matrix represent unique activities in the product system, which are determined - by the unique combinations of the following exchange attributes: name, product, categories, - location, unit, and type. + The matrix is generated as a `lil_matrix` object from the `scipy.sparse` module. The rows and + columns of the matrix represent unique activities in the product system, which are determined + by the unique combinations of the following exchange attributes: name, product, categories, + location, unit, and type. - The matrix is populated by looping over each exchange in each dataset, and adding the amount - of the exchange to the corresponding row and column in the matrix. + The matrix is populated by looping over each exchange in each dataset, and adding the amount + of the exchange to the corresponding row and column in the matrix. - :return: A `lil_matrix` object representing the product system modeled by this object. - """ + :return: A `lil_matrix` object representing the product system modeled by this object. 
+ """ # Generate the indices for the activities self.generate_activities_indices() @@ -477,11 +479,15 @@ def populate_sparse_matrix(self) -> nsp.lil_matrix: "production", ) # Add the exchange amount to the corresponding cell in the matrix - m[self.reversed_acts_indices[s], self.reversed_acts_indices[c]] += exc["amount"] + m[self.reversed_acts_indices[s], self.reversed_acts_indices[c]] += exc[ + "amount" + ] return m - def write_scaling_factors_in_matrix(self, matrix: np.ndarray, scenario_name: str) -> np.ndarray: + def write_scaling_factors_in_matrix( + self, matrix: np.ndarray, scenario_name: str + ) -> np.ndarray: """ Multiplies the elements of the given matrix with scaling factors for a given scenario. @@ -868,17 +874,17 @@ def format_superstructure_dataframe(self) -> None: # Rename columns and add new columns for database information and metadata self.scenario_df.columns = [ - "to activity name", - "to reference product", - "to location", - "to unit", - "from activity name", - "from reference product", - "from location", - "from categories", - "from unit", - "flow type", - ] + [s["name"] for s in self.scenarios] + "to activity name", + "to reference product", + "to location", + "to unit", + "from activity name", + "from reference product", + "from location", + "from categories", + "from unit", + "flow type", + ] + [s["name"] for s in self.scenarios] self.scenario_df["to database"] = self.package.descriptor["name"] self.scenario_df["to categories"] = None