Skip to content

Commit

Permalink
Add sections to README.md to comply with JOSS' community guidelines.
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Mar 2, 2023
1 parent abb67a6 commit c0669ea
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 47 deletions.
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ restrictive licensing (i.e., ecoinvent).

pip install unfold

Alternatively, you can clone the repository and install the library from the source code:

git clone https://github.com/polca/unfold.git
cd unfold
pip install -e .

It can also be installed from ``conda``:

conda install -c romainsacchi unfold
Expand Down Expand Up @@ -122,10 +128,31 @@ This outputs a superstructure database in your brightway2 project,
as well as a scenario difference file (Excel) in the current working
directory.

## Contributing

Unfold is an open-source project, and contributions are welcome.
To contribute to the project, please open a pull request on the project's
GitHub repository. Before submitting a pull request, please make sure that
your changes are properly documented and that all tests pass.

## Issues

If you encounter any problems, please open an issue on the project's
GitHub repository. Please include a minimal working example that reproduces
the problem. If you are reporting a bug, please also include the version
of the package you are using.

## Support

If you need help using Unfold, please contact the author (see below).


## Author

[Romain Sacchi](mailto:romain.sacchi@psi.ch), PSI

## License

See [License](https://github.com/romainsacchi/stunt/blob/main/LICENSE).
Unfold is released under the MIT License.
See the [License](https://github.com/polca/unfold/blob/main/LICENSE) file for more information.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def package_files(directory):

setup(
name="unfold",
version="1.0.2",
version="1.0.3",
python_requires=">=3.9",
packages=packages,
author="Romain Sacchi <romain.sacchi@psi.ch>",
Expand Down
2 changes: 1 addition & 1 deletion unfold/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__all__ = ("Unfold", "Fold")
__version__ = (1, 0, 2)
__version__ = (1, 0, 3)

from .fold import Fold
from .unfold import Unfold
86 changes: 78 additions & 8 deletions unfold/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
check_mandatory_fields,
get_biosphere_code,
get_outdated_flows,
check_commonality_between_databases
check_commonality_between_databases,
)

DIR_DATAPACKAGE_TEMP = DATA_DIR / "temp"
Expand Down Expand Up @@ -134,8 +134,29 @@ def identify_databases_to_fold(
descriptions: List[str] = None,
):
"""
Identify the source database
:return: name of the source database
Identifies the source database and the databases to be folded into it, and extracts them.
:param source_database: Name of the source database to be used. If not specified, the user is prompted to choose from the available databases.
:param source_database_system_model: System model of the source database to be used. If not specified, the user is prompted to input.
:param source_database_version: Version of the source database to be used. If not specified, the user is prompted to input.
:param databases_to_fold: List of databases to be folded into the source database. If not specified, the user is prompted to input.
:param descriptions: Short descriptions of each database to be folded. If not specified, the user is prompted to input.
:return: A tuple of (source, databases_to_fold): the source dictionary with information about the source database (its name, database, system model, and version), and the databases_to_fold list of dictionaries, each describing a database to be folded (its name, database, and description).
Functionality:
Checks whether the user has already specified a datapackage name and description. If not, prompts the user to input these details.
Lists the available databases and prompts the user to input the number of the reference database if the source_database input is not specified.
Identifies the dependencies of the source and folded databases.
Prompts the user to input the system model and version of the source database if not specified.
Prompts the user to input the list of databases to be folded and their descriptions if not specified.
Extracts the source database and ensures that mandatory fields are included.
Builds the mapping of dependencies for the source database.
Extracts each database to be folded, ensures that mandatory fields are included, and builds the mapping of dependencies.
Identifies whether any dependencies are external and, if so, extracts them.
Returns a set of the dependencies excluding the source and databases to be folded.
"""

if not self.datapackage_name:
Expand Down Expand Up @@ -226,7 +247,6 @@ def identify_databases_to_fold(
source_database_extracted, extracted_database, database
)


self.databases_to_fold.append(
{
"name": database,
Expand Down Expand Up @@ -290,9 +310,16 @@ def build_mapping_for_dependencies(self, database):

def get_list_unique_acts(self, scenarios: List[List[dict]]) -> list:
"""
Get a list of unique activities from a list of databases
:param scenarios: list of databases
:return: list of unique activities
Returns a list of unique activities from a list of databases, where each database is represented by a list of
datasets containing their respective exchanges.
:param scenarios: A list of databases, where each database is a list of datasets, with each dataset containing the
exchanges.
:type scenarios: list
:return: A list of tuples representing the unique activities in the provided databases, where each tuple contains
the activity name, reference product, location, categories, unit and type.
:rtype: list
"""

list_unique_acts = []
Expand Down Expand Up @@ -343,6 +370,25 @@ def fold(
databases_to_fold: List[str] = None,
descriptions: List[str] = None,
):
"""
Folds one or more databases into a new package.
:param package_name: Name for the new datapackage.
:type package_name: str, optional
:param package_description: Short description for the new datapackage.
:type package_description: str, optional
:param source: Name of the source database.
:type source: str, optional
:param system_model: System model of the source database.
:type system_model: str, optional
:param version: Version of the source database.
:type version: float or str, optional
:param databases_to_fold: List of names of the databases to fold.
:type databases_to_fold: List[str], optional
:param descriptions: Short description for each database to fold.
:type descriptions: List[str], optional
:raises AssertionError: When one or more databases to fold are not found.
"""
self.datapackage_name = package_name
self.datapackage_description = package_description

Expand Down Expand Up @@ -563,9 +609,33 @@ def generate_scenario_difference_file(
self, origin_db: dict, scenarios: List[dict]
) -> tuple[pd.DataFrame, list[dict], list[tuple]]:
"""
Generate a scenario difference file for a given list of databases
Generate a scenario difference file for a given list of databases.
It takes two parameters: origin_db, a dictionary representing the original database, and scenarios, a list of databases.
The function first creates a dictionary self.exc_codes to store the codes of exchanges using their attributes
like name, reference product, location, and unit. It then fetches the unique activities by calling
the get_list_unique_acts function using the list of the original database and scenarios as the argument.
It then creates a dictionary acts_ind to map indices to each activity, and another dictionary acts_ind_rev
that maps activities to their corresponding indices. It also creates a list of scenarios and a list of databases.
The function then initializes matrices using the lil_matrix function from the scipy.sparse module to store
the matrices for each scenario. It then extracts metadata from the databases, and stores them in a dictionary dict_meta.
Next, for each dataset in each database, the function retrieves the dataset_id and exc_id by using their
attributes like name, reference product, location, unit, and type. It then updates the corresponding matrix entry with the exchange amount for the current dataset and exchange.
The function then stacks the matrices into a sparse matrix and retrieves the indices of nonzero elements.
It then creates a new database by combining the metadata with exchanges corresponding to the indices retrieved.
Finally, the function creates a dataframe containing the differences between scenarios and returns it along
with the new database and a list of activities.
:param origin_db: the original database
:param scenarios: list of databases
:return: a tuple containing a dataframe, a list of dictionaries, and a list of tuples
"""

self.exc_codes.update(
Expand Down
78 changes: 42 additions & 36 deletions unfold/unfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ def check_dependencies(self, dependencies: dict) -> None:
dependencies = dependencies or {}

# Check if all dependencies are available in the project
if dependencies and all(dependency in available_databases for dependency in dependencies.values()):
if dependencies and all(
dependency in available_databases for dependency in dependencies.values()
):
# If all dependencies are available, update the "source" attribute for each dependency
for database in self.dependencies:
database["source"] = dependencies[database["name"]]
Expand Down Expand Up @@ -349,15 +351,15 @@ def store_datasets_metadata(self) -> None:
key: values
for key, values in dataset.items()
if key
not in [
"exchanges",
"code",
"name",
"reference product",
"location",
"unit",
"database",
]
not in [
"exchanges",
"code",
"name",
"reference product",
"location",
"unit",
"database",
]
}
for dataset in self.database
}
Expand Down Expand Up @@ -431,23 +433,23 @@ def get_exchange(

def populate_sparse_matrix(self) -> nsp.lil_matrix:
"""
Generate a sparse matrix representation of the product system modeled by this object.
Generate a sparse matrix representation of the product system modeled by this object.
The matrix is generated based on the data in the `database` attribute, which is assumed to be
a list of dictionaries, where each dictionary represents a dataset in the product system.
Each dataset has a list of exchanges, where each exchange is a dictionary with information
about the input or output of a process.
The matrix is generated based on the data in the `database` attribute, which is assumed to be
a list of dictionaries, where each dictionary represents a dataset in the product system.
Each dataset has a list of exchanges, where each exchange is a dictionary with information
about the input or output of a process.
The matrix is generated as a `lil_matrix` object from the `scipy.sparse` module. The rows and
columns of the matrix represent unique activities in the product system, which are determined
by the unique combinations of the following exchange attributes: name, product, categories,
location, unit, and type.
The matrix is generated as a `lil_matrix` object from the `scipy.sparse` module. The rows and
columns of the matrix represent unique activities in the product system, which are determined
by the unique combinations of the following exchange attributes: name, product, categories,
location, unit, and type.
The matrix is populated by looping over each exchange in each dataset, and adding the amount
of the exchange to the corresponding row and column in the matrix.
The matrix is populated by looping over each exchange in each dataset, and adding the amount
of the exchange to the corresponding row and column in the matrix.
:return: A `lil_matrix` object representing the product system modeled by this object.
"""
:return: A `lil_matrix` object representing the product system modeled by this object.
"""

# Generate the indices for the activities
self.generate_activities_indices()
Expand Down Expand Up @@ -477,11 +479,15 @@ def populate_sparse_matrix(self) -> nsp.lil_matrix:
"production",
)
# Add the exchange amount to the corresponding cell in the matrix
m[self.reversed_acts_indices[s], self.reversed_acts_indices[c]] += exc["amount"]
m[self.reversed_acts_indices[s], self.reversed_acts_indices[c]] += exc[
"amount"
]

return m

def write_scaling_factors_in_matrix(self, matrix: np.ndarray, scenario_name: str) -> np.ndarray:
def write_scaling_factors_in_matrix(
self, matrix: np.ndarray, scenario_name: str
) -> np.ndarray:
"""
Multiplies the elements of the given matrix with scaling factors for a given scenario.
Expand Down Expand Up @@ -868,17 +874,17 @@ def format_superstructure_dataframe(self) -> None:

# Rename columns and add new columns for database information and metadata
self.scenario_df.columns = [
"to activity name",
"to reference product",
"to location",
"to unit",
"from activity name",
"from reference product",
"from location",
"from categories",
"from unit",
"flow type",
] + [s["name"] for s in self.scenarios]
"to activity name",
"to reference product",
"to location",
"to unit",
"from activity name",
"from reference product",
"from location",
"from categories",
"from unit",
"flow type",
] + [s["name"] for s in self.scenarios]

self.scenario_df["to database"] = self.package.descriptor["name"]
self.scenario_df["to categories"] = None
Expand Down

0 comments on commit c0669ea

Please sign in to comment.