Feature/issue 235 remove dask dependency #236

Merged: 7 commits, Sep 6, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- Allow single netCDF file input in addition to single text file listings ([#230](https://github.com/nasa/stitchee/issues/230))([**@danielfromearth**](https://github.com/danielfromearth))
- Remove the dask dependency ([#235](https://github.com/nasa/stitchee/issues/235))([**@danielfromearth**](https://github.com/danielfromearth))


## [1.3.0] - 2024-07-11
7 changes: 2 additions & 5 deletions concatenator/attribute_handling.py
@@ -1,8 +1,5 @@
"""
attribute_handling.py

Functions for converting "coordinates" in netCDF variable attributes
between paths that reference a group hierarchy and flattened paths.
"""Functions for converting "coordinates" in netCDF variable attributes
between paths that reference a group hierarchy and flattened paths.
"""

import json
25 changes: 15 additions & 10 deletions concatenator/dataset_and_group_handling.py
@@ -1,12 +1,8 @@
"""
dataset_and_group_handling.py

Functions for converting multidimensional data structures
between a group hierarchy and a flat structure
"""
"""Functions to convert data structures between a group hierarchy and a flat structure."""

from __future__ import annotations

import logging
import re
from logging import Logger

@@ -20,6 +16,8 @@
regroup_coordinate_attribute,
)

module_logger = logging.getLogger(__name__)

# Match dimension names such as "__char28" or "__char16". Used for CERES datasets.
_string_dimension_name_pattern = re.compile(r"__char[0-9]+")
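
As a quick illustration of what that pattern accepts (a hedged aside, not part of the diff):

```python
import re

_string_dimension_name_pattern = re.compile(r"__char[0-9]+")

# Matches CERES-style string-length dimension names; rejects ordinary names.
assert _string_dimension_name_pattern.fullmatch("__char28") is not None
assert _string_dimension_name_pattern.fullmatch("latitude") is None
```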

@@ -51,7 +49,7 @@ def walk(
var_group_name = f"{group_path}{concatenator.group_delim}{var_name}"
new_dataset.variables[var_group_name] = var

# Flatten the paths of variables referenced in the coordinates attribute
# Flatten the paths of variables referenced in the 'coordinates' attribute
flatten_coordinate_attribute_paths(new_dataset, var, var_group_name)

if (len(var.dimensions) == 1) and _string_dimension_name_pattern.fullmatch(
@@ -99,6 +97,7 @@ def flatten_grouped_dataset(
----------
nc_dataset : nc.Dataset
netCDF4 Dataset that contains groups
ensure_all_dims_are_coords

Returns
-------
@@ -124,7 +123,7 @@
# Copy variables to root group with new name
nc_dataset.variables[new_var_name] = var

# Flatten the paths of variables referenced in the coordinates attribute.
# Flatten the paths of variables referenced in the 'coordinates' attribute.
flatten_coordinate_attribute_paths(nc_dataset, var, new_var_name)

del nc_dataset.variables[var_name] # Delete old variable
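
For orientation, a minimal sketch of what flattening does to a grouped variable name and its 'coordinates' attribute. The group name, variable names, and the "__" delimiter are illustrative assumptions, not values taken from this diff:

```python
group_delim = "__"                      # assumed value of concatenator.group_delim
group_path = "__RADIANCE"               # flattened path of the containing group
var_name = "solar_zenith"               # hypothetical variable name

# The variable is re-keyed under the root group with its full path baked in...
var_group_name = f"{group_path}{group_delim}{var_name}"
print(var_group_name)                   # __RADIANCE__solar_zenith

# ...and references in its 'coordinates' attribute are flattened the same way.
coords = "lat lon"
flat_coords = " ".join(f"{group_path}{group_delim}{c}" for c in coords.split())
print(flat_coords)                      # __RADIANCE__lat __RADIANCE__lon
```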
@@ -173,6 +172,7 @@ def regroup_flattened_dataset(
List of xarray datasets to be combined
output_file : str
Name of the output file to write the resulting NetCDF file to.
history_to_append : str
"""
with nc.Dataset(output_file, mode="w", format="NETCDF4") as base_dataset:
# Copy global attributes
@@ -325,7 +325,9 @@ def _get_dimension_size(dataset: nc.Dataset, dim_name: str) -> int:
return dim_size


def validate_workable_files(files: list[str], logger: Logger) -> tuple[list[str], int]:
def validate_workable_files(
files: list[str], logger: Logger | None = module_logger
) -> tuple[list[str], int]:
"""Remove files from a list that are not open-able as netCDF or that are empty."""
workable_files = []
for file in files:
@@ -335,7 +337,10 @@ def validate_workable_files(files: list[str], logger: Logger) -> tuple[list[str], int]:
if is_empty is False:
workable_files.append(file)
except OSError:
logger.debug("Error opening <%s> as a netCDF dataset. Skipping.", file)
if logger:
logger.debug("Error opening <%s> as a netCDF dataset. Skipping.", file)
else:
print(f"Error opening <{file}> as a netCDF dataset. Skipping.")

# addressing GitHub issue 153: propagate the first empty file if all input files are empty
if (len(workable_files)) == 0 and (len(files) > 0):
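
A minimal usage sketch of the updated `validate_workable_files` signature (the file names are hypothetical; omitting the logger now falls back to the module-level logger):

```python
import logging

from concatenator.dataset_and_group_handling import validate_workable_files

# As before this change: pass an explicit logger.
logger = logging.getLogger("stitchee.example")
workable, count = validate_workable_files(["granule_1.nc", "granule_2.nc"], logger)

# New: rely on the default module-level logger.
workable, count = validate_workable_files(["granule_1.nc", "granule_2.nc"])
```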
53 changes: 31 additions & 22 deletions concatenator/dimension_cleanup.py
@@ -1,8 +1,8 @@
"""
dimension_cleanup.py
"""Functions for renaming duplicated dimension names for netCDF variables.

Functions for renaming duplicated dimension names for netCDF variables, so that xarray can handle the dataset.
So that xarray can handle the dataset.
"""

import collections

import netCDF4 as nc
@@ -29,16 +29,20 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
dup_vars[var_name] = var # populate dictionary with variables with vars with dup dims

for dup_var_name, dup_var in dup_vars.items():
dim_list = list(dup_var.dimensions) # original dimensions of the variable with duplicated dims
dim_list = list(
dup_var.dimensions
) # original dimensions of the variable with duplicated dims

# Dimension(s) that are duplicated are retrieved.
# Note: this is not yet tested for more than one duplicated dimension.
dim_dup = [item for item, count in collections.Counter(dim_list).items() if count > 1][0]
dim_dup_length = dup_var.shape[dup_var.dimensions.index(dim_dup)] # length of the duplicated dimension
dim_dup_length = dup_var.shape[
dup_var.dimensions.index(dim_dup)
] # length of the duplicated dimension

# New dimension and variable names are created.
dim_dup_new = dim_dup + '_1'
var_name_new = dup_var_name + '_1'
dim_dup_new = dim_dup + "_1"
var_name_new = dup_var_name + "_1"
dup_new_varnames.append(var_name_new)

# The last dimension for the variable is replaced with the new name in a temporary list.
@@ -48,9 +52,9 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
new_dup_var = {}

# Attributes for the original variable are retrieved.
attrs_contents = get_attributes_minus_fillvalue_and_renamed_coords(original_var_name=dup_var_name,
new_var_name=dim_dup_new,
original_dataset=nc_dataset)
attrs_contents = get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name=dup_var_name, new_var_name=dim_dup_new, original_dataset=nc_dataset
)
# for attrname in dup_var.ncattrs():
# if attrname != '_FillValue':
# contents: str = nc_dataset.variables[dup_var_name].getncattr(attrname)
@@ -71,11 +75,15 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
if dim_dup in nc_dataset.variables.keys():

# New variable object is created for the renamed, previously duplicated dimension.
new_dup_var[dim_dup_new] = nc_dataset.createVariable(dim_dup_new, nc_dataset.variables[dim_dup].dtype,
(dim_dup_new,), fill_value=fill_value)
dim_var_attr_contents = get_attributes_minus_fillvalue_and_renamed_coords(original_var_name=dim_dup,
new_var_name=dim_dup_new,
original_dataset=nc_dataset)
new_dup_var[dim_dup_new] = nc_dataset.createVariable(
dim_dup_new,
nc_dataset.variables[dim_dup].dtype,
(dim_dup_new,),
fill_value=fill_value,
)
dim_var_attr_contents = get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name=dim_dup, new_var_name=dim_dup_new, original_dataset=nc_dataset
)
for attr_name, contents in dim_var_attr_contents.items():
new_dup_var[dim_dup_new].setncattr(attr_name, contents)

@@ -85,25 +93,26 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
del nc_dataset.variables[dup_var_name]

# Replace original *Variable* with new variable with no duplicated dimensions.
new_dup_var[dup_var_name] = nc_dataset.createVariable(dup_var_name, str(dup_var[:].dtype),
tuple(new_dim_list), fill_value=fill_value)
new_dup_var[dup_var_name] = nc_dataset.createVariable(
dup_var_name, str(dup_var[:].dtype), tuple(new_dim_list), fill_value=fill_value
)
for attr_name, contents in attrs_contents.items():
new_dup_var[dup_var_name].setncattr(attr_name, contents)
new_dup_var[dup_var_name][:] = dup_var[:]

return nc_dataset


def get_attributes_minus_fillvalue_and_renamed_coords(original_var_name: str,
new_var_name: str,
original_dataset: nc.Dataset) -> dict:
def get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name: str, new_var_name: str, original_dataset: nc.Dataset
) -> dict:
"""Variable attributes are retrieved."""
attrs_contents = {}

for ncattr in original_dataset.variables[original_var_name].ncattrs():
if ncattr != '_FillValue':
if ncattr != "_FillValue":
contents: str = original_dataset.variables[original_var_name].getncattr(ncattr)
if ncattr == 'coordinates':
if ncattr == "coordinates":
contents = contents.replace(original_var_name, new_var_name)
attrs_contents[ncattr] = contents
