roocs · cehbrecht · Nov 8, 2023 · Sep 27, 2023 · Sep 27, 2023 · Sep 28, 2023
diff --git a/.gitignore b/.gitignore
@@ -82,6 +82,7 @@ src/
 *.log
 *.lock
 testdata.json
+output_*.nc
 
 # IPython
 .ipynb_checkpoints

diff --git a/environment.yml b/environment.yml
@@ -12,7 +12,7 @@ dependencies:
 - requests
 # daops
 - cftime>=1.2.1
-- xarray>=0.20,<0.22
+- xarray>=0.21
 - cf_xarray>=0.7,<=0.8.4
 - dask>=2021.12
 - netcdf4>=1.4

diff --git a/notebooks/weighted_average.ipynb b/notebooks/weighted_average.ipynb
@@ -0,0 +1,251 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ee398d02-1958-4e73-b9ef-137185bd97d3",
+   "metadata": {},
+   "source": [
+    "# Weighted Average\n",
+    "\n",
+    "* https://docs.xarray.dev/en/stable/examples/area_weighted_temperature.html\n",
+    "* https://docs.xarray.dev/en/stable/generated/xarray.DataArray.weighted.html\n",
+    "* https://docs.xarray.dev/en/stable/user-guide/weather-climate.html\n",
+    "* https://towardsdatascience.com/xarray-recipes-for-earth-scientists-c12a10c6a293#c729\n",
+    "* https://towardsdatascience.com/the-correct-way-to-average-the-globe-92ceecd172b7\n",
+    "* https://xcdat.readthedocs.io/en/stable/generated/xarray.Dataset.spatial.average.html\n",
+    "* https://xcdat.readthedocs.io/en/stable/examples/spatial-average.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "037931cd-d112-4460-86a3-848dd28efd6e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cf_xarray as cfxr\n",
+    "import xarray as xr\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a17aae4-9928-4b01-addf-ebbc9a54d7f6",
+   "metadata": {},
+   "source": [
+    "## search intake catalog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6fe8241b-6edb-45fa-8706-f1ba374a4d4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import intake"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69afa185-ecf5-420f-8942-f24e738555d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat_url = \"https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/master/intake/catalogs/c3s.yaml\"\n",
+    "\n",
+    "cat = intake.open_catalog(cat_url)\n",
+    "list(cat)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "551b1877-c39e-47c6-a358-31a193cbff93",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_cmip6 = cat['c3s-cmip6'].read()\n",
+    "df_cmip6.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3191572-09af-4607-841f-d3a288edc904",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# c3s-cmip6.ScenarioMIP.INM.INM-CM5-0.ssp245.r1i1p1f1.Amon.rlds.gr1.v20190619\n",
+    "\n",
+    "df = df_cmip6.loc[\n",
+    "    (df_cmip6.experiment_id==\"ssp245\")\n",
+    "    & (df_cmip6.activity_id==\"ScenarioMIP\")\n",
+    "    & (df_cmip6.institution_id==\"INM\")\n",
+    "    & (df_cmip6.member_id==\"r1i1p1f1\")\n",
+    "    & (df_cmip6.table_id==\"Amon\")\n",
+    "    & (df_cmip6.source_id==\"INM-CM5-0\")\n",
+    "    & (df_cmip6.variable_id==\"rlds\")\n",
+    "]\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e938263-a1ec-441a-a375-35c863f8a611",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_path = list(set(df.path))[0]\n",
+    "ds_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1596cd41-7f6a-43f0-aa9a-64629a06cf3d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ds_url = f\"https://data.mips.copernicus-climate.eu/thredds/fileServer/esg_c3s-cmip6/{ds_path}\"\n",
+    "ds_url = f\"http://esgf3.dkrz.de/thredds/fileServer/cmip6/{ds_path}\"\n",
+    "ds_url"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "48daa7a3-b2bc-4662-af52-1616d4ef6db7",
+   "metadata": {},
+   "source": [
+    "## download test data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e8fdd3b-4943-4d3a-a492-d18e3e686247",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "data_dir = Path(\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2c99174-a059-4f51-a775-764d3b024c3b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!wget $ds_url -O /tmp/test.nc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0d9a0ba-34d6-4d94-b3a6-9fc5b13eedd8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nc_files = sorted(data_dir.glob('**/test.nc'))\n",
+    "nc_files"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ec0ae6ce-d503-4fc6-82c6-8f08d8bc4f04",
+   "metadata": {},
+   "source": [
+    "## calculate weighted average"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "60c13c47-e097-4829-a9e7-2de5543476dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# xr.set_options(keep_attrs=True)\n",
+    "\n",
+    "from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset\n",
+    "\n",
+    "# ds = xr.open_dataset(nc_files[0], use_cftime=True)\n",
+    "ds = open_xr_dataset(nc_files[0].as_posix())\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c073005-252a-4e9c-9c86-01420175beab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds['time'] = ds.indexes['time'].to_numpy()\n",
+    "ds = ds.drop_vars([\"time_bnds\"])\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53819d47-53d2-4e77-b020-e91b4cc38fde",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "weights = np.cos(np.deg2rad(ds.lat))\n",
+    "weights.name = \"weights\"\n",
+    "weights.fillna(0)\n",
+    "weights"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13097b15-cb02-4577-b4d1-032553a8eff6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_weighted = ds.weighted(weights)\n",
+    "ds_weighted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "444bbc02-d8b5-450f-9e79-f8aac2b64a7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "weighted_mean = ds_weighted.mean((\"lon\", \"lat\"), keep_attrs=True)\n",
+    "weighted_mean"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/requirements.txt b/requirements.txt
@@ -7,7 +7,7 @@ daops>=0.8.1,<0.9
 clisops>=0.9.6,<0.11
 roocs-utils>=0.6.4,<0.7
 # roocs-utils @ git+https://github.com/roocs/roocs-utils@master#egg=roocs-utils
-xarray>=0.20,<0.22
+xarray>=0.21
 cf-xarray<=0.8.4
 dask[complete]
 netcdf4

diff --git a/rook/director/director.py b/rook/director/director.py
@@ -48,7 +48,9 @@ def __init__(self, coll, inputs):
         self._check_apply_fixes()
 
     def use_fixes(self):
-        return CONFIG[f"project:{self.project}"].get("use_fixes", False)
+        # TODO: don't use fixes
+        return False
+        # return CONFIG[f"project:{self.project}"].get("use_fixes", False)
 
     def _check_apply_fixes(self):
         if (
@@ -118,20 +120,22 @@ def _resolve(self):
         # If we got here: then WPS will be used, because `self.use_original_files == False`
 
     def requires_fixes(self):
-        if not self.use_fixes():
-            return False
+        # TODO: don't use fixes
+        return False
+        # if not self.use_fixes():
+        #     return False
 
-        if self.search_result:
-            ds_ids = self.search_result.files()
-        else:
-            ds_ids = self.coll
-        for ds_id in ds_ids:
-            fix = fixer.Fixer(ds_id)
+        # if self.search_result:
+        #     ds_ids = self.search_result.files()
+        # else:
+        #     ds_ids = self.coll
+        # for ds_id in ds_ids:
+        #     fix = fixer.Fixer(ds_id)
 
-            if fix.pre_processor or fix.post_processors:
-                return True
+        #     if fix.pre_processor or fix.post_processors:
+        #         return True
 
-        return False
+        # return False
 
     def request_aligns_with_files(self):
         """

diff --git a/rook/operator.py b/rook/operator.py
@@ -8,8 +8,8 @@
 from rook.utils.average_utils import (
     run_average_by_time,
     run_average_by_dim,
-    run_weighted_average,
 )
+from rook.utils.weighted_average_utils import run_weighted_average
 from rook.utils.subset_utils import run_subset
 from rook.utils.concat_utils import run_concat
 from rook.utils.regrid_utils import run_regrid

diff --git a/rook/processes/wps_average_weighted.py b/rook/processes/wps_average_weighted.py
@@ -10,7 +10,7 @@
 from ..utils.input_utils import parse_wps_input
 from ..utils.metalink_utils import build_metalink
 from ..utils.response_utils import populate_response
-from ..utils.average_utils import run_weighted_average
+from ..utils.weighted_average_utils import run_weighted_average
 
 LOGGER = logging.getLogger()
 
@@ -78,9 +78,9 @@ def _handler(self, request, response):
         inputs = {
             "collection": collection,
             "output_dir": self.workdir,
+            "dims": ["latitude", "longitude"],
             "apply_fixes": False,
             "pre_checked": False,
-            "dims": ["latitude", "longitude"],
         }
         # print(inputs)
 

diff --git a/rook/utils/average_utils.py b/rook/utils/average_utils.py
@@ -11,13 +11,3 @@ def run_average_by_dim(args):
 
     result = average_over_dims(**args)
     return result.file_uris
-
-
-def run_weighted_average(args):
-    from daops.ops.average import average_over_dims
-
-    args["apply_fixes"] = False
-    args["dims"] = ["latitude", "longitude"]
-
-    result = average_over_dims(**args)
-    return result.file_uris
diff --git a/rook/utils/regrid_utils.py b/rook/utils/regrid_utils.py
@@ -4,5 +4,9 @@ def run_regrid(args):
     args["apply_fixes"] = False
     args["dims"] = ["latitude", "longitude"]
 
+    # remove regrid arguments
+    args.pop("method", None)
+    args.pop("grid", None)
+
     result = average_over_dims(**args)
     return result.file_uris