Fix swapped x and y dimensions in comments and variable names for MEPS data (mllam#52)

The x- and y-dimensions for the MEPS data are swapped in comments describing tensor shapes, and also in some variable names. This change swaps the (x, y) ordering to the correct (y, x) ordering. This fixes mllam#46; see the issue for a clearer description.
MeteoSwiss · Jun 4, 2024 · c8d3553
1 parent 81d0840
commit c8d3553
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 17 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -76,6 +76,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   [\#47](https://github.com/mllam/neural-lam/pull/47)
   @joeloskarsson
 
+- Fix incorrect ordering of x- and y-dimensions in comments describing tensor
+  shapes for MEPS data
+  [\#52](https://github.com/mllam/neural-lam/pull/52)
+  @joeloskarsson
+
 ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0)
 
 First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication

diff --git a/create_grid_features.py b/create_grid_features.py
@@ -29,14 +29,14 @@ def main():
     # -- Static grid node features --
     grid_xy = torch.tensor(
         np.load(os.path.join(static_dir_path, "nwp_xy.npy"))
-    )  # (2, N_x, N_y)
+    )  # (2, N_y, N_x)
     grid_xy = grid_xy.flatten(1, 2).T  # (N_grid, 2)
     pos_max = torch.max(torch.abs(grid_xy))
     grid_xy = grid_xy / pos_max  # Divide by maximum coordinate
 
     geopotential = torch.tensor(
         np.load(os.path.join(static_dir_path, "surface_geopotential.npy"))
-    )  # (N_x, N_y)
+    )  # (N_y, N_x)
     geopotential = geopotential.flatten(0, 1).unsqueeze(1)  # (N_grid,1)
     gp_min = torch.min(geopotential)
     gp_max = torch.max(geopotential)
@@ -46,7 +46,7 @@ def main():
     grid_border_mask = torch.tensor(
         np.load(os.path.join(static_dir_path, "border_mask.npy")),
         dtype=torch.int64,
-    )  # (N_x, N_y)
+    )  # (N_y, N_x)
     grid_border_mask = (
         grid_border_mask.flatten(0, 1).to(torch.float).unsqueeze(1)
     )  # (N_grid, 1)

diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py
@@ -16,8 +16,8 @@ class WeatherDataset(torch.utils.data.Dataset):
     For our dataset:
     N_t' = 65
     N_t = 65//subsample_step (= 21 for 3h steps)
-    dim_x = 268
-    dim_y = 238
+    dim_y = 268
+    dim_x = 238
     N_grid = 268x238 = 63784
     d_features = 17 (d_features' = 18)
     d_forcing = 5
@@ -87,7 +87,7 @@ def __getitem__(self, idx):
         try:
             full_sample = torch.tensor(
                 np.load(sample_path), dtype=torch.float32
-            )  # (N_t', dim_x, dim_y, d_features')
+            )  # (N_t', dim_y, dim_x, d_features')
         except ValueError:
             print(f"Failed to load {sample_path}")
 
@@ -101,40 +101,40 @@ def __getitem__(self, idx):
         sample = full_sample[
             subsample_index : subsample_end_index : self.subsample_step
         ]
-        # (N_t, dim_x, dim_y, d_features')
+        # (N_t, dim_y, dim_x, d_features')
 
         # Remove feature 15, "z_height_above_ground"
         sample = torch.cat(
             (sample[:, :, :, :15], sample[:, :, :, 16:]), dim=3
-        )  # (N_t, dim_x, dim_y, d_features)
+        )  # (N_t, dim_y, dim_x, d_features)
 
         # Accumulate solar radiation instead of just subsampling
-        rad_features = full_sample[:, :, :, 2:4]  # (N_t', dim_x, dim_y, 2)
+        rad_features = full_sample[:, :, :, 2:4]  # (N_t', dim_y, dim_x, 2)
         # Accumulate for first time step
         init_accum_rad = torch.sum(
             rad_features[: (subsample_index + 1)], dim=0, keepdim=True
-        )  # (1, dim_x, dim_y, 2)
+        )  # (1, dim_y, dim_x, 2)
         # Accumulate for rest of subsampled sequence
         in_subsample_len = (
             subsample_end_index - self.subsample_step + subsample_index + 1
         )
         rad_features_in_subsample = rad_features[
             (subsample_index + 1) : in_subsample_len
-        ]  # (N_t*, dim_x, dim_y, 2), N_t* = (N_t-1)*ss_step
-        _, dim_x, dim_y, _ = sample.shape
+        ]  # (N_t*, dim_y, dim_x, 2), N_t* = (N_t-1)*ss_step
+        _, dim_y, dim_x, _ = sample.shape
         rest_accum_rad = torch.sum(
             rad_features_in_subsample.view(
                 self.original_sample_length - 1,
                 self.subsample_step,
-                dim_x,
                 dim_y,
+                dim_x,
                 2,
             ),
             dim=1,
-        )  # (N_t-1, dim_x, dim_y, 2)
+        )  # (N_t-1, dim_y, dim_x, 2)
         accum_rad = torch.cat(
             (init_accum_rad, rest_accum_rad), dim=0
-        )  # (N_t, dim_x, dim_y, 2)
+        )  # (N_t, dim_y, dim_x, 2)
         # Replace in sample
         sample[:, :, :, 2:4] = accum_rad
 
@@ -168,7 +168,7 @@ def __getitem__(self, idx):
             np.load(water_path), dtype=torch.float32
         ).unsqueeze(
             -1
-        )  # (dim_x, dim_y, 1)
+        )  # (dim_y, dim_x, 1)
         # Flatten
         water_cover_features = water_cover_features.flatten(0, 1)  # (N_grid, 1)
         # Expand over temporal dimension
@@ -183,7 +183,7 @@ def __getitem__(self, idx):
         )
         flux = torch.tensor(np.load(flux_path), dtype=torch.float32).unsqueeze(
             -1
-        )  # (N_t', dim_x, dim_y, 1)
+        )  # (N_t', dim_y, dim_x, 1)
 
         if self.standardize:
             flux = (flux - self.flux_mean) / self.flux_std