fix use of SharedMemoryCache

lattice · Aug 23, 2023 · 2438954 · 2438954
1 parent c050c45
commit 2438954
Showing 1 changed file with 10 additions and 1 deletion.
diff --git a/include/kernels/block_transpose.cuh b/include/kernels/block_transpose.cuh
@@ -47,6 +47,14 @@ namespace quda
     constexpr BlockTransposeKernel(const Arg &arg) : arg(arg) { }
     static constexpr const char *filename() { return KERNEL_FILE; }
 
+    struct Dims { // dimension policy handed to SharedMemoryCache as its second template argument
+      static constexpr dim3 dims(dim3 block) { // takes a dim3 by value and returns the adjusted copy
+        block.x += 1; // x extent grows by one (presumably padding against shared-memory bank conflicts -- TODO confirm)
+        block.z = 1; // z extent is forced to 1; y is passed through unchanged
+        return block;
+      }
+    };
+
     /**
       @brief Transpose between the two different orders of batched colorspinor fields:
         - B: nVec -> spatial/N -> spin/color -> N, where N is for that in floatN
@@ -60,7 +68,8 @@ namespace quda
       int parity = parity_color / Arg::nColor;
       using color_spinor_t = ColorSpinor<typename Arg::real, 1, Arg::nSpin>;
 
-      SharedMemoryCache<color_spinor_t> cache({target::block_dim().x + 1, target::block_dim().y, 1});
+      //SharedMemoryCache<color_spinor_t> cache({target::block_dim().x + 1, target::block_dim().y, 1});
+      SharedMemoryCache<color_spinor_t, Dims> cache;
 
       int x_offset = target::block_dim().x * target::block_idx().x;
       int v_offset = target::block_dim().y * target::block_idx().y;