Skip to content

Commit

Permalink
fix use of SharedMemoryCache
Browse files Browse the repository at this point in the history
  • Loading branch information
jcosborn committed Aug 23, 2023
1 parent c050c45 commit 2438954
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion include/kernels/block_transpose.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ namespace quda
/**
   @brief Construct the kernel functor, initializing the member `arg`
   from the supplied kernel argument.
   @param[in] arg Kernel argument used to initialize the member
 */
constexpr BlockTransposeKernel(const Arg &arg) :
  arg(arg)
{
}
/**
   @brief Identify the source file associated with this kernel.
   @return The value of KERNEL_FILE (defined outside this excerpt)
 */
static constexpr const char *filename()
{
  return KERNEL_FILE;
}

/**
   @brief Policy type that maps a thread-block shape onto the shape
   used for the shared-memory cache in this kernel.  The x extent is
   enlarged by one, the y extent is left unchanged, and the z extent
   is forced to 1.
   NOTE(review): the extra x element is presumably padding for the
   transpose access pattern -- confirm against the cache implementation.
 */
struct Dims {
  /**
     @brief Compute the cache dimensions for a given block shape.
     @param[in] block The thread-block dimensions
     @return Dimensions (block.x + 1, block.y, 1)
   */
  static constexpr dim3 dims(dim3 block)
  {
    dim3 padded = block;
    padded.z = 1;
    padded.x = block.x + 1;
    return padded;
  }
};

/**
@brief Transpose between the two different orders of batched colorspinor fields:
- B: nVec -> spatial/N -> spin/color -> N, where N is for that in floatN
Expand All @@ -60,7 +68,8 @@ namespace quda
int parity = parity_color / Arg::nColor;
using color_spinor_t = ColorSpinor<typename Arg::real, 1, Arg::nSpin>;

SharedMemoryCache<color_spinor_t> cache({target::block_dim().x + 1, target::block_dim().y, 1});
//SharedMemoryCache<color_spinor_t> cache({target::block_dim().x + 1, target::block_dim().y, 1});
SharedMemoryCache<color_spinor_t, Dims> cache;

int x_offset = target::block_dim().x * target::block_idx().x;
int v_offset = target::block_dim().y * target::block_idx().y;
Expand Down

0 comments on commit 2438954

Please sign in to comment.