Skip to content

Commit

Permalink
restore needsFullBlock for SharedMemoryCache
Browse files Browse the repository at this point in the history
  • Loading branch information
jcosborn committed Sep 19, 2023
1 parent bebf8db commit d827beb
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 10 deletions.
6 changes: 3 additions & 3 deletions include/dslash_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -702,15 +702,15 @@ namespace quda

#ifdef QUDA_DSLASH_FAST_COMPILE
if constexpr (allthreads) {
dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type, allthreads>(x_cb, s, parity, active);
dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type, true>(x_cb, s, parity, active);
} else {
dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type>(x_cb, s, parity);
}
#else
if constexpr (allthreads) {
switch (parity) {
case 0: dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type, allthreads>(x_cb, s, 0, active); break;
case 1: dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type, allthreads>(x_cb, s, 1, active); break;
case 0: dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type, true>(x_cb, s, 0, active); break;
case 1: dslash.template operator()<kernel_type == UBER_KERNEL ? INTERIOR_KERNEL : kernel_type, true>(x_cb, s, 1, active); break;
}
} else {
switch (parity) {
Expand Down
6 changes: 6 additions & 0 deletions include/targets/generic/special_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,13 @@ namespace quda {
template <typename T> inline constexpr bool explicitSpecialOps = explicitSpecialOpsS<T>::value;

// hasSpecialOps
// Compile-time boolean trait on a type T. Two implementations are provided and
// selected by the preprocessor; the `#if 1` branch is the one currently compiled.
#if 1
// Active implementation: true unless getSpecialOps<T> is exactly NoSpecialOps.
template <typename T> inline constexpr bool hasSpecialOps = !std::is_same_v<getSpecialOps<T>,NoSpecialOps>;
#else
// Disabled alternative: pattern-matches on T itself instead of going through
// getSpecialOps<T>. The primary template yields false; the partial
// specialization yields true only when T is a SpecialOps<U...> instantiation.
// NOTE(review): the two branches are not obviously equivalent for types where
// getSpecialOps<T> differs from T -- confirm before flipping the `#if`.
template <typename T> struct hasSpecialOpsImpl { static constexpr bool value = false; };
template <typename ...U> struct hasSpecialOpsImpl<SpecialOps<U...>> { static constexpr bool value = true; };
template <typename T> inline constexpr bool hasSpecialOps = hasSpecialOpsImpl<T>::value;
#endif

// combineOps
template <typename ...T> struct combineOpsS {};
Expand Down
6 changes: 5 additions & 1 deletion include/targets/sycl/shared_memory_cache_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

#include <../generic/shared_memory_cache_helper.h>

namespace quda {
// Partial specialization of the needsFullBlock variable template: evaluates to
// true for every SharedMemoryCache<T,D,O> instantiation.
// NOTE(review): the primary needsFullBlock template is declared elsewhere;
// presumably `true` signals that all threads of a block must take part when
// this cache type is used -- confirm against the primary declaration.
template <typename T, typename D, typename O> static constexpr bool needsFullBlock<SharedMemoryCache<T,D,O>> = true;
}

#else
#include <target_device.h>
Expand All @@ -30,7 +34,7 @@ namespace quda
This accessor supports both explicit run-time block size and
compile-time sizing.
* For run-time block size, the constructor should be initialied
* For run-time block size, the constructor should be initialized
with the desired block size.
* For compile-time block size, no arguments should be passed to
Expand Down
6 changes: 3 additions & 3 deletions include/targets/sycl/thread_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ namespace quda

#endif

namespace quda {
template <typename T, int n> static constexpr bool needsFullBlock<thread_array<T,n>> = false;
}
//namespace quda {
// template <typename T, int n> static constexpr bool needsFullBlock<thread_array<T,n>> = false;
//}
6 changes: 3 additions & 3 deletions include/targets/sycl/thread_local_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
#include "../generic/thread_local_cache_noshared.h"
#endif

namespace quda {
template <typename T, int N, typename O> static constexpr bool needsFullBlock<ThreadLocalCache<T,N,O>> = false;
}
//namespace quda {
// template <typename T, int N, typename O> static constexpr bool needsFullBlock<ThreadLocalCache<T,N,O>> = false;
//}

0 comments on commit d827beb

Please sign in to comment.