diff --git a/include/communicator_quda.h b/include/communicator_quda.h index 19bbf6e42c..0dc22fdfab 100644 --- a/include/communicator_quda.h +++ b/include/communicator_quda.h @@ -741,6 +741,8 @@ namespace quda void comm_allreduce_sum_array(double *data, size_t size); + void comm_allreduce_sum(size_t &a); + void comm_allreduce_max_array(double *data, size_t size); void comm_allreduce_max_array(deviation_t *data, size_t size); diff --git a/lib/color_spinor_util.in.cu b/lib/color_spinor_util.in.cu index 3681438c9f..c0baeb0ec5 100644 --- a/lib/color_spinor_util.in.cu +++ b/lib/color_spinor_util.in.cu @@ -191,7 +191,7 @@ namespace quda { int compareSpinor(const U &u, const V &v, const int tol) { int fail_check = 16*tol; - std::vector fail(fail_check); + std::vector fail(fail_check); for (int f=0; f *data, size_t size) { size_t n = comm_size(); diff --git a/lib/communicator_qmp.cpp b/lib/communicator_qmp.cpp index 174c95fa9d..1a4a2a7878 100644 --- a/lib/communicator_qmp.cpp +++ b/lib/communicator_qmp.cpp @@ -318,6 +318,14 @@ void Communicator::comm_allreduce_sum_array(double *data, size_t size) } } +void Communicator::comm_allreduce_sum(size_t &a) +{ + if (sizeof(size_t) != sizeof(uint64_t)) { + errorQuda("sizeof(size_t) != sizeof(uint64_t): %lu != %lu\n", sizeof(size_t), sizeof(uint64_t)); + } + QMP_CHECK(QMP_comm_sum_uint64_t(QMP_COMM_HANDLE, reinterpret_cast(&a))); +} + void Communicator::comm_allreduce_max_array(deviation_t *data, size_t size) { size_t n = comm_size(); diff --git a/lib/communicator_single.cpp b/lib/communicator_single.cpp index e6757a1265..978fbcc069 100644 --- a/lib/communicator_single.cpp +++ b/lib/communicator_single.cpp @@ -83,6 +83,8 @@ namespace quda void Communicator::comm_allreduce_sum_array(double *, size_t) { } + void Communicator::comm_allreduce_sum(size_t &) { } + void Communicator::comm_allreduce_max_array(deviation_t *, size_t) { } void Communicator::comm_allreduce_max_array(double *, size_t) { } diff --git a/lib/communicator_stack.cpp b/lib/communicator_stack.cpp index 9d6f1eb7e3..fe9d1faaba 100644 --- a/lib/communicator_stack.cpp +++ b/lib/communicator_stack.cpp @@ -216,6 +216,8 @@ namespace quda template <> void comm_allreduce_sum(double &a) { comm_allreduce_sum_array(&a, 1); } + template <> void comm_allreduce_sum(size_t &a) { get_current_communicator().comm_allreduce_sum(a); } + void comm_allreduce_max_array(double *data, size_t size) { get_current_communicator().comm_allreduce_max_array(data, size); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ca912f8b3..5f4c9808ae 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -801,14 +801,24 @@ endif() if(QUDA_DIRAC_STAGGERED) set(DIRAC_NAME staggered) - add_test(NAME dslash_${DIRAC_NAME}_policy${pol2} + add_test(NAME dslash_${DIRAC_NAME}_matpc_policy${pol2} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dslash-type ${DIRAC_NAME} --test MatPC --dim 2 4 6 8 - --gtest_output=xml:dslash_${DIRAC_NAME}_test_pol${pol2}.xml) + --gtest_output=xml:dslash_${DIRAC_NAME}_matpc_test_pol${pol2}.xml) if(polenv) - set_tests_properties(dslash_${DIRAC_NAME}_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) + set_tests_properties(dslash_${DIRAC_NAME}_matpc_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) + endif() + + add_test(NAME dslash_${DIRAC_NAME}_mat_policy${pol2} + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dslash-type ${DIRAC_NAME} + --test Mat + --dim 2 4 6 8 + --gtest_output=xml:dslash_${DIRAC_NAME}_mat_test_pol${pol2}.xml) + if(polenv) + set_tests_properties(dslash_${DIRAC_NAME}_mat_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) endif() add_test(NAME benchmark_dslash_${DIRAC_NAME}_policy${pol2} @@ -824,15 +834,26 @@ endif() endif() set(DIRAC_NAME asqtad) - add_test(NAME dslash_${DIRAC_NAME}_policy${pol2} + add_test(NAME dslash_${DIRAC_NAME}_matpc_policy${pol2} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dslash-type ${DIRAC_NAME} --all-partitions 1 --test MatPC --dim 6 8 10 12 - --gtest_output=xml:dslash_${DIRAC_NAME}_test_pol${pol2}.xml) + --gtest_output=xml:dslash_${DIRAC_NAME}_matpc_test_pol${pol2}.xml) + if(polenv) + set_tests_properties(dslash_${DIRAC_NAME}_matpc_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) + endif() + + add_test(NAME dslash_${DIRAC_NAME}_mat_policy${pol2} + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dslash-type ${DIRAC_NAME} + --all-partitions 1 + --test Mat + --dim 6 8 10 12 + --gtest_output=xml:dslash_${DIRAC_NAME}_mat_test_pol${pol2}.xml) if(polenv) - set_tests_properties(dslash_${DIRAC_NAME}_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) + set_tests_properties(dslash_${DIRAC_NAME}_mat_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) endif() add_test(NAME benchmark_dslash_${DIRAC_NAME}_policy${pol2} diff --git a/tests/host_reference/staggered_dslash_reference.cpp b/tests/host_reference/staggered_dslash_reference.cpp index e95532590f..86ecd17464 100644 --- a/tests/host_reference/staggered_dslash_reference.cpp +++ b/tests/host_reference/staggered_dslash_reference.cpp @@ -116,10 +116,10 @@ void staggeredDslashReference(sFloat *res, gFloat **fatlink, gFloat **longlink, sub(&res[offset], &res[offset], gaugedSpinor, stag_spinor_site_size); } } + } // forward/backward in all four directions - if (daggerBit) negx(&res[offset], stag_spinor_site_size); - } // 4-d volume - } // right-hand-side + if (daggerBit) negx(&res[offset], stag_spinor_site_size); + } // 4-d volume } void staggeredDslash(ColorSpinorField &out, void **fatlink, void **longlink, void **ghost_fatlink,