diff --git a/src/details/ArborX_DetailsDistributedTreeUtils.hpp b/src/details/ArborX_DetailsDistributedTreeUtils.hpp index 1b8dba0c7..6e2429472 100644 --- a/src/details/ArborX_DetailsDistributedTreeUtils.hpp +++ b/src/details/ArborX_DetailsDistributedTreeUtils.hpp @@ -20,7 +20,6 @@ #include #include #include -#include // create_layout* #include #include @@ -30,73 +29,6 @@ namespace ArborX::Details::DistributedTree { -template -std::enable_if_t::value> -sendAcrossNetwork(ExecutionSpace const &space, Distributor const &distributor, - View exports, typename View::non_const_type imports) -{ - Kokkos::Profiling::ScopedRegion guard( - "ArborX::DistributedTree::sendAcrossNetwork (" + exports.label() + ")"); - - ARBORX_ASSERT((exports.extent(0) == distributor.getTotalSendLength()) && - (imports.extent(0) == distributor.getTotalReceiveLength()) && - (exports.extent(1) == imports.extent(1)) && - (exports.extent(2) == imports.extent(2)) && - (exports.extent(3) == imports.extent(3)) && - (exports.extent(4) == imports.extent(4)) && - (exports.extent(5) == imports.extent(5)) && - (exports.extent(6) == imports.extent(6)) && - (exports.extent(7) == imports.extent(7))); - - auto const num_packets = exports.extent(1) * exports.extent(2) * - exports.extent(3) * exports.extent(4) * - exports.extent(5) * exports.extent(6) * - exports.extent(7); - - using NonConstValueType = typename View::non_const_value_type; - -#ifndef ARBORX_ENABLE_GPU_AWARE_MPI - using MirrorSpace = typename View::host_mirror_space; - typename MirrorSpace::execution_space const execution_space; -#else - using MirrorSpace = typename View::device_type::memory_space; - auto const &execution_space = space; -#endif - - auto imports_layout_right = create_layout_right_mirror_view_no_init( - execution_space, MirrorSpace{}, imports); - -#ifndef ARBORX_ENABLE_GPU_AWARE_MPI - execution_space.fence(); -#endif - - Kokkos::View> - import_buffer(imports_layout_right.data(), imports_layout_right.size()); - - distributor.doPostsAndWaits(space, exports, num_packets, import_buffer); - - constexpr bool can_skip_copy = - (View::rank == 1 && - (std::is_same_v || - std::is_same_v)); - if constexpr (can_skip_copy) - { - // For 1D non-strided views, we can directly copy to the original location, - // as layout is the same - Kokkos::deep_copy(space, imports, imports_layout_right); - } - else - { - // For multi-dimensional views, we need to first copy into a separate - // storage because of a different layout - auto tmp_view = Kokkos::create_mirror_view_and_copy( - Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{}), - imports_layout_right); - Kokkos::deep_copy(space, imports, tmp_view); - } -} - template void countResults(ExecutionSpace const &space, int n_queries, QueryIdsView const &query_ids, OffsetView &offset) @@ -152,7 +84,7 @@ void forwardQueries(MPI_Comm comm, ExecutionSpace const &space, "ArborX::DistributedTree::query::forwardQueries::import_ranks"), n_imports); - sendAcrossNetwork(space, distributor, export_ranks, import_ranks); + distributor.doPostsAndWaits(space, export_ranks, import_ranks); fwd_ranks = import_ranks; } @@ -177,7 +109,7 @@ void forwardQueries(MPI_Comm comm, ExecutionSpace const &space, "ArborX::DistributedTree::query::forwardQueries::imports"), n_imports); - sendAcrossNetwork(space, distributor, exports, imports); + distributor.doPostsAndWaits(space, exports, imports); fwd_queries = imports; } @@ -202,7 +134,7 @@ void forwardQueries(MPI_Comm comm, ExecutionSpace const &space, "ArborX::DistributedTree::query::forwardQueries::import_ids"), n_imports); - sendAcrossNetwork(space, distributor, export_ids, import_ids); + distributor.doPostsAndWaits(space, export_ids, import_ids); fwd_ids = import_ids; } } @@ -245,7 +177,7 @@ void communicateResultsBack(MPI_Comm comm, ExecutionSpace const &space, Kokkos::view_alloc(space, Kokkos::WithoutInitializing, ranks.label()), n_imports); - sendAcrossNetwork(space, distributor, export_ranks, import_ranks); + distributor.doPostsAndWaits(space, export_ranks, import_ranks); ranks = import_ranks; } @@ -267,7 +199,7 @@ void communicateResultsBack(MPI_Comm comm, ExecutionSpace const &space, Kokkos::view_alloc(space, Kokkos::WithoutInitializing, ids.label()), n_imports); - sendAcrossNetwork(space, distributor, export_ids, import_ids); + distributor.doPostsAndWaits(space, export_ids, import_ids); ids = import_ids; } @@ -278,7 +210,7 @@ void communicateResultsBack(MPI_Comm comm, ExecutionSpace const &space, Kokkos::view_alloc(space, Kokkos::WithoutInitializing, out.label()), n_imports); - sendAcrossNetwork(space, distributor, export_out, import_out); + distributor.doPostsAndWaits(space, export_out, import_out); out = import_out; } } @@ -311,7 +243,7 @@ void forwardQueriesAndCommunicateResults( // Communicate results back communicateResultsBack(comm, space, values, offset, ranks, ids); - Kokkos::Profiling::pushRegion(prefix + "postprocess_results"); + Kokkos::Profiling::pushRegion(prefix + "::postprocess_results"); // Merge results int const n_predicates = predicates.size(); diff --git a/src/details/ArborX_DetailsDistributor.hpp b/src/details/ArborX_DetailsDistributor.hpp index 6ee6325f4..487d17ecc 100644 --- a/src/details/ArborX_DetailsDistributor.hpp +++ b/src/details/ArborX_DetailsDistributor.hpp @@ -16,7 +16,6 @@ #include #include #include -#include // create_layout_right... #include #include @@ -263,85 +262,102 @@ class Distributor template void doPostsAndWaits(ExecutionSpace const &space, ExportView const &exports, - size_t num_packets, ImportView const &imports) const + ImportView const &imports) const { Kokkos::Profiling::ScopedRegion guard( "ArborX::Distributor::doPostsAndWaits"); - ARBORX_ASSERT(num_packets * _src_offsets.back() == imports.size()); - ARBORX_ASSERT(num_packets * _dest_offsets.back() == exports.size()); + static_assert(ExportView::rank == 1 && + (std::is_same_v || + std::is_same_v)); + static_assert(ImportView::rank == 1 && + (std::is_same_v || + std::is_same_v)); + + using MemorySpace = typename ExportView::memory_space; + static_assert( + std::is_same_v); + static_assert( + std::is_same_v); using ValueType = typename ImportView::value_type; static_assert( std::is_same>::value); - static_assert(ImportView::rank == 1); - static_assert( - std::is_same::value); + bool const permutation_necessary = _permute.size() != 0; + + ARBORX_ASSERT(!permutation_necessary || exports.size() == _permute.size()); + ARBORX_ASSERT(exports.size() == getTotalSendLength()); + ARBORX_ASSERT(imports.size() == getTotalReceiveLength()); - // This allows function to work even when ExportView is unmanaged. + // Make sure things work even if ExportView is unmanaged using ExportViewWithoutMemoryTraits = Kokkos::View; + ExportViewWithoutMemoryTraits permuted_exports_storage( + "ArborX::Distributor::doPostsAndWaits::permuted_exports", 0); - using DestBufferMirrorViewType = - decltype(ArborX::Details::create_layout_right_mirror_view_and_copy( - space, std::declval(), - std::declval())); - - // nvcc-12.2 fails compiling if using DestBufferMirrorViewType here - constexpr int pointer_depth = - internal::PointerDepth::value; - DestBufferMirrorViewType dest_buffer_mirror( - "ArborX::Distributor::doPostsAndWaits::destination_buffer_mirror", 0, - pointer_depth > 1 ? 0 : KOKKOS_INVALID_INDEX, - pointer_depth > 2 ? 0 : KOKKOS_INVALID_INDEX, - pointer_depth > 3 ? 0 : KOKKOS_INVALID_INDEX, - pointer_depth > 4 ? 0 : KOKKOS_INVALID_INDEX, - pointer_depth > 5 ? 0 : KOKKOS_INVALID_INDEX, - pointer_depth > 6 ? 0 : KOKKOS_INVALID_INDEX, - pointer_depth > 7 ? 0 : KOKKOS_INVALID_INDEX); - - // If _permute is empty, we are assuming that we don't need to permute - // exports. - bool const permutation_necessary = _permute.size() != 0; + auto permuted_exports = exports; if (permutation_necessary) { - ExportViewWithoutMemoryTraits dest_buffer( - Kokkos::view_alloc( - space, Kokkos::WithoutInitializing, - "ArborX::Distributor::doPostsAndWaits::destination_buffer"), - exports.layout()); + KokkosExt::reallocWithoutInitializing(space, permuted_exports_storage, + exports.size()); + permuted_exports = permuted_exports_storage; ArborX::Details::applyInversePermutation(space, _permute, exports, - dest_buffer); + permuted_exports); + } + + int comm_rank; + MPI_Comm_rank(_comm, &comm_rank); - dest_buffer_mirror = - ArborX::Details::create_layout_right_mirror_view_and_copy( - space, typename ImportView::memory_space(), dest_buffer); + int same_rank_destination = -1; + int same_rank_source = -1; + { + auto it = + std::find(_destinations.begin(), _destinations.end(), comm_rank); + if (it != _destinations.end()) + { + same_rank_destination = it - _destinations.begin(); + + it = std::find(_sources.begin(), _sources.end(), comm_rank); + ARBORX_ASSERT(it != _sources.end()); + same_rank_source = it - _sources.begin(); + } + } + +#ifndef ARBORX_ENABLE_GPU_AWARE_MPI + using MirrorSpace = typename ExportView::host_mirror_space; + + auto exports_comm = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, MirrorSpace{}, permuted_exports); + if (same_rank_destination != -1) + { + // Only copy the parts of the exports that we need to send remotely + for (auto interval : + {std::make_pair(0, _dest_offsets[same_rank_destination]), + std::make_pair(_dest_offsets[same_rank_destination + 1], + _dest_offsets.back())}) + Kokkos::deep_copy(space, Kokkos::subview(exports_comm, interval), + Kokkos::subview(permuted_exports, interval)); } else { - dest_buffer_mirror = - ArborX::Details::create_layout_right_mirror_view_and_copy( - space, typename ImportView::memory_space(), exports); + Kokkos::deep_copy(space, exports_comm, permuted_exports); } + auto imports_comm = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, + MirrorSpace{}, imports); +#else + auto exports_comm = permuted_exports; + auto imports_comm = imports; +#endif - static_assert( - decltype(dest_buffer_mirror)::rank == 1 || - std::is_same::value); - static_assert(ImportView::rank == 1 || - std::is_same::value); - - int comm_rank; - MPI_Comm_rank(_comm, &comm_rank); - int comm_size; - MPI_Comm_size(_comm, &comm_size); int const indegrees = _sources.size(); int const outdegrees = _destinations.size(); std::vector requests; @@ -350,53 +366,61 @@ class Distributor { if (_sources[i] != comm_rank) { - auto const message_size = - _src_counts[i] * num_packets * sizeof(ValueType); - auto const receive_buffer_ptr = - imports.data() + _src_offsets[i] * num_packets; + auto const receive_buffer_ptr = imports_comm.data() + _src_offsets[i]; + auto const message_size = _src_counts[i] * sizeof(ValueType); requests.emplace_back(); MPI_Irecv(receive_buffer_ptr, message_size, MPI_BYTE, _sources[i], 123, _comm, &requests.back()); } } - // make sure the data in dest_buffer has been copied before sending it. - if (permutation_necessary) - space.fence("ArborX::Distributor::doPostsAndWaits" - " (permute done before packing data into send buffer)"); + // Make sure the data is ready before sending it + space.fence( + "ArborX::Distributor::doPostsAndWaits (data ready before sending)"); for (int i = 0; i < outdegrees; ++i) { - auto const message_size = - _dest_counts[i] * num_packets * sizeof(ValueType); - auto const send_buffer_ptr = - dest_buffer_mirror.data() + _dest_offsets[i] * num_packets; - if (_destinations[i] == comm_rank) - { - auto const it = std::find(_sources.begin(), _sources.end(), comm_rank); - ARBORX_ASSERT(it != _sources.end()); - auto const position = it - _sources.begin(); - auto const receive_buffer_ptr = - imports.data() + _src_offsets[position] * num_packets; - - Kokkos::View> - receive_view(receive_buffer_ptr, message_size / sizeof(ValueType)); - Kokkos::View> - send_view(send_buffer_ptr, message_size / sizeof(ValueType)); - Kokkos::deep_copy(space, receive_view, send_view); - } - else + if (_destinations[i] != comm_rank) { requests.emplace_back(); - MPI_Isend(send_buffer_ptr, message_size, MPI_BYTE, _destinations[i], - 123, _comm, &requests.back()); + MPI_Isend(exports_comm.data() + _dest_offsets[i], + _dest_counts[i] * sizeof(ValueType), MPI_BYTE, + _destinations[i], 123, _comm, &requests.back()); } } if (!requests.empty()) MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + + if (same_rank_destination != -1) + { + ARBORX_ASSERT((_src_offsets[same_rank_source + 1] - + _src_offsets[same_rank_source]) == + (_dest_offsets[same_rank_destination + 1] - + _dest_offsets[same_rank_destination])); + Kokkos::deep_copy( + space, + Kokkos::subview(imports, + std::pair(_src_offsets[same_rank_source], + _src_offsets[same_rank_source + 1])), + Kokkos::subview(permuted_exports, + std::pair(_dest_offsets[same_rank_destination], + _dest_offsets[same_rank_destination + 1]))); + } + +#ifndef ARBORX_ENABLE_GPU_AWARE_MPI + if (same_rank_destination != -1) + { + for (auto interval : {std::make_pair(0, _src_offsets[same_rank_source]), + std::make_pair(_src_offsets[same_rank_source + 1], + _src_offsets.back())}) + Kokkos::deep_copy(space, Kokkos::subview(imports, interval), + Kokkos::subview(imports_comm, interval)); + } + else + { + Kokkos::deep_copy(space, imports, imports_comm); + } +#endif } size_t getTotalReceiveLength() const { return _src_offsets.back(); } size_t getTotalSendLength() const { return _dest_offsets.back(); } diff --git a/src/details/ArborX_DetailsUtils.hpp b/src/details/ArborX_DetailsUtils.hpp index 6e9400971..fc2abf4c8 100644 --- a/src/details/ArborX_DetailsUtils.hpp +++ b/src/details/ArborX_DetailsUtils.hpp @@ -23,141 +23,6 @@ namespace ArborX { -namespace Details -{ - -namespace internal -{ -template -struct PointerDepth -{ - static constexpr int value = 0; -}; - -template -struct PointerDepth -{ - static constexpr int value = PointerDepth::value + 1; -}; - -template -struct PointerDepth -{ - static constexpr int value = PointerDepth::value; -}; -} // namespace internal - -template -inline Kokkos::View -create_layout_right_mirror_view_no_init(ExecutionSpace const &execution_space, - MemorySpace const &memory_space, - View const &src) -{ - static_assert(Kokkos::is_execution_space::value); - static_assert(Kokkos::is_memory_space::value); - - constexpr bool has_compatible_layout = - (std::is_same_v || - (View::rank == 1 && - (std::is_same_v || - std::is_same_v))); - constexpr bool has_compatible_memory_space = - std::is_same_v; - - if constexpr (has_compatible_layout && has_compatible_memory_space) - { - return src; - } - else - { - constexpr int pointer_depth = - internal::PointerDepth::value; - return Kokkos::View( - Kokkos::view_alloc( - execution_space, memory_space, Kokkos::WithoutInitializing, - std::string(src.label()).append("_layout_right_mirror")), - src.extent(0), pointer_depth > 1 ? src.extent(1) : KOKKOS_INVALID_INDEX, - pointer_depth > 2 ? src.extent(2) : KOKKOS_INVALID_INDEX, - pointer_depth > 3 ? src.extent(3) : KOKKOS_INVALID_INDEX, - pointer_depth > 4 ? src.extent(4) : KOKKOS_INVALID_INDEX, - pointer_depth > 5 ? src.extent(5) : KOKKOS_INVALID_INDEX, - pointer_depth > 6 ? src.extent(6) : KOKKOS_INVALID_INDEX, - pointer_depth > 7 ? src.extent(7) : KOKKOS_INVALID_INDEX); - } -} - -template -inline auto create_layout_right_mirror_view_no_init(View const &src) -{ - typename View::traits::host_mirror_space::execution_space exec; - auto mirror_view = create_layout_right_mirror_view_no_init( - exec, typename View::traits::host_mirror_space{}, src); - exec.fence(); - return mirror_view; -} - -template -inline auto -create_layout_right_mirror_view_and_copy(ExecutionSpace const &execution_space, - MemorySpace const &memory_space, - View const &src) -{ - static_assert(Kokkos::is_execution_space::value); - static_assert(Kokkos::is_memory_space::value); - - constexpr bool has_compatible_layout = - (std::is_same_v || - (View::rank == 1 && - (std::is_same_v || - std::is_same_v))); - - if constexpr (has_compatible_layout) - { - return Kokkos::create_mirror_view_and_copy(memory_space, src); - } - else - { - constexpr int pointer_depth = - internal::PointerDepth::value; - - auto exec = [execution_space]() { - if constexpr (Kokkos::SpaceAccessibility::accessible) - return execution_space; - else - return typename MemorySpace::execution_space{}; - }(); - - Kokkos::View - layout_right_view( - Kokkos::view_alloc( - exec, Kokkos::WithoutInitializing, - std::string(src.label()).append("_layout_right_mirror")), - src.extent(0), - pointer_depth > 1 ? src.extent(1) : KOKKOS_INVALID_INDEX, - pointer_depth > 2 ? src.extent(2) : KOKKOS_INVALID_INDEX, - pointer_depth > 3 ? src.extent(3) : KOKKOS_INVALID_INDEX, - pointer_depth > 4 ? src.extent(4) : KOKKOS_INVALID_INDEX, - pointer_depth > 5 ? src.extent(5) : KOKKOS_INVALID_INDEX, - pointer_depth > 6 ? src.extent(6) : KOKKOS_INVALID_INDEX, - pointer_depth > 7 ? src.extent(7) : KOKKOS_INVALID_INDEX); - auto tmp_view = Kokkos::create_mirror_view( - Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, memory_space), - src); - if constexpr (!Kokkos::SpaceAccessibility::accessible) - exec.fence(); - Kokkos::deep_copy(execution_space, tmp_view, src); - Kokkos::deep_copy(execution_space, layout_right_view, tmp_view); - return layout_right_view; - } -} - -} // namespace Details - template [[deprecated]] void exclusivePrefixSum(ExecutionSpace &&space, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 93e7fb40e..35969eda8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -249,7 +249,7 @@ if(ARBORX_ENABLE_MPI) target_include_directories(ArborX_Test_DistributedTree.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) add_test(NAME ArborX_Test_DistributedTree COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} $ ${MPIEXEC_POSTFLAGS}) - add_executable(ArborX_Test_DetailsDistributedTreeImpl.exe tstDetailsDistributedTreeImpl.cpp utf_main.cpp) + add_executable(ArborX_Test_DetailsDistributedTreeImpl.exe tstDetailsDistributedTreeImpl.cpp tstDetailsDistributor.cpp utf_main.cpp) target_link_libraries(ArborX_Test_DetailsDistributedTreeImpl.exe PRIVATE ArborX Boost::unit_test_framework) target_compile_definitions(ArborX_Test_DetailsDistributedTreeImpl.exe PRIVATE BOOST_TEST_DYN_LINK ARBORX_MPI_UNIT_TEST) target_include_directories(ArborX_Test_DetailsDistributedTreeImpl.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/test/tstDetailsDistributedTreeImpl.cpp b/test/tstDetailsDistributedTreeImpl.cpp index 34b71699a..ca263c9cc 100644 --- a/test/tstDetailsDistributedTreeImpl.cpp +++ b/test/tstDetailsDistributedTreeImpl.cpp @@ -18,8 +18,6 @@ #define BOOST_TEST_MODULE DetailsDistributedTree -#include - namespace tt = boost::test_tools; BOOST_AUTO_TEST_CASE_TEMPLATE(count_results, DeviceType, ARBORX_DEVICE_TYPES) @@ -33,13 +31,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(count_results, DeviceType, ARBORX_DEVICE_TYPES) BOOST_TEST(ids_ref.size() == nnz); BOOST_TEST(offset_ref.size() == m + 1); - Kokkos::View ids("query_ids", nnz); + Kokkos::View ids("Testing::query_ids", nnz); auto ids_host = Kokkos::create_mirror_view(ids); for (int i = 0; i < nnz; ++i) ids_host(i) = ids_ref[i]; Kokkos::deep_copy(ids, ids_host); - Kokkos::View offset("offset", m); + Kokkos::View offset("Testing::offset", m); using ExecutionSpace = typename DeviceType::execution_space; ArborX::Details::DistributedTree::countResults(ExecutionSpace{}, m, ids, @@ -50,100 +48,6 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(count_results, DeviceType, ARBORX_DEVICE_TYPES) BOOST_TEST(offset_host == offset_ref, tt::per_element()); } -template -inline void checkViewWasNotAllocated(View1 const &v1, View2 const &v2) -{ - // NOTE: cannot use operator== here because array layout may "change" for - // rank-1 views - BOOST_TEST(v1.data() == v2.data()); - BOOST_TEST(v1.span() == v2.span()); - - BOOST_TEST((int)View1::rank == (int)View2::rank); - BOOST_TEST((std::is_same_v)); - BOOST_TEST((std::is_same_v)); - - BOOST_TEST(v1.extent(0) == v2.extent(0)); - BOOST_TEST(v1.extent(1) == v2.extent(1)); - BOOST_TEST(v1.extent(2) == v2.extent(2)); - BOOST_TEST(v1.extent(3) == v2.extent(3)); - BOOST_TEST(v1.extent(4) == v2.extent(4)); - BOOST_TEST(v1.extent(5) == v2.extent(5)); - BOOST_TEST(v1.extent(6) == v2.extent(6)); - BOOST_TEST(v1.extent(7) == v2.extent(7)); -} - -template -inline void checkNewViewWasAllocated(View1 const &v1, View2 const &v2) -{ - BOOST_TEST(v1.data() != v2.data()); - - BOOST_TEST((int)View1::rank == (int)View2::rank); - BOOST_TEST((std::is_same_v)); - - BOOST_TEST(v1.extent(0) == v2.extent(0)); - BOOST_TEST(v1.extent(1) == v2.extent(1)); - BOOST_TEST(v1.extent(2) == v2.extent(2)); - BOOST_TEST(v1.extent(3) == v2.extent(3)); - BOOST_TEST(v1.extent(4) == v2.extent(4)); - BOOST_TEST(v1.extent(5) == v2.extent(5)); - BOOST_TEST(v1.extent(6) == v2.extent(6)); - BOOST_TEST(v1.extent(7) == v2.extent(7)); -} - -BOOST_AUTO_TEST_CASE_TEMPLATE(create_layout_right_mirror_view_no_init, - DeviceType, ARBORX_DEVICE_TYPES) -{ - using ArborX::Details::create_layout_right_mirror_view_no_init; - using Kokkos::ALL; - using Kokkos::LayoutLeft; - using Kokkos::LayoutRight; - using Kokkos::make_pair; - using Kokkos::subview; - using Kokkos::View; - - if (!Kokkos::SpaceAccessibility< - Kokkos::HostSpace, typename DeviceType::memory_space>::accessible) - return; - - // rank-1 and not strided -> do not allocate - View u("u", 255); - auto u_h = create_layout_right_mirror_view_no_init(u); - checkViewWasNotAllocated(u, u_h); - - // right layout -> do not allocate - View v("v", 2, 3); - auto v_h = create_layout_right_mirror_view_no_init(v); - checkViewWasNotAllocated(v, v_h); - - // the same with compile time size - View v_c("v"); - auto v_c_h = create_layout_right_mirror_view_no_init(v_c); - checkViewWasNotAllocated(v_c, v_c_h); - - // left layout and rank > 1 -> allocate - View w("w", 4, 5); - auto w_h = create_layout_right_mirror_view_no_init(w); - checkNewViewWasAllocated(w, w_h); - - // the same with compile time size - View w_c("v", 4); - auto w_c_h = create_layout_right_mirror_view_no_init(w_c); - checkNewViewWasAllocated(w_c, w_c_h); - - // strided layout -> allocate - auto x = subview(v, ALL, 0); - auto x_h = create_layout_right_mirror_view_no_init(x); - checkNewViewWasAllocated(x, x_h); - - // subview is rank-1 and not strided -> do not allocate - auto y = subview(u, make_pair(8, 16)); - auto y_h = create_layout_right_mirror_view_no_init(y); - checkViewWasNotAllocated(y, y_h); -} - void checkBufferLayout(std::vector const &ranks, std::vector const &permute_ref, std::vector const &unique_ref, @@ -181,104 +85,3 @@ BOOST_AUTO_TEST_CASE(sort_and_determine_buffer_layout) checkBufferLayout({0, 1, 2, 3}, {3, 2, 1, 0}, {3, 2, 1, 0}, {1, 1, 1, 1}, {0, 1, 2, 3, 4}); } - -BOOST_AUTO_TEST_CASE(pointer_depth) -{ - static_assert(ArborX::Details::internal::PointerDepth::value == 0, - "Failing for double"); - static_assert(ArborX::Details::internal::PointerDepth::value == 1, - "Failing for double*"); - static_assert(ArborX::Details::internal::PointerDepth::value == 0, - "Failing for double[2]"); - static_assert(ArborX::Details::internal::PointerDepth::value == 2, - "Failing for double**"); - static_assert(ArborX::Details::internal::PointerDepth::value == - 1, - "Failing for double*[2]"); - static_assert(ArborX::Details::internal::PointerDepth::value == - 0, - "Failing for double[2][3]"); - static_assert(ArborX::Details::internal::PointerDepth::value == 3, - "Failing for double***"); - static_assert(ArborX::Details::internal::PointerDepth::value == - 2, - "Failing for double[2]"); - static_assert( - ArborX::Details::internal::PointerDepth::value == 1, - "Failing for double*[2][3]"); - static_assert( - ArborX::Details::internal::PointerDepth::value == 0, - "Failing for double[2][3][4]"); -} - -template -struct Helper -{ - template - static void checkSendAcrossNetwork(MPI_Comm comm, View1 const &ranks, - View2 const &v_exp, View3 const &v_ref) - { - ArborX::Details::Distributor distributor(comm); - distributor.createFromSends(typename DeviceType::execution_space{}, ranks); - - // NOTE here we assume that the reference solution is sized properly - auto v_imp = Kokkos::create_mirror(typename View3::memory_space(), v_ref); - - ArborX::Details::DistributedTree::sendAcrossNetwork( - typename DeviceType::execution_space{}, distributor, v_exp, v_imp); - - auto v_imp_host = Kokkos::create_mirror_view(v_imp); - Kokkos::deep_copy(v_imp_host, v_imp); - auto v_ref_host = Kokkos::create_mirror_view(v_ref); - Kokkos::deep_copy(v_ref_host, v_ref); - - BOOST_TEST(v_imp.extent(0) == v_ref.extent(0)); - BOOST_TEST(v_imp.extent(1) == v_ref.extent(1)); - for (unsigned int i = 0; i < v_imp.extent(0); ++i) - { - for (unsigned int j = 0; j < v_imp.extent(1); ++j) - { - BOOST_TEST(v_imp_host(i, j) == v_ref_host(i, j)); - } - } - } -}; - -BOOST_AUTO_TEST_CASE_TEMPLATE(send_across_network, DeviceType, - ARBORX_DEVICE_TYPES) -{ - using ExecutionSpace = typename DeviceType::execution_space; - MPI_Comm comm = MPI_COMM_WORLD; - int comm_rank; - MPI_Comm_rank(comm, &comm_rank); - int comm_size; - MPI_Comm_size(comm, &comm_size); - - int const DIM = 3; - - // send 1 packet to rank k - // receive comm_size packets - Kokkos::View u_exp("u_exp", comm_size, DIM); - Kokkos::parallel_for( - Kokkos::RangePolicy(0, comm_size), KOKKOS_LAMBDA(int i) { - for (int j = 0; j < DIM; ++j) - u_exp(i, j) = i + j * comm_rank; - }); - - Kokkos::View ranks_u("", comm_size); - ArborX::Details::KokkosExt::iota(ExecutionSpace{}, ranks_u, 0); - - Kokkos::View u_ref("u_ref", comm_size, DIM); - Kokkos::parallel_for( - Kokkos::RangePolicy(0, comm_size), KOKKOS_LAMBDA(int i) { - for (int j = 0; j < DIM; ++j) - u_ref(i, j) = comm_rank + i * j; - }); - - Helper::checkSendAcrossNetwork(comm, ranks_u, u_exp, u_ref); - - Kokkos::View u_exp_unmanaged{ - u_exp}; - Helper::checkSendAcrossNetwork(comm, ranks_u, u_exp_unmanaged, - u_ref); -} diff --git a/test/tstDetailsDistributor.cpp b/test/tstDetailsDistributor.cpp new file mode 100644 index 000000000..b20adec94 --- /dev/null +++ b/test/tstDetailsDistributor.cpp @@ -0,0 +1,83 @@ +/**************************************************************************** + * Copyright (c) 2017-2022 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ +#include "ArborX_EnableDeviceTypes.hpp" // ARBORX_DEVICE_TYPES +#include "ArborX_EnableViewComparison.hpp" +#include +#include + +#include + +#define BOOST_TEST_MODULE DetailsDistributor + +namespace tt = boost::test_tools; + +template +struct Helper +{ + template + static void checkDoPostsAndWaits(MPI_Comm comm, View1 const &ranks, + View2 const &exports, + View3 const &imports_ref) + { + ArborX::Details::Distributor distributor(comm); + distributor.createFromSends(typename DeviceType::execution_space{}, ranks); + + // NOTE here we assume that the reference solution is sized properly + auto imports = + Kokkos::create_mirror(typename View2::memory_space(), imports_ref); + + distributor.doPostsAndWaits(typename DeviceType::execution_space{}, exports, + imports); + + auto imports_host = Kokkos::create_mirror_view(imports); + Kokkos::deep_copy(imports_host, imports); + auto imports_ref_host = Kokkos::create_mirror_view(imports_ref); + Kokkos::deep_copy(imports_ref_host, imports_ref); + + BOOST_TEST(imports_host == imports_ref_host, tt::per_element()); + } +}; + +BOOST_AUTO_TEST_CASE_TEMPLATE(do_posts_and_waits, DeviceType, + ARBORX_DEVICE_TYPES) +{ + using ExecutionSpace = typename DeviceType::execution_space; + + MPI_Comm comm = MPI_COMM_WORLD; + int comm_rank; + MPI_Comm_rank(comm, &comm_rank); + int comm_size; + MPI_Comm_size(comm, &comm_size); + + // send 1 packet to rank k + // receive comm_size packets + Kokkos::View exports("Testing::exports", comm_size); + Kokkos::parallel_for( + Kokkos::RangePolicy(0, comm_size), + KOKKOS_LAMBDA(int i) { exports(i) = (i < comm_rank ? 0 : comm_rank); }); + + Kokkos::View ranks("Testing::ranks", comm_size); + ArborX::Details::KokkosExt::iota(ExecutionSpace{}, ranks, 0); + + Kokkos::View imports_ref("Testing::v_ref", comm_size); + Kokkos::parallel_for( + Kokkos::RangePolicy(0, comm_size), KOKKOS_LAMBDA(int i) { + // Sum of all smaller ranks including this one + imports_ref(i) = (i <= comm_rank ? i : 0); + }); + + Helper::checkDoPostsAndWaits(comm, ranks, exports, imports_ref); + + Kokkos::View exports_unmanaged{ + exports}; + Helper::checkDoPostsAndWaits(comm, ranks, exports_unmanaged, + imports_ref); +}