diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index a48f3cac5fc..58322136606 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -39,7 +39,7 @@ if(GINKGO_BUILD_EXTLIB_EXAMPLE)
 endif()
 
 if(GINKGO_BUILD_MPI)
-    list(APPEND EXAMPLES_LIST distributed-spmv distributed-solver)
+    list(APPEND EXAMPLES_LIST distributed-spmv distributed-spmv-scaling distributed-solver)
 endif()
 
 find_package(OpenCV QUIET)
diff --git a/examples/distributed-spmv-scaling/CMakeLists.txt b/examples/distributed-spmv-scaling/CMakeLists.txt
new file mode 100644
index 00000000000..b00d44733b2
--- /dev/null
+++ b/examples/distributed-spmv-scaling/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(distributed-spmv-scaling distributed-spmv-scaling.cpp)
+target_link_libraries(distributed-spmv-scaling Ginkgo::ginkgo)
diff --git a/examples/distributed-spmv-scaling/build.sh b/examples/distributed-spmv-scaling/build.sh
new file mode 100755
index 00000000000..f4a66345f00
--- /dev/null
+++ b/examples/distributed-spmv-scaling/build.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# set up script
+if [ $# -ne 1 ]; then
+    echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY"
+    exit 1
+fi
+BUILD_DIR=$1
+THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd )
+
+source ${THIS_DIR}/../build-setup.sh
+
+# build
+mpic++ -std=c++14 -o ${THIS_DIR}/distributed-spmv-scaling ${THIS_DIR}/distributed-spmv-scaling.cpp \
+    -I${THIS_DIR}/../../include -I${BUILD_DIR}/include \
+    -L${THIS_DIR} ${LINK_FLAGS}
diff --git a/examples/distributed-spmv-scaling/distributed-spmv-scaling.cpp b/examples/distributed-spmv-scaling/distributed-spmv-scaling.cpp
new file mode 100644
index 00000000000..5c9b70905ba
--- /dev/null
+++ b/examples/distributed-spmv-scaling/distributed-spmv-scaling.cpp
@@ -0,0 +1,270 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+// @sect3{Include files}
+
+// This is the main ginkgo header file.
+#include <ginkgo/ginkgo.hpp>
+
+// Add the fstream header to read data from files.
+#include <fstream>
+// Add the C++ iostream header to output information to the console.
+#include <iostream>
+// Add the STL map header for the executor selection
+#include <map>
+// Add the string manipulation header to handle strings.
+#include <string>
+#include <chrono>
+
+
+// Finally, we need the MPI header for MPI_Init and MPI_Finalize
+#include <mpi.h>
+
+
+/**
+ * Generates matrix data for a 2D stencil matrix. If restricted is set to true,
+ * creates a 5-pt stencil, if it is false creates a 9-pt stencil. If
+ * strong_scaling is set to true, creates the same problem size independent of
+ * the number of ranks; if it is false, the problem size grows with the number
+ * of ranks.
+ */
+template <typename ValueType, typename IndexType>
+gko::matrix_data<ValueType, IndexType> generate_2d_stencil(
+    const IndexType dp, std::shared_ptr<gko::mpi::communicator> comm,
+    bool restricted, bool strong_scaling)
+{
+    const auto mat_size = strong_scaling ? dp * dp : dp * dp * comm->size();
+    const auto rows_per_rank = gko::ceildiv(mat_size, comm->size());
+    const auto start = rows_per_rank * comm->rank();
+    const auto end = gko::min(rows_per_rank * (comm->rank() + 1), mat_size);
+
+    auto A_data =
+        gko::matrix_data<ValueType, IndexType>(gko::dim<2>{mat_size, mat_size});
+
+    for (IndexType row = start; row < end; row++) {
+        auto i = row / dp;
+        auto j = row % dp;
+        for (IndexType d_i = -1; d_i <= 1; d_i++) {
+            for (IndexType d_j = -1; d_j <= 1; d_j++) {
+                if (!restricted || (d_i == 0 || d_j == 0)) {
+                    auto col = j + d_j + (i + d_i) * dp;
+                    if (col >= 0 && col < mat_size) {
+                        A_data.nonzeros.emplace_back(row, col,
+                                                     gko::one<ValueType>());
+                    }
+                }
+            }
+        }
+    }
+
+    return A_data;
+}
+
+
+/**
+ * Generates matrix data for a 3D stencil matrix. If restricted is set to true,
+ * creates a 7-pt stencil, if it is false creates a 27-pt stencil. If
+ * strong_scaling is set to true, creates the same problem size independent of
+ * the number of ranks; if it is false, the problem size grows with the number
+ * of ranks.
+ */
+template <typename ValueType, typename IndexType>
+gko::matrix_data<ValueType, IndexType> generate_3d_stencil(
+    const IndexType dp, std::shared_ptr<gko::mpi::communicator> comm,
+    bool restricted, bool strong_scaling)
+{
+    const auto mat_size =
+        strong_scaling ? dp * dp * dp : dp * dp * dp * comm->size();
+    const auto rows_per_rank = gko::ceildiv(mat_size, comm->size());
+    const auto start = rows_per_rank * comm->rank();
+    const auto end = gko::min(rows_per_rank * (comm->rank() + 1), mat_size);
+
+    auto A_data =
+        gko::matrix_data<ValueType, IndexType>(gko::dim<2>{mat_size, mat_size});
+
+    for (IndexType row = start; row < end; row++) {
+        auto i = row / (dp * dp);
+        auto j = (row % (dp * dp)) / dp;
+        auto k = row % dp;
+        for (IndexType d_i = -1; d_i <= 1; d_i++) {
+            for (IndexType d_j = -1; d_j <= 1; d_j++) {
+                for (IndexType d_k = -1; d_k <= 1; d_k++) {
+                    if (!restricted ||
+                        ((d_i == 0 && d_j == 0) || (d_i == 0 && d_k == 0) ||
+                         (d_j == 0 && d_k == 0))) {
+                        auto col =
+                            k + d_k + (j + d_j) * dp + (i + d_i) * dp * dp;
+                        if (col >= 0 && col < mat_size) {
+                            A_data.nonzeros.emplace_back(row, col,
+                                                         gko::one<ValueType>());
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return A_data;
+}
+
+
+int main(int argc, char* argv[])
+{
+    const auto fin = gko::mpi::init_finalize(argc, argv);
+    // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense
+    // with one column/one row. The advantage of this concept is that using
+    // multiple vectors is now a natural extension of adding columns/rows as
+    // necessary.
+    using ValueType = double;
+    using GlobalIndexType = gko::distributed::global_index_type;
+    using LocalIndexType = GlobalIndexType;
+    using dist_mtx = gko::distributed::Matrix<ValueType, LocalIndexType>;
+    using dist_vec = gko::distributed::Vector<ValueType, LocalIndexType>;
+    using vec = gko::matrix::Dense<ValueType>;
+    using part_type = gko::distributed::Partition<LocalIndexType>;
+
+    const auto comm = gko::mpi::communicator::create_world();
+    const auto rank = comm->rank();
+    const auto local_rank = comm->local_rank();
+
+    // Print the ginkgo version information.
+    if (rank == 0) {
+        std::cout << gko::version_info::get() << std::endl;
+    }
+
+    if (argc == 2 && (std::string(argv[1]) == "--help")) {
+        if (rank == 0) {
+            std::cerr << "Usage: " << argv[0]
+                      << " [executor] [DISCRETIZATION_POINTS] [2D] "
+                         "[RESTRICT_STENCIL] [STRONG_SCALING]"
+                      << std::endl;
+            std::cerr << "Default values:" << std::endl;
+            std::cerr << "  - executor: reference" << std::endl;
+            std::cerr << "  - DISCRETIZATION_POINTS: 100" << std::endl;
+            std::cerr << "  - 2D: 1" << std::endl;
+            std::cerr << "  - RESTRICT_STENCIL: 0" << std::endl;
+            std::cerr << "  - STRONG_SCALING: 1" << std::endl;
+        }
+        std::exit(-1);
+    }
+
+    const auto executor_string = argc >= 2 ? argv[1] : "reference";
+    std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
+        exec_map{
+            {"omp", [] { return gko::OmpExecutor::create(); }},
+            {"cuda",
+             [local_rank] {
+                 return gko::CudaExecutor::create(
+                     local_rank, gko::ReferenceExecutor::create(), true);
+             }},
+            {"hip",
+             [local_rank] {
+                 return gko::HipExecutor::create(
+                     local_rank, gko::ReferenceExecutor::create(), true);
+             }},
+            {"dpcpp",
+             [local_rank] {
+                 return gko::DpcppExecutor::create(
+                     local_rank, gko::ReferenceExecutor::create());
+             }},
+            {"reference", [] { return gko::ReferenceExecutor::create(); }}};
+
+    // executor where Ginkgo will perform the computation
+    const auto exec = exec_map.at(executor_string)();  // throws if not valid
+
+    const auto dp = argc >= 3 ? atoi(argv[2]) : 100;
+    const bool two_dim = argc >= 4 ? atoi(argv[3]) > 0 : true;
+    const bool restricted = argc >= 5 ? atoi(argv[4]) > 0 : false;
+    const bool strong_scaling = argc >= 6 ? atoi(argv[5]) > 0 : true;
+
+    // Generate matrix data on each rank
+    if (rank == 0) {
+        std::cout << "Generating stencil matrix..." << std::endl;
+    }
+    auto A_data = two_dim ? generate_2d_stencil<ValueType, GlobalIndexType>(
+                                dp, comm, restricted, strong_scaling)
+                          : generate_3d_stencil<ValueType, GlobalIndexType>(
+                                dp, comm, restricted, strong_scaling);
+    const auto mat_size = A_data.size[0];
+    const auto rows_per_rank = mat_size / comm->size();
+
+    // build partition: uniform number of rows per rank
+    gko::Array<GlobalIndexType> ranges_array{
+        exec->get_master(), static_cast<gko::size_type>(comm->size() + 1)};
+    for (int i = 0; i < comm->size(); i++) {
+        ranges_array.get_data()[i] = i * rows_per_rank;
+    }
+    ranges_array.get_data()[comm->size()] = mat_size;
+    auto partition = gko::share(
+        part_type::build_from_contiguous(exec->get_master(), ranges_array));
+
+    // Build global matrix from local matrix data.
+    auto h_A = dist_mtx::create(exec->get_master(), comm);
+    auto A = dist_mtx::create(exec, comm);
+    h_A->read_distributed(A_data, partition);
+    A->copy_from(h_A.get());
+
+    // Set up global vectors for the distributed SpMV
+    if (rank == 0) {
+        std::cout << "Setting up vectors..." << std::endl;
+    }
+    const auto local_size =
+        ranges_array.get_data()[rank + 1] - ranges_array.get_data()[rank];
+    auto x = dist_vec::create(exec, comm, partition, gko::dim<2>{mat_size, 1},
+                              gko::dim<2>{local_size, 1});
+    x->fill(gko::one<ValueType>());
+    auto b = dist_vec::create(exec, comm, partition, gko::dim<2>{mat_size, 1},
+                              gko::dim<2>{local_size, 1});
+    b->fill(gko::one<ValueType>());
+
+    // Do a warmup run
+    if (rank == 0) {
+        std::cout << "Warming up..." << std::endl;
+    }
+    A->apply(lend(x), lend(b));
+
+    // Do and time the actual benchmark runs
+    if (rank == 0) {
+        std::cout << "Running benchmark..." << std::endl;
+    }
+    auto tic = std::chrono::steady_clock::now();
+    for (auto i = 0; i < 100; i++) {
+        A->apply(lend(x), lend(b));
+        exec->synchronize();
+    }
+    auto toc = std::chrono::steady_clock::now();
+
+    if (rank == 0) {
+        std::chrono::duration<double> duration = toc - tic;
+        std::cout << "DURATION: " << duration.count() << "s" << std::endl;
+    }
+}
diff --git a/examples/distributed-spmv-scaling/doc/builds-on b/examples/distributed-spmv-scaling/doc/builds-on
new file mode 100644
index 00000000000..dbf16906746
--- /dev/null
+++ b/examples/distributed-spmv-scaling/doc/builds-on
@@ -0,0 +1 @@
+distributed-spmv
diff --git a/examples/distributed-spmv-scaling/doc/intro.dox b/examples/distributed-spmv-scaling/doc/intro.dox
new file mode 100644
index 00000000000..c8f39263b9e
--- /dev/null
+++ b/examples/distributed-spmv-scaling/doc/intro.dox
@@ -0,0 +1,6 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+This example helps you inspect the scaling behaviour of the distributed sparse matrix-vector product (SpMV) in Ginkgo.
+You can select the hardware architecture to run on via the executor argument, choose between a 2D five-point,
+a 2D nine-point, a 3D seven-point, or a 3D 27-point stencil matrix, control the problem size, and select
+whether you want to inspect weak or strong scaling.
diff --git a/examples/distributed-spmv-scaling/doc/kind b/examples/distributed-spmv-scaling/doc/kind
new file mode 100644
index 00000000000..196aa616342
--- /dev/null
+++ b/examples/distributed-spmv-scaling/doc/kind
@@ -0,0 +1 @@
+distributed
diff --git a/examples/distributed-spmv-scaling/doc/results.dox b/examples/distributed-spmv-scaling/doc/results.dox
new file mode 100644
index 00000000000..043176dbea7
--- /dev/null
+++ b/examples/distributed-spmv-scaling/doc/results.dox
@@ -0,0 +1,14 @@
+<h1>Results</h1>
+The following is the expected result:
+
+@code{.cpp}
+
+Generating stencil matrix...
+Setting up vectors...
+Warming up...
+Running benchmark...
+DURATION: 0.0114743s
+
+@endcode
+
+<h3> Comments about programming and debugging </h3>
diff --git a/examples/distributed-spmv-scaling/doc/short-intro b/examples/distributed-spmv-scaling/doc/short-intro
new file mode 100644
index 00000000000..6862e6a495f
--- /dev/null
+++ b/examples/distributed-spmv-scaling/doc/short-intro
@@ -0,0 +1 @@
+The distributed SpMV scaling behaviour example.
diff --git a/examples/distributed-spmv-scaling/doc/tooltip b/examples/distributed-spmv-scaling/doc/tooltip
new file mode 100644
index 00000000000..aade238e62e
--- /dev/null
+++ b/examples/distributed-spmv-scaling/doc/tooltip
@@ -0,0 +1 @@
+Computes and times a distributed sparse matrix-vector product (SpMV).
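
A minimal usage sketch, assuming the example has been built either through the CMake target or with build.sh above, and that mpirun is the available MPI launcher; the build path, rank counts, and parameter values are illustrative assumptions only. The positional arguments follow the order documented in the example's --help output (executor, DISCRETIZATION_POINTS, 2D, RESTRICT_STENCIL, STRONG_SCALING).

@code{.sh}
# Build against an existing Ginkgo build tree (placeholder path).
./build.sh /path/to/ginkgo/build

# Strong scaling: 2D 9-point stencil on a 100x100 grid, 4 ranks, reference executor.
mpirun -n 4 ./distributed-spmv-scaling reference 100 1 0 1

# Weak scaling: restricted 3D 7-point stencil, roughly 50^3 rows per rank, 8 ranks, one GPU per rank.
mpirun -n 8 ./distributed-spmv-scaling cuda 50 0 1 0
@endcode

Each run reports a single DURATION line from rank 0, giving the accumulated time of the 100 timed SpMV applications for that configuration.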