From 66046d8cd51f5bcf8666fd8c810322e253c4ce0e Mon Sep 17 00:00:00 2001 From: Adam Kunen Date: Tue, 23 Oct 2018 14:30:42 -0600 Subject: [PATCH] v1.2.3 release (#9) * Initial pass at switching to BLT * Playing with modules * Updated to build with RAJA and fixed a number of warnings * Updated configuration and parameter display * More cleanup of output * Added DataStore class * First sketch of refactor. PartitionSpace and Sets have been added. Still need to work out a ProductSet * Refactored timing to be in DataStore, fixed some basic code layout issues * Downselected to DZG kernels as prototypes for RAJAfication * Broke up Kernel_3d_DGZ into separate kernel files * Fixed timers, make Population edit its own kernel * Initial implementation and use of Field * Fixed to not require MPI, and implemented sizes for ProductSet * Switched psi over to Field * Moved rhs to use Field * Renamed Initialize to Generate * Partial conversion to fields... kinda broken * More progress, started moving quadrature set generation to Fields * Eliminated everything from Grid and Subdomain except adjacency information * Split Generate.cpp into each component * Removed Grid and Subdomain and SubTVec completely. Something wrong with adjaceny/sweep communication. * Removed Quadrature.*, moved into Generate/Quadrature.cpp Fixed some bugs in sweep... still something wrong * Added zone count checks. 
Made sure plane data * Moved files into Kripke/Core, still to to renamespace and fix includes * Fixed up header files for Kripke/Core * Re-namespaced everything in Kripke/Core * Fixed initialization bug * RAJAfied the code for sequential policies * Updated RAJA * Updated to latest nested forall branch, and fixed timers * Reinstated SIMD policies and added FOM output * Added MPI back, cleaned up a lot of the output * Reorganized Arch headers * Fixed build system to allow changing of Arch * Added more output for build options * Refactored kernels to use type dispatch routine * Updated to work with latest RAJA, added support for stride-1 dims in RAJA::Layout. * Updated OpenMP policies * Freezing RAJA * Freezing BLT * Cleanup * Updated some documentation, more cleanup * Updated documentation to have CORAL2 problem defintions for BG/Q * Removed old testing dir * Updated version to 1.2.0-CORAL2 * Fixed RAJA version * Updated version to 1.2.0 * Fixed some documentation and indentation * Added throughput FOM * Updated submodule to raja 0.6.0rc1 * Updated to RAJA::kernel * fixed FOM * fixed format * Bump from raja-0.6.1rc1 to rc2 * Fixed bug in Population kernel * First stab at adding runtime AL selection * Added SdomAL class to aid in pulling out AL specific views * Fixed bug in permutation/stride-1 calc * Wired in runtime selecable arch and layout * Changed "kripke" to "build" in build instructions * Much more work on adding multiple layouts back in * Added OpenMP policies * Added chai and modified build system to bring things in... working but needs cleanup * Fixed build for BG/Q at LLNL * Added debug flags for bgq * Fixed default arch/layout selection, added a bunch of host configs * Added blueos host config files, doesn't work yet * Fixed configs to pass "correct" flags * Updated buildsystem for CUDA and CHAI. ArchV_CUDA has properly been added. 
Now in a position to start adding CUDA memory management and execution policies * Working CUDA with DGZ layout * Updated build system and tpl's * Some cleanup of documentation and source. Fixed some buildsystem issues. * Numerous updates to get things compiling * More fixes for P100 and V100 platforms * Cleaned up cuda build issues --- .gitmodules | 11 + CMakeLists.txt | 310 ++++++---- NOTICE.html | 15 - README.html | 237 -------- README.md | 141 +++-- blt | 1 + cmake/Modules/FindHDF5.cmake | 46 -- cmake/Modules/FindPAPI.cmake | 45 -- cmake/Modules/FindSilo.cmake | 50 -- cmake/Modules/FindTCMalloc.cmake | 30 - cmake/Toolchain/bgqos_0-clang.cmake | 12 - cmake/Toolchain/bgqos_0-gcc.cmake | 12 - cmake/Toolchain/bgqos_0-xlc.cmake | 23 - cmake/Toolchain/chaos_5_x86_64_ib-clang.cmake | 12 - cmake/Toolchain/chaos_5_x86_64_ib-gcc.cmake | 10 - cmake/Toolchain/chaos_5_x86_64_ib-ic12.cmake | 10 - cmake/Toolchain/chaos_5_x86_64_ib-ic14.cmake | 14 - cmake/Toolchain/chaos_5_x86_64_ib-ic15.cmake | 14 - cmake/Toolchain/chaos_5_x86_64_ib-pgi.cmake | 10 - cmake/Toolchain/linux-gcc.cmake | 11 - cmake/modules/Findchai.cmake | 5 + host-configs/llnl-bgqos-clang.cmake | 24 + .../llnl-blueos-P100-nvcc-clang.cmake | 31 + .../llnl-blueos-V100-nvcc-clang.cmake | 31 + host-configs/llnl-toss3-clang4.cmake | 24 + host-configs/llnl-toss3-gcc7.1.cmake | 24 + host-configs/llnl-toss3-gcc8.1.cmake | 25 + host-configs/llnl-toss3-intel18.cmake | 24 + scripts/plotSweepConcur.py | 67 --- scripts/plotSweepTrace.py | 37 -- src/CMakeLists.txt | 5 - src/Kripke.h | 198 +++---- src/Kripke/Arch/LPlusTimes.h | 365 ++++++++++++ src/Kripke/Arch/LTimes.h | 356 ++++++++++++ src/Kripke/Arch/Population.h | 356 ++++++++++++ src/Kripke/Arch/Scattering.h | 364 ++++++++++++ src/Kripke/Arch/Source.h | 141 +++++ src/Kripke/Arch/SweepSubdomains.h | 400 +++++++++++++ src/Kripke/ArchLayout.h | 223 +++++++ src/Kripke/CMakeLists.txt | 28 - .../Kernel_3d_DZG.h => Core/BaseVar.cpp} | 46 +- .../Kernel_3d_DGZ.h => Core/BaseVar.h} | 
40 +- src/Kripke/Core/Comm.h | 232 ++++++++ src/Kripke/Core/DataStore.cpp | 66 +++ src/Kripke/Core/DataStore.h | 118 ++++ src/Kripke/Core/DomainVar.cpp | 71 +++ src/Kripke/Core/DomainVar.h | 98 ++++ src/Kripke/Core/Field.h | 313 ++++++++++ src/Kripke/Core/PartitionSpace.cpp | 308 ++++++++++ src/Kripke/Core/PartitionSpace.h | 129 +++++ src/Kripke/Core/Set.cpp | 173 ++++++ src/Kripke/Core/Set.h | 234 ++++++++ src/Kripke/Core/VarLayout.h | 204 +++++++ src/Kripke/Directions.cpp | 211 ------- src/Kripke/Generate.cpp | 119 ++++ src/Kripke/{Kernel.cpp => Generate.h} | 62 +- src/Kripke/Generate/Data.cpp | 134 +++++ src/Kripke/Generate/Decomp.cpp | 83 +++ .../{Directions.h => Generate/Energy.cpp} | 60 +- src/Kripke/Generate/Quadrature.cpp | 546 +++++++++++++++++ src/Kripke/Generate/Space.cpp | 393 +++++++++++++ src/Kripke/Grid.cpp | 547 ------------------ src/Kripke/Grid.h | 106 ---- ...Input_Variables.cpp => InputVariables.cpp} | 34 +- .../{Input_Variables.h => InputVariables.h} | 16 +- src/Kripke/Kernel.h | 99 +++- src/Kripke/Kernel/Kernel_3d_DGZ.cpp | 367 ------------ src/Kripke/Kernel/Kernel_3d_DZG.cpp | 374 ------------ src/Kripke/Kernel/Kernel_3d_GDZ.cpp | 373 ------------ src/Kripke/Kernel/Kernel_3d_GZD.cpp | 368 ------------ src/Kripke/Kernel/Kernel_3d_GZD.h | 54 -- src/Kripke/Kernel/Kernel_3d_ZDG.cpp | 372 ------------ src/Kripke/Kernel/Kernel_3d_ZDG.h | 54 -- src/Kripke/Kernel/Kernel_3d_ZGD.cpp | 366 ------------ src/Kripke/Kernel/Kernel_3d_ZGD.h | 54 -- src/Kripke/Kernel/LPlusTimes.cpp | 114 ++++ src/Kripke/Kernel/LTimes.cpp | 130 +++++ src/Kripke/Kernel/Population.cpp | 123 ++++ src/Kripke/Kernel/Scattering.cpp | 188 ++++++ src/Kripke/Kernel/Source.cpp | 137 +++++ src/Kripke/Kernel/SweepSubdomain.cpp | 144 +++++ src/Kripke/Layout.cpp | 372 ------------ src/Kripke/Layout.h | 101 ---- src/Kripke/ParallelComm.cpp | 190 +++--- src/Kripke/ParallelComm.h | 71 ++- src/Kripke/ParallelComm/BlockJacobiComm.cpp | 70 ++- src/Kripke/ParallelComm/SweepComm.cpp | 34 +- 
src/Kripke/SteadyStateSolver.cpp | 144 +++++ .../TestKernels.h => SteadyStateSolver.h} | 17 +- src/Kripke/SubTVec.h | 220 ------- src/Kripke/Subdomain.cpp | 482 --------------- src/Kripke/Subdomain.h | 125 ---- src/Kripke/SweepSolver.cpp | 114 ++++ .../{Kernel/Kernel_3d_GDZ.h => SweepSolver.h} | 40 +- src/Kripke/Sweep_Solver.cpp | 221 ------- src/Kripke/Test/TestKernels.cpp | 171 ------ src/Kripke/Timing.cpp | 198 ++----- src/Kripke/Timing.h | 166 +++--- src/Kripke/VarTypes.h | 182 ++++++ src/KripkeConfig.h.in | 23 + src/kripke.cpp | 350 +++++------ tarball.py | 26 - tpl/README.txt | 5 + tpl/chai | 1 + tpl/raja | 1 + 105 files changed, 8076 insertions(+), 6650 deletions(-) create mode 100644 .gitmodules delete mode 100644 NOTICE.html delete mode 100644 README.html create mode 160000 blt delete mode 100644 cmake/Modules/FindHDF5.cmake delete mode 100644 cmake/Modules/FindPAPI.cmake delete mode 100644 cmake/Modules/FindSilo.cmake delete mode 100644 cmake/Modules/FindTCMalloc.cmake delete mode 100644 cmake/Toolchain/bgqos_0-clang.cmake delete mode 100644 cmake/Toolchain/bgqos_0-gcc.cmake delete mode 100644 cmake/Toolchain/bgqos_0-xlc.cmake delete mode 100644 cmake/Toolchain/chaos_5_x86_64_ib-clang.cmake delete mode 100644 cmake/Toolchain/chaos_5_x86_64_ib-gcc.cmake delete mode 100644 cmake/Toolchain/chaos_5_x86_64_ib-ic12.cmake delete mode 100644 cmake/Toolchain/chaos_5_x86_64_ib-ic14.cmake delete mode 100644 cmake/Toolchain/chaos_5_x86_64_ib-ic15.cmake delete mode 100644 cmake/Toolchain/chaos_5_x86_64_ib-pgi.cmake delete mode 100644 cmake/Toolchain/linux-gcc.cmake create mode 100644 cmake/modules/Findchai.cmake create mode 100644 host-configs/llnl-bgqos-clang.cmake create mode 100644 host-configs/llnl-blueos-P100-nvcc-clang.cmake create mode 100644 host-configs/llnl-blueos-V100-nvcc-clang.cmake create mode 100644 host-configs/llnl-toss3-clang4.cmake create mode 100644 host-configs/llnl-toss3-gcc7.1.cmake create mode 100644 host-configs/llnl-toss3-gcc8.1.cmake create 
mode 100644 host-configs/llnl-toss3-intel18.cmake delete mode 100755 scripts/plotSweepConcur.py delete mode 100755 scripts/plotSweepTrace.py delete mode 100644 src/CMakeLists.txt create mode 100644 src/Kripke/Arch/LPlusTimes.h create mode 100644 src/Kripke/Arch/LTimes.h create mode 100644 src/Kripke/Arch/Population.h create mode 100644 src/Kripke/Arch/Scattering.h create mode 100644 src/Kripke/Arch/Source.h create mode 100644 src/Kripke/Arch/SweepSubdomains.h create mode 100644 src/Kripke/ArchLayout.h delete mode 100644 src/Kripke/CMakeLists.txt rename src/Kripke/{Kernel/Kernel_3d_DZG.h => Core/BaseVar.cpp} (69%) rename src/Kripke/{Kernel/Kernel_3d_DGZ.h => Core/BaseVar.h} (70%) create mode 100644 src/Kripke/Core/Comm.h create mode 100644 src/Kripke/Core/DataStore.cpp create mode 100644 src/Kripke/Core/DataStore.h create mode 100644 src/Kripke/Core/DomainVar.cpp create mode 100644 src/Kripke/Core/DomainVar.h create mode 100644 src/Kripke/Core/Field.h create mode 100644 src/Kripke/Core/PartitionSpace.cpp create mode 100644 src/Kripke/Core/PartitionSpace.h create mode 100644 src/Kripke/Core/Set.cpp create mode 100644 src/Kripke/Core/Set.h create mode 100644 src/Kripke/Core/VarLayout.h delete mode 100644 src/Kripke/Directions.cpp create mode 100644 src/Kripke/Generate.cpp rename src/Kripke/{Kernel.cpp => Generate.h} (62%) create mode 100644 src/Kripke/Generate/Data.cpp create mode 100644 src/Kripke/Generate/Decomp.cpp rename src/Kripke/{Directions.h => Generate/Energy.cpp} (61%) create mode 100644 src/Kripke/Generate/Quadrature.cpp create mode 100644 src/Kripke/Generate/Space.cpp delete mode 100644 src/Kripke/Grid.cpp delete mode 100644 src/Kripke/Grid.h rename src/Kripke/{Input_Variables.cpp => InputVariables.cpp} (87%) rename src/Kripke/{Input_Variables.h => InputVariables.h} (88%) delete mode 100644 src/Kripke/Kernel/Kernel_3d_DGZ.cpp delete mode 100644 src/Kripke/Kernel/Kernel_3d_DZG.cpp delete mode 100644 src/Kripke/Kernel/Kernel_3d_GDZ.cpp delete mode 100644 
src/Kripke/Kernel/Kernel_3d_GZD.cpp delete mode 100644 src/Kripke/Kernel/Kernel_3d_GZD.h delete mode 100644 src/Kripke/Kernel/Kernel_3d_ZDG.cpp delete mode 100644 src/Kripke/Kernel/Kernel_3d_ZDG.h delete mode 100644 src/Kripke/Kernel/Kernel_3d_ZGD.cpp delete mode 100644 src/Kripke/Kernel/Kernel_3d_ZGD.h create mode 100644 src/Kripke/Kernel/LPlusTimes.cpp create mode 100644 src/Kripke/Kernel/LTimes.cpp create mode 100644 src/Kripke/Kernel/Population.cpp create mode 100644 src/Kripke/Kernel/Scattering.cpp create mode 100644 src/Kripke/Kernel/Source.cpp create mode 100644 src/Kripke/Kernel/SweepSubdomain.cpp delete mode 100644 src/Kripke/Layout.cpp delete mode 100644 src/Kripke/Layout.h create mode 100644 src/Kripke/SteadyStateSolver.cpp rename src/Kripke/{Test/TestKernels.h => SteadyStateSolver.h} (85%) delete mode 100644 src/Kripke/SubTVec.h delete mode 100644 src/Kripke/Subdomain.cpp delete mode 100644 src/Kripke/Subdomain.h create mode 100644 src/Kripke/SweepSolver.cpp rename src/Kripke/{Kernel/Kernel_3d_GDZ.h => SweepSolver.h} (70%) delete mode 100644 src/Kripke/Sweep_Solver.cpp delete mode 100644 src/Kripke/Test/TestKernels.cpp create mode 100644 src/Kripke/VarTypes.h create mode 100644 src/KripkeConfig.h.in delete mode 100755 tarball.py create mode 100644 tpl/README.txt create mode 160000 tpl/chai create mode 160000 tpl/raja diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..ac54b450 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,11 @@ +[submodule "blt"] + path = blt + url = https://github.com/LLNL/blt.git +[submodule "tpl/raja"] + path = tpl/raja + url = https://github.com/LLNL/RAJA.git + branch = develop +[submodule "tpl/chai"] + path = tpl/chai + url = https://github.com/LLNL/CHAI.git + branch = develop diff --git a/CMakeLists.txt b/CMakeLists.txt index e7006d4d..867d14fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,146 +1,246 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.8) -project(KRIPKE) 
+cmake_policy(SET CMP0057 NEW) -set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules" ${CMAKE_MODULE_PATH}) -message(STATUS ${CMAKE_MODULE_PATH}) +project(KRIPKE LANGUAGES CXX) +set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules" ${CMAKE_MODULE_PATH}) -# MPI Support (Required) -# If the ENABLE_MPI is turned off, you must use the mpi* compiler wrappers, since -# Kripke will not build without MPI support -set(ENABLE_MPI On CACHE STRING "Turn this off if you are using mpi compiler wrappers") -if(ENABLE_MPI) - find_package(MPI REQUIRED) - include_directories(${MPI_INCLUDE_PATH}) - set(KRIPKE_LIBS ${KRIPKE_LIBS} ${MPI_LIBRARIES}) -endif() +set(KRIPKE_VERSION "1.2.3") -# OpenMP Support (Optional) -if(ENABLE_OPENMP) - find_package(OpenMP REQUIRED) - if(OPENMP_FOUND) - set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}) - add_definitions (-DKRIPKE_USE_OPENMP) - message(STATUS "OpenMP Enabled") - else() - message(WARNING "OpenMP NOT FOUND") - endif() -endif() +# +# Initialize the BLT build system +# +set(ENABLE_GTEST Off CACHE Bool "") +set(ENABLE_TESTS Off CACHE Bool "") +set(ENABLE_TBB Off CACHE Bool "") +set(ENABLE_EXAMPLES Off CACHE Bool "") +set(ENABLE_DOCUMENTATION Off CACHE Bool "") + +# Use C++14 standard +set(BLT_CXX_STD "c++14" CACHE STRING "") + +set(chai_DIR ${CURRENT_SOURCE_DIR}/tpl/chai) + +include(blt/SetupBLT.cmake) + + + +# +# Default Arch and Layout selection +# Sequential by default, but will be overriden if OpenMP or CUDA are enabled +# +set(KRIPKE_ARCH "Sequential") +set(KRIPKE_LAYOUT DGZ) + + +# +# Add CMake CUDA language support +# +if(ENABLE_CUDA) + # Native CMake support + enable_language(CUDA) + + # Make sure we use nvcc with our selected host compiler to link executables + set(CMAKE_CUDA_LINK_EXECUTABLE "${CMAKE_CUDA_COMPILER} -ccbin -o ") + # Use nvcc as the linker + set(CMAKE_LINKER "${CMAKE_CUDA_COMPILER}" CACHE STRING "") + + # Pass nvcc the -cubin to point at our C++ compiler + set(CUDA_HOST_COMPILER 
${CMAKE_CXX_COMPILER}) -# LLNL's Silo file support (Optional) -if(ENABLE_SILO) - find_package(Silo) - if(Silo_FOUND) - add_definitions (-DKRIPKE_USE_SILO) - include_directories(${Silo_INCLUDE_DIRS}) - set(KRIPKE_LIBS ${KRIPKE_LIBS} ${Silo_LIBRARIES}) - message(STATUS "Silo Enabled") - else() - message(WARNING "Silo NOT FOUND") - endif() endif() -# PAPI Support for Hardware Counters (Optional) -if(ENABLE_PAPI) - find_package(PAPI) - if(PAPI_FOUND) - add_definitions(-DKRIPKE_USE_PAPI) - include_directories(${PAPI_INCLUDE_DIRS}) - set(KRIPKE_LIBS ${KRIPKE_LIBS} ${PAPI_LIBRARIES}) - message(STATUS "PAPI Enabled") - else() - message(WARNING "PAPI NOT FOUND") - endif() + +# +# Configure CHAI/Umpire for memory management +# +option(ENABLE_CHAI "Enable CHAI/Umpire memory management" Off) +if(ENABLE_CHAI) + + # Add CHAI (which brings in Umpire as a submodule) + add_subdirectory(tpl/chai) + + set(ENABLE_TESTS Off CACHE Bool "") + + set(KRIPKE_USE_CHAI 1) + + list(APPEND KRIPKE_DEPENDS chai) + endif() -# TCMalloc Support for better malloc/profiling (Optional) -if(ENABLE_TCMALLOC) - find_package(TCMalloc) - if(TCMalloc_FOUND) - add_definitions(-DKRIPKE_USE_TCMALLOC) - include_directories(${TCMalloc_INCLUDE_DIRS}) - set(KRIPKE_LIBS ${KRIPKE_LIBS} ${TCMalloc_LIBRARIES}) - message(STATUS "TCMalloc Enabled") - else() - message(WARNING "TCMalloc NOT FOUND") - endif() +# +# Configure RAJA (REQUIRED) +# + +set(RAJA_ENABLE_TESTS Off CACHE Bool "") +add_subdirectory(tpl/raja) + +list(APPEND KRIPKE_DEPENDS RAJA) + + +# +# Configure OpenMP (CPU threading, not target offload) +# + +if(ENABLE_OPENMP) + + set(KRIPKE_USE_OPENMP 1) + + list(APPEND KRIPKE_DEPENDS openmp) + + set(KRIPKE_ARCH "OpenMP") + endif() -# BG/Q Performance tools (BGPM) Support (Optional, only on BG/Q) -if(ENABLED_BGPM) - find_package(TCMalloc) - if(TCMalloc_FOUND) - add_definitions(-DKRIPKE_USE_TCMALLOC) - include_directories(${TCMalloc_INCLUDE_DIRS}) - set(KRIPKE_LIBS ${KRIPKE_LIBS} ${TCMalloc_LIBRARIES}) - 
message(STATUS "TCMalloc Enabled") - else() - message(WARNING "TCMalloc NOT FOUND") + +# +# Configure CUDA +# + +if(ENABLE_CUDA) + + set(KRIPKE_USE_CUDA 1) + + list(APPEND KRIPKE_DEPENDS cuda) + + set(KRIPKE_ARCH "CUDA") + + # Make sure that nvcc turns on the host compiler OpenMP flag + if(ENABLE_OPENMP) + list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${OpenMP_CXX_FLAGS}) endif() + + + # Pass on compiler info to KripkeConfig.h + string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) + set(KRIPKE_NVCC_COMPILER "${CMAKE_CUDA_COMPILER}") + set(KRIPKE_NVCC_FLAGS "${CMAKE_CUDA_FLAGS} ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") + endif() -# Tarball Creation Support -if(ENABLE_CPACK) - set(CPACK_PACKAGE_VERSION_MAJOR "1") - set(CPACK_PACKAGE_VERSION_MINOR "1") - set(CPACK_PACKAGE_VERSION_PATCH "0") - set(CPACK_SOURCE_GENERATOR "TGZ") - set(CPACK_SOURCE_IGNORE_FILES "tarball.py;kripke-tarball;.git;.cproject;.project;${CPACK_SOURCE_IGNORE_FILES}") - include(CPack) -endif() -# Optional -#bdiv_opt_pkg(perftools 0 OFF) + + +# +# Configure MPI +# +# Use ENABLE_MPI=On if you want CMake to automatically figure out MPI +# using FindMPI # -# Other Build Options +# Use ENABLE_MPI_WRAPPER if you want to use mpi compiler wrappers for CC/CXX # -# Traverse subdirectories -include_directories(src) -add_subdirectory(src) +if(ENABLE_MPI) + set(KRIPKE_USE_MPI 1) + list(APPEND KRIPKE_DEPENDS mpi) +endif() + +if(ENABLE_MPI_WRAPPER) + set(KRIPKE_USE_MPI 1) +endif() + + + + + + -# Setup the kripke target -add_executable(kripke "src/kripke.cpp") -target_link_libraries(kripke ${KRIPKE_LIBS} ${KRIPKE_LIBS}) # -# Testing suite +# Display/Configure our default ArchLayout # +message(STATUS "Kripke selected default architecture: '${KRIPKE_ARCH}'") +message(STATUS "Kripke selected default layout: '${KRIPKE_LAYOUT}'") -enable_testing() +set(KRIPKE_ARCHV_DEFAULT ArchV_${KRIPKE_ARCH}) +set(KRIPKE_LAYOUTV_DEFAULT LayoutV_${KRIPKE_LAYOUT}) -# Use testing harness to check all kernels, all nestings, with 
default problem -add_test(test_default_dgz kripke --nest dgz --test) -add_test(test_default_dzg kripke --nest dzg --test) -add_test(test_default_gdz kripke --nest gdz --test) -add_test(test_default_gzd kripke --nest gzd --test) -add_test(test_default_zdg kripke --nest zdg --test) -add_test(test_default_zgd kripke --nest zgd --test) -# Use testing harness to check use of a zone sets for all nestings -add_test(test_zset_dgz_block kripke --nest dgz --test --zset 2,3,5 --gset 3 --groups 6 --layout 0) -add_test(test_zset_dzg_block kripke --nest dzg --test --zset 2,3,5 --gset 3 --groups 6 --layout 0) -add_test(test_zset_gdz_block kripke --nest gdz --test --zset 2,3,5 --gset 3 --groups 6 --layout 0) -add_test(test_zset_gzd_block kripke --nest gzd --test --zset 2,3,5 --gset 3 --groups 6 --layout 0) -add_test(test_zset_zdg_block kripke --nest zdg --test --zset 2,3,5 --gset 3 --groups 6 --layout 0) -add_test(test_zset_zgd_block kripke --nest zgd --test --zset 2,3,5 --gset 3 --groups 6 --layout 0) -# Also check with scattered layout (just checking parallel algo, so just 1 nesting) -add_test(test_zset_dgz_scatter kripke --nest dgz --zset 2,3,5 --gset 3 --groups 6 --layout 1) -# Test the block-Jacobi parallel method -add_test(test_bj_dgz kripke --nest dgz --pmethod bj) +# +# Pass compiler options to our KripkeConfig.h file so we can print them at +# runtime +# +set(KRIPKE_CXX_COMPILER "${CMAKE_CXX_COMPILER}") +set(KRIPKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${KRIPKE_BUILD_TYPE}}") +set(KRIPKE_LINK_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_LINK_FLAGS_${CMAKE_BUILD_TYPE}}") + + + + + + + +# +# Create KripkeConfig.h file +# + +configure_file(${PROJECT_SOURCE_DIR}/src/KripkeConfig.h.in + ${PROJECT_BINARY_DIR}/include/KripkeConfig.h) + + + + + + +# +# Define build targets for Kripke +# + + + +blt_add_library( + NAME kripke + SOURCES "src/Kripke/Core/BaseVar.cpp" + "src/Kripke/Core/DataStore.cpp" + "src/Kripke/Core/DomainVar.cpp" + "src/Kripke/Generate.cpp" + 
"src/Kripke/Generate/Data.cpp" + "src/Kripke/Generate/Decomp.cpp" + "src/Kripke/Generate/Energy.cpp" + "src/Kripke/Generate/Quadrature.cpp" + "src/Kripke/Generate/Space.cpp" + "src/Kripke/InputVariables.cpp" + "src/Kripke/Kernel/LPlusTimes.cpp" + "src/Kripke/Kernel/LTimes.cpp" + "src/Kripke/Kernel/Population.cpp" + "src/Kripke/Kernel/Scattering.cpp" + "src/Kripke/Kernel/Source.cpp" + "src/Kripke/Kernel/SweepSubdomain.cpp" + "src/Kripke/ParallelComm/BlockJacobiComm.cpp" + "src/Kripke/ParallelComm/SweepComm.cpp" + "src/Kripke/ParallelComm.cpp" + "src/Kripke/Core/PartitionSpace.cpp" + "src/Kripke/Core/Set.cpp" + "src/Kripke/SteadyStateSolver.cpp" + "src/Kripke/SweepSolver.cpp" + "src/Kripke/Timing.cpp" + DEPENDS_ON ${KRIPKE_DEPENDS} +) + + +target_include_directories(kripke PUBLIC + $ + $) + +blt_add_executable( + NAME kripke.exe + SOURCES "src/kripke.cpp" + DEPENDS_ON ${KRIPKE_DEPENDS} kripke +) + diff --git a/NOTICE.html b/NOTICE.html deleted file mode 100644 index 43514bc5..00000000 --- a/NOTICE.html +++ /dev/null @@ -1,15 +0,0 @@ -

LLNL-CODE-658597 -Title: Kripke, Version: 1.1 -Author(s) Adam J. Kunen, etc. all......

- -

NOTICE

- -

This work was produced at the Lawrence Livermore National Laboratory (LLNL) under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. Department of Energy (DOE) and Lawrence Livermore National Security, LLC (LLNS) for the operation of LLNL. The rights of the Federal Government are reserved under Contract 44.

- -

DISCLAIMER

- -

This work was prepared as an account of work sponsored by an agency of the United States Government. Neither the United States Government nor Lawrence Livermore National Security, LLC nor any of their employees, makes any warranty, express or implied, or assumes any liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately-owned rights. Reference herein to any specific commercial products, process, or service by trade name, trademark, manufacturer or otherwise does not necessarily constitute or imply its endorsement, recommendation, or favoring by the United States Government or Lawrence Livermore National Security, LLC. The views and opinions of authors expressed herein do not necessarily state or reflect those of the United States Government or Lawrence Livermore National Security, LLC, and shall not be used for advertising or product endorsement purposes.

- -

NOTIFICATION OF COMMERCIAL USE

- -

Commercialization of this product is prohibited without notifying the Department of Energy (DOE) or Lawrence Livermore National Security.

diff --git a/README.html b/README.html deleted file mode 100644 index 1a9bc6fa..00000000 --- a/README.html +++ /dev/null @@ -1,237 +0,0 @@ -

KRIPKE

- -

Version 1.1

- -

Release Date 9/13/2015

- -

Authors

- - - -

License

- -

See included file NOTICE.md

- -

Overview

- -

Kripke is a simple, scalable, 3D Sn deterministic particle transport code. Its primary purpose is to research how data layout, programming paradigms and architectures affect the implementation and performance of Sn transport. A main goal of Kripke is investigating how different data-layouts affect instruction, thread and task level parallelism, and what the implications are on overall solver performance.

- -

Kripke supports storage of angular fluxes (Psi) using all six striding orders (or "nestings") of Directions (D), Groups (G), and Zones (Z), and provides computational kernels specifically written for each of these nestings. Most Sn transport codes are designed around one of these nestings, which is an inflexibility that leads to software engineering compromises when porting to new architectures and programming paradigms.

- -

Early research has found that the problem dimensions (zones, groups, directions, scattering order) and the scaling (number of threads and MPI tasks) can make a profound difference in the performance of each of these nestings. To our knowledge this is a capability unique to Kripke, and should provide key insight into how data-layout affects Sn solver performance. An asynchronous MPI-based parallel sweep algorithm is provided, which employs the concepts of Group Sets (GS), Zone Sets (ZS), and Direction Sets (DS), borrowed from the Texas A&M code PDT.

- -

As we explore new architectures and programming paradigms with Kripke, we will be able to incorporate these findings and ideas into our larger codes. The main advantages of using Kripke for this exploration are that it's light-weight (i.e., easily refactored and modified), and that it gets us closer to the real question we want answered: "What is the best way to lay out and implement an Sn code on a given architecture+programming-model?" instead of the more commonly asked question "What is the best way to map my existing Sn code to a given architecture+programming-model?".

- -

Mini App or Proxy App?

- -

Kripke is a Mini-App since it has a very small code base consisting of 4184 lines of C++ code (generated using David A. Wheeler's SLOCCount v2.26).

- -

Kripke is also a Proxy-App since it is a proxy for the LLNL transport code ARDRA.

- -

Analysis

- -

A major challenge of achieving high-performance in an Sn transport (or any physics) code is choosing a data-layout and a parallel decomposition that lends itself to the targeted architecture. Often the data-layout determines the most efficient nesting of loops in computational kernels, which then determines how well your inner-most-loop SIMDizes, how you add threading (pthreads, OpenMP, etc.), and the efficiency and design of your parallel algorithms. Therefore, each nesting produces different loop nesting orders, which provides substantially different performance characteristics. We want to explore how easily and efficiently these different nestings map to different architectures. In particular, we are interested in how we can achieve good parallel efficiency while also achieving efficient use of node resources (such as SIMD units, memory systems, and accelerators).

- -

Parallel sweep algorithms can be explored with Kripke in multiple ways. The core MPI algorithm could be modified or rewritten to explore other approaches, domain overloading, or alternate programming models (such as Charm++). The effect of load-imbalance is an understudied aspect of Sn transport sweeps, and could easily be studied with Kripke by artificially adding more work (ie unknowns) to a subset of MPI tasks. Block-AMR could be added to Kripke, which would be a useful way to explore the cost-benefit analysis of adding AMR to an Sn code, and would be a way to further study load imbalances and AMR effects on sweeps.

- -

The coupling of on-node sweep kernel, the parallel sweep algorithm, and the choices of decomposing the problem phase space into GS's, ZS's and DS's impact the performance of the overall sweep. The tradeoff between large and small "units of work" can be studied. Larger "units of work" provide more opportunity for on-node parallelism, while creating larger messages, less "sends", and less efficient parallel sweeps. Smaller "units of work" make for less efficient on-node kernels, but more efficient parallel sweeps.

- -

We can also study trading MPI tasks for threads, and the effects this has on our programming models and cache efficiency.

- -

A simple timer infrastructure is provided that measures each compute kernel's total time.

- -

Physical Models

- -

Kripke solves the Discrete Ordinates and Diamond Difference discretized steady-state linear Boltzmann equation.

- -
    H * Psi = (LPlus * S * L) * Psi + Q
-
- -

Where:

- -
    -
  • Psi is the unknown angular flux discretized over zones, directions, and energy groups

  • -
  • H is the "streaming-collision" operator. (Couples zones)

  • -
  • L is the "discrete-to-moments" operator. (Couples directions and moments)

  • -
  • LPlus is the "moment-to-discrete" operator. (Couples directions and moments)

  • -
  • S is the (arbitrary) order scattering operator. (Couples groups)

  • -
  • Q is an external source. In Kripke it is represented in moment space, so really "LPlus*Q"

  • -
- -

Kripke is hard-coded to setup and solve the 3D Kobayashi radiation benchmark, problem 3i. Since Kripke does not have reflecting boundary conditions, the full-space model is solved. Command line arguments allow the user to modify the total and scattering cross-sections. Since Kripke is a multi-group transport code and the Kobayashi problem is single-group, each energy group is setup to solve the same problem with no group-to-group coupling in the data.

- -

The steady-state solution method uses the source-iteration technique, where each iteration is as follows:

- -
    -
  1. Phi = LTimes(Psi)
  2. -
  3. PhiOut = Scattering(Phi)
  4. -
  5. PhiOut = PhiOut + Source()
  6. -
  7. Rhs = LPlusTimes(PhiOut)
  8. -
  9. Psi = Sweep(Rhs, Psi) which is solving Psi=(Hinverse * Rhs) a.k.a "Inverting H"
  10. -
- -

Building and Running

- -

Kripke comes with a simple CMake based build system.

- -

Requirements

- -
    -
  • CMake 3.0 or later
  • -
  • C++ Compiler (g++, icpc, etc.)
  • -
  • MPI 1.0 or later
  • -
- -

Quick Start

- -

The easiest way to get Kripke running, is to directly invoke CMake and take whatever system defaults you have for compilers and let CMake find MPI for you.

- -
    -
  • Step 1: Create a build space (assuming you are starting in the Kripke root directory)

    - -
    mkdir build
    -
  • -
  • Step 2: Run CMake in that build space

    - -
    cd build
    -cmake ..
    -
  • -
  • Step 3: Now make Kripke:

    - -
    make -j8
    -
  • -
  • Step 4: Run the test suite to make sure it works

    - -
    make test
    -
  • -
  • Step 5: Run Kripke's default problem:

    - -
    ./kripke
    -
  • -
- -

Running Kripke

- -

Environment Variables

- -

If Kripke is built with OpenMP support, then the environment variable OMP_NUM_THREADS is used to control the number of OpenMP threads. Kripke does not attempt to modify the OpenMP runtime in any way, so other OMP_* environment variables should work as well.

- -

Command Line Options

- -

Command line option help can also be viewed by running "./kripke --help"

- -

Problem Size Options:

- -
    -
  • --groups <ngroups>

    - -

    Number of energy groups. (Default: --groups 32)

  • -
  • --legendre <lorder>

    - -

    Scattering Legendre Expansion Order (0, 1, ...). (Default: --legendre 4)

  • -
  • --quad <ndirs>, or --quad <polar>:<azim>

    - -

    Define the quadrature set to use either a fake S2 with <ndirs> points, OR Gauss-Legendre with <polar> by <azim> points. (Default: --quad 96)

  • -
  • --zones <x>,<y>,<z>

    - -

    Number of zones in x,y,z. (Default: --zones 16,16,16)

  • -
- -

Physics Parameters:

- -
    -
  • --sigt <sigt0,sigt1,sigt2>

    - -

    Total material cross-sections. (Default: --sigt 0.1,0.0001,0.1)

  • -
  • --sigs <sigs0,sigs1,sigs2>

    - -

    Scattering material cross-sections. (Default: --sigs 0.05,0.00005,0.05)

  • -
- -

On-Node Options:

- -
    -
  • --nest <NEST>

    - -

    Loop nesting order (and data layout), available are DGZ, DZG, GDZ, GZD, ZDG, and ZGD. (Default: --nest DGZ)

  • -
- -

Parallel Decomposition Options:

- -
    -
  • --layout <lout>

    - -

    Layout of spatial subdomains over MPI ranks. 0 for "Blocked" where local zone sets represent adjacent regions of space. 1 for "Scattered" where adjacent regions of space are distributed to adjacent MPI ranks. (Default: --layout 0)

  • -
  • --procs <npx,npy,npz>

    - -

    Number of MPI ranks in each spatial dimension. (Default: --procs 1,1,1)

  • -
  • --dset <ds>

    - -

    Number of direction-sets. Must be a factor of 8, and must evenly divide the number of quadrature points. (Default: --dset 8)

  • -
  • --gset <gs>

    - -

    Number of energy group-sets. Must evenly divide the number of energy groups. (Default: --gset 1)

  • -
  • --zset <zx>,<zy>,<zz>

    - -

    Number of zone-sets in x, y, and z. (Default: --zset 1,1,1)

  • -
- -

Solver Options:

- -
    -
  • --niter <NITER>

    - -

    Number of solver iterations to run. (Default: --niter 10)

  • -
  • --pmethod <method>

    - -

    Parallel solver method. "sweep" for full up-wind sweep (wavefront algorithm). "bj" for Block Jacobi. (Default: --pmethod sweep)

  • -
- -

Output and Testing Options:

- -
    -
  • --test

    - -

    Run Kernel Test instead of solve

  • -
  • --silo <siloname>

    - -

    Write SILO output (requires building with LLNL's Silo library)

  • -
  • --papi <PAPI_XXX_XXX,...>

    - -

    Track PAPI hardware counters for each timer. (requires building with PAPI library)

  • -
- -

Test Suite

- -

Running with the --test command line argument will run a unit-testing framework that will compare each kernel, using random input data, with the same kernel from a different nesting. This is very useful for checking correctness of kernels after modification.

- -

Running make test will use the CMake testing framework, CTest, to run a series of tests outlined in the root CMakeLists.txt file.

- -

Future Plans

- -

Some ideas for future study:

- -
    -
  • Block AMR.

  • -
  • More FLOP intensive spatial discretizations such as DFEM's.

  • -
  • Programming model abstractions

  • -
- -

Retirement

- -

Retirement of this Mini-App should be considered when it is no longer representative of state-of-the-art transport codes, or when it becomes too cumbersome to adapt to advanced architectures. Also, at the point of retirement it should be clear how to design its successor.

- -

Links

- - - -

Release

- -

LLNL-CODE-658597

diff --git a/README.md b/README.md index 80cb7cce..a4a3f009 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ KRIPKE ====== -Version 1.1 +Version 1.2.3 -Release Date 9/13/2015 +Release Date 10/12/2018 + +LLNL-CODE-658597 Authors @@ -19,6 +21,17 @@ License See included file NOTICE.md +Changes +======= + + * 10/12/2018 v1.2.3: CUDA support, updated policy selection/dispatch mechanisms, updated build system + * 04/04/2018 v1.2.2-CORAL2: Fixed bug in Population edit + * 03/26/2018 v1.2.1-CORAL2: Updated to RAJA-0.6.0rc2, fixed FOM calcuation and updated docs + * 11/02/2017 v1.2.0-CORAL2: Initial release for CORAL2 + + + + Overview ======== Kripke is a simple, scalable, 3D Sn deterministic particle transport code. Its primary purpose is to research how data layout, programming paradigms and architectures effect the implementation and performance of Sn transport. A main goal of Kripke is investigating how different data-layouts affect instruction, thread and task level parallelism, and what the implications are on overall solver performance. @@ -32,7 +45,7 @@ As we explore new architectures and programming paradigms with Kripke, we will b Mini App or Proxy App? ---------------------- -Kripke is a Mini-App since it has a very small code base consisting of 4233 lines of C++ code (generated using David A. Wheeler's SLOCCount v2.26). +Kripke is a Mini-App since it has a very small code base consisting of about 5000 lines of C++ code (using cloc v1.67). Kripke is also a Proxy-App since it is a proxy for the LLNL transport code ARDRA. @@ -88,15 +101,56 @@ The steady-state solution method uses the source-iteration technique, where each Building and Running ==================== -Kripke comes with a simple CMake based build system. +Kripke comes with a BLT(CMake) based build system based. Requirements ------------ -* CMake 3.0 or later -* C++ Compiler (g++, icpc, etc.) 
-* MPI 1.0 or later +Basic requirements: + +* CMake 3.8 or later (3.9.2 or later for CUDA support) + +* C++14 Compiler (g++, icpc, etc.) + +* (Optional) MPI 1.0 or later + +* (Optional) OpenMP 3 or later + +Submodule dependencies: + +* [BLT](https://github.com/LLNL/blt) v0.1: a CMake based build system (required) +* [RAJA](https://github.com/LLNL/RAJA) v0.6.0: a loop abstraction library (required) + +* [CHAI](https://github.com/LLNL/CHAI) v1.1: a copy hiding abstraction for moving data between memory spaces (optional) + +* [Umpire](https://github.com/LLNL/Umpire): a memory management abstraction (required if using CHAI) + +* [Cub](https://github.com/NVlabs/cub.git): algorithm primitives library for CUDA (required by RAJA if using CUDA) + + +Getting Kripke +-------------- +Two options are available: +* Download a released source tarball from github: https://github.com/LLNL/Kripke/releases +* Clone the source from github. + + +The following are the instruction for cloning the tarball, and setting up your clone repository. + +Clone the latest released version from github: + + git clone https://github.com/LLNL/Kripke.git + +Clone all of the submodules. The Kripke build system, BLT, resides in +another repository on github so one must initialize and update the "git submodules" + + cd Kripke + git submodule update --init --recursive + +The released source tarball on github is created with all of the submodules included already. + + Quick Start ----------- @@ -108,29 +162,50 @@ The easiest way to get Kripke running, is to directly invoke CMake and take what * Step 2: Run CMake in that build space - cd kripke + cd build cmake .. + For a number of platforms, we have cache inits file that makes things easier: + + cd build + cmake .. 
-C../host-configs/llnl-bgqos-clang.cmake + * Step 3: Now make Kripke: make -j8 -* Step 4: Run the test suite to make sure it works - - make test - * Step 5: Run Kripke's default problem: - ./kripke + ./bin/kripke.exe +There are a number of cache init files for LLNL machines and operating systems. +These might not meet your needs, but can be a very good starting point for developing your own. +The current list of cache init files (located in the ./host-confgs/ directory) are: + +* llnl-bgqos-clang.cmake + +* llnl-toss3-clang4.cmake + +* llnl-toss3-intel18.cmake + +* llnl-toss3-gcc7.1.cmake + +* llnl-toss3-gcc8.1.cmake + +* llnl-blueos-P100-nvcc-clang.cmake + +* llnl-blueos-V100-nvcc-clang.cmake + + + Running Kripke ============== Environment Variabes -------------------- -If Kripke is build with OpenMP support, then the environment variables ``OMP_NUM_THREADS`` is used to control the number of OpenMP threads. Kripke does not attempt to modify the OpenMP runtime in anyway, so other ``OMP_*`` environment variables should also work as well. +If Kripke is built with OpenMP support, then the environment variables ``OMP_NUM_THREADS`` is used to control the number of OpenMP threads. Kripke does not attempt to modify the OpenMP runtime in anyway, so other ``OMP_*`` environment variables should also work as well. Command Line Options @@ -169,14 +244,18 @@ Command line option help can also be viewed by running "./kripke --help" ### On-Node Options: -* **``--nest ``** +* **``--arch ``** - Loop nesting order (and data layout), available are DGZ, DZG, GDZ, GZD, ZDG, and ZGD. (Default: --nest DGZ) + Architecture selection. Selects the back-end used for computation, available are Sequential, OpenMP and CUDA. The default depends on capabilities selected by the build system and is selected from list of increasing precedence: Sequential, OpenMP and CUDA. + +* **``--layout ``** + + Data layout selection. 
This determines the data layout and kernel implementation details (such as loop nesting order). The layouts are determined by the order of unknwons in the angular flux: Direction, Group, and Zone. Available layouts are DGZ, DZG, GDZ, GZD, ZDG, and ZGD. The order is specified left-to-right in longest-to-shortes stride. For example: DGZ means that Directions are the longest stride, and Zones are stride-1. (Default: --nest DGZ) ###Parallel Decomposition Options: -* **``--layout ``** +* **``--pdist ``** Layout of spatial subdomains over mpi ranks. 0 for "Blocked" where local zone sets represent adjacent regions of space. 1 for "Scattered" where adjacent regions of space are distributed to adjacent MPI ranks. (Default: --layout 0) @@ -208,27 +287,6 @@ Command line option help can also be viewed by running "./kripke --help" Parallel solver method. "sweep" for full up-wind sweep (wavefront algorithm). "bj" for Block Jacobi. (Default: --pmethod sweep) -### Output and Testing Options: - -* **``--test``** - - Run Kernel Test instead of solve - -* **``--silo ``** - - Write SILO output (requires building with LLNL's Silo library) - -* **``--papi ``** - - Track PAPI hardware counters for each timer. (requires building with PAPI library) - - -Test Suite ----------- - -Running with the ``--test`` command line argument will run a unit-testing frame work that will compare each kernel, using random input data, with the same kernel from a different nesting. This is very useful for checking correctness of kernels after modification. - -Running ``make test`` will use the CMake testing framework, CTest, to run a series of tests outlined in the root ``CMakeLists.txt`` file. Future Plans @@ -236,11 +294,12 @@ Future Plans Some ideas for future study: -* Block AMR. +* More tuning of CUDA implementation + +* Block AMR -* More FLOP intensive spatial discretizations such as DFEM's. 
+* More FLOP intensive spatial discretizations such as DFEM's -* Programming model abstractions Retirement diff --git a/blt b/blt new file mode 160000 index 00000000..d755326d --- /dev/null +++ b/blt @@ -0,0 +1 @@ +Subproject commit d755326d014dad76eb14d2665f695ee29a7019e9 diff --git a/cmake/Modules/FindHDF5.cmake b/cmake/Modules/FindHDF5.cmake deleted file mode 100644 index 77dda19a..00000000 --- a/cmake/Modules/FindHDF5.cmake +++ /dev/null @@ -1,46 +0,0 @@ -# -# Find the native HDF5 includes and library -# -# HDF5_INCLUDE_DIR - where to find H5public.h, etc. -# HDF5_LIBRARIES - List of fully qualified libraries to link against when using hdf5. -# HDF5_FOUND - Do not attempt to use hdf5 if "no" or undefined. -message("Path = ${HDF5_DIR}") - -# First try to find using the user-specific HDF info. -find_path(HDF5_INCLUDE_DIR - NAMES H5public.h - PATHS ${HDF5_DIR}/include - ${HDF5_INC} - NO_DEFAULT_PATH -) -find_library(HDF5_LIBRARY_CORE hdf5 hdf5dll - PATHS ${HDF5_DIR}/lib - ${HDF5_LIB} - NO_DEFAULT_PATH -) - -# if that fails, use the full path -find_path(HDF5_INCLUDE_DIR - NAMES H5public.h - PATHS ${HDF5_DIR}/include - ${HDF5_INC} -) -find_library(HDF5_LIBRARY hdf5 hdf5dll - PATHS ${HDF5_DIR}/lib - ${HDF5_LIB} -) -message("HDF5_INCLUDE_DIR: ${HDF5_INCLUDE_DIR}") -message("HDF5_LIBRARY: ${HDF5_LIBRARY}") - -set( HDF5_FOUND "NO" ) -if(HDF5_INCLUDE_DIR) - if(HDF5_LIBRARY) - set( HDF5_LIBRARIES ${HDF5_LIBRARY}) - set( HDF5_FOUND "YES" ) - endif(HDF5_LIBRARY) -elseif(HDF5_INCLUDE_DIR) - message("HDF5 not found. Try setting HDF5_DIR.") -endif(HDF5_INCLUDE_DIR) - - - diff --git a/cmake/Modules/FindPAPI.cmake b/cmake/Modules/FindPAPI.cmake deleted file mode 100644 index e72ceaed..00000000 --- a/cmake/Modules/FindPAPI.cmake +++ /dev/null @@ -1,45 +0,0 @@ -# Try to find PAPI headers and libraries. 
-# -# Usage of this module as follows: -# -# find_package(PAPI) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# PAPI_PREFIX Set this variable to the root installation of -# libpapi if the module has problems finding the -# proper installation path. -# -# Variables defined by this module: -# -# PAPI_FOUND System has PAPI libraries and headers -# PAPI_LIBRARIES The PAPI library -# PAPI_INCLUDE_DIRS The location of PAPI headers - -find_path(PAPI_PREFIX - NAMES include/papi.h -) - -find_library(PAPI_LIBRARIES - # Pick the static library first for easier run-time linking. - NAMES papi - HINTS ${PAPI_PREFIX}/lib ${HILTIDEPS}/lib -) - -find_path(PAPI_INCLUDE_DIRS - NAMES papi.h - HINTS ${PAPI_PREFIX}/include ${HILTIDEPS}/include -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PAPI DEFAULT_MSG - PAPI_LIBRARIES - PAPI_INCLUDE_DIRS -) - -mark_as_advanced( - PAPI_PREFIX_DIRS - PAPI_LIBRARIES - PAPI_INCLUDE_DIRS -) diff --git a/cmake/Modules/FindSilo.cmake b/cmake/Modules/FindSilo.cmake deleted file mode 100644 index df9c8256..00000000 --- a/cmake/Modules/FindSilo.cmake +++ /dev/null @@ -1,50 +0,0 @@ -# (Slightly adapted from S. Johnson) -# - Find LLNL's Silo library -# This module defines -# Silo_INCLUDE_DIR, where to find blitz/blitz.h, etc. -# Silo_LIBRARIES, libraries to link against to use Silo. -# Silo_FOUND, If false, do not try to use Silo. -# also defined, but not for general use are -# Silo_LIBRARY, where to find the Silo library. -# The user should specify the head Silo director, Silo_DIR, -# or Silo_INC and Silo_LIB. 
- -find_path(Silo_INCLUDE_DIRS - NAMES silo.h - PATHS ${Silo_DIR}/include - ${Silo_INC} -) - -find_library(Silo_LIBRARY - NAMES siloh5 silo siloxx - PATHS ${Silo_DIR}/lib - ${Silo_LIB} -) - -if (Silo_LIBRARY MATCHES "siloh5") - FIND_PACKAGE(HDF5 REQUIRED) -endif() - -# Requires ZLib -#find_package(ZLIB REQUIRED) - -SET( Silo_FOUND "NO" ) -IF(Silo_INCLUDE_DIRS) - IF(Silo_LIBRARY) - - SET( Silo_LIBRARIES ${Silo_LIBRARY}) - SET( Silo_FOUND "YES" ) - - #The following deprecated settings are for backwards compatibility with CMake1.4 - SET (Silo_INCLUDE_PATH ${Silo_INCLUDE_DIR}) - - ENDIF(Silo_LIBRARY) -ENDIF(Silo_INCLUDE_DIRS) - -MARK_AS_ADVANCED( - Silo_INCLUDE_DIR - Silo_LIBRARY -) - - - diff --git a/cmake/Modules/FindTCMalloc.cmake b/cmake/Modules/FindTCMalloc.cmake deleted file mode 100644 index 010eb606..00000000 --- a/cmake/Modules/FindTCMalloc.cmake +++ /dev/null @@ -1,30 +0,0 @@ - -find_path(TCMalloc_INCLUDE_DIRS - NAMES gperftools/tcmalloc.h - PATHS ${TCMalloc_DIR}/include - ${TCMalloc_INC} -) - -find_library(TCMalloc_LIBRARY - NAMES tcmalloc - PATHS ${TCMalloc_DIR}/lib - ${TCMalloc_LIB} -) - -SET( TCMalloc_FOUND "NO" ) -IF(TCMalloc_INCLUDE_DIRS) - IF(TCMalloc_LIBRARY) - - SET( TCMalloc_LIBRARIES ${TCMalloc_LIBRARY}) - SET( TCMalloc_FOUND "YES" ) - - ENDIF() -ENDIF() - -MARK_AS_ADVANCED( - TCMalloc_INCLUDE_DIR - TCMalloc_LIBRARY -) - - - diff --git a/cmake/Toolchain/bgqos_0-clang.cmake b/cmake/Toolchain/bgqos_0-clang.cmake deleted file mode 100644 index 8458e3c7..00000000 --- a/cmake/Toolchain/bgqos_0-clang.cmake +++ /dev/null @@ -1,12 +0,0 @@ - -set(CMAKE_C_COMPILER "mpiclang") - -set(CMAKE_CXX_COMPILER "mpiclang++11") - -set(CMAKE_LINKER "mpiclang++11") - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -static -Wswitch -g ") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -static -Wswitch -g") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/opt") - diff --git a/cmake/Toolchain/bgqos_0-gcc.cmake b/cmake/Toolchain/bgqos_0-gcc.cmake deleted file mode 100644 index 
6a04ef5a..00000000 --- a/cmake/Toolchain/bgqos_0-gcc.cmake +++ /dev/null @@ -1,12 +0,0 @@ - -set(CMAKE_C_COMPILER mpigcc-4.7.2-fastmpi) - -set(CMAKE_CXX_COMPILER mpig++-4.7.2-fastmpi) - -set(CMAKE_LINKER mpig++-4.7.2-fastmpi) - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -mcpu=a2 -mtune=a2 -finline-functions -finline-limit=20000 -std=c++11") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mcpu=a2 -mtune=a2 -finline-functions -finline-limit=20000 -std=c++11 -ftree-vectorizer-verbose=6") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/opt") - diff --git a/cmake/Toolchain/bgqos_0-xlc.cmake b/cmake/Toolchain/bgqos_0-xlc.cmake deleted file mode 100644 index b5dbb353..00000000 --- a/cmake/Toolchain/bgqos_0-xlc.cmake +++ /dev/null @@ -1,23 +0,0 @@ - -set(CMAKE_C_COMPILER mpixlcxx_r) - -set(CMAKE_CXX_COMPILER mpixlcxx_r) - -#set(CMAKE_LINKER "mpixlcxx_r") - -#set(CMAKE_LINKER "memcheck_link mpixlcxx_r") - - -#set(CMAKE_C_COMPILER /usr/local/tools/compilers/ibm/mpixlc_r-lompbeta2-fastmpi) - -#set(CMAKE_CXX_COMPILER /usr/local/tools/compilers/ibm/mpixlcxx_r-lompbeta2-fastmpi) - -#set(CMAKE_LINKER /usr/local/tools/compilers/ibm/mpixlcxx_r-lompbeta2-fastmpi) - - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qarch=auto") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qsimd=auto -qhot=novector -qnostrict -qreport -qsource -qlist -qlistfmt=html") -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-zmuldefs") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/opt") - diff --git a/cmake/Toolchain/chaos_5_x86_64_ib-clang.cmake b/cmake/Toolchain/chaos_5_x86_64_ib-clang.cmake deleted file mode 100644 index 86394d89..00000000 --- a/cmake/Toolchain/chaos_5_x86_64_ib-clang.cmake +++ /dev/null @@ -1,12 +0,0 @@ - -set(CMAKE_C_COMPILER "mpiclang") - -set(CMAKE_CXX_COMPILER "mpiclang++") - -set(CMAKE_LINKER "mpiclang++ -pie") - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -std=c++11") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -std=c++11") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/opt") - diff --git 
a/cmake/Toolchain/chaos_5_x86_64_ib-gcc.cmake b/cmake/Toolchain/chaos_5_x86_64_ib-gcc.cmake deleted file mode 100644 index 98230a38..00000000 --- a/cmake/Toolchain/chaos_5_x86_64_ib-gcc.cmake +++ /dev/null @@ -1,10 +0,0 @@ -set(CMAKE_C_COMPILER "mpigcc") -set(CMAKE_CXX_COMPILER "mpig++") -set(CMAKE_LINKER "mpig++") - -set(CMAKE_C_FLAGS "-g -mtune=native ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "-g -std=c++11 -mtune=native ${CMAKE_CXX_FLAGS}") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/gnu-4.9-opt") - - diff --git a/cmake/Toolchain/chaos_5_x86_64_ib-ic12.cmake b/cmake/Toolchain/chaos_5_x86_64_ib-ic12.cmake deleted file mode 100644 index 85e23856..00000000 --- a/cmake/Toolchain/chaos_5_x86_64_ib-ic12.cmake +++ /dev/null @@ -1,10 +0,0 @@ -set(ICC_VER 12.1.339) - -set(CMAKE_C_COMPILER mpiicc-${ICC_VER}) - -set(CMAKE_CXX_COMPILER mpiicpc-${ICC_VER}) - -set(CMAKE_LINKER mpiicpc-${ICC_VER}) - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/icc-12.1-opt") - diff --git a/cmake/Toolchain/chaos_5_x86_64_ib-ic14.cmake b/cmake/Toolchain/chaos_5_x86_64_ib-ic14.cmake deleted file mode 100644 index b050de8a..00000000 --- a/cmake/Toolchain/chaos_5_x86_64_ib-ic14.cmake +++ /dev/null @@ -1,14 +0,0 @@ -set(ICC_VER 14.0.174) - -set(CMAKE_C_COMPILER mpiicc-${ICC_VER}) -set(CMAKE_CXX_COMPILER mpiicpc-${ICC_VER}) -set(CMAKE_LINKER mpiicpc-${ICC_VER}) - -set(MPI "-mpi=mvapich2-intel-1.9") - -set(CMAKE_C_FLAGS "${MPI} -g -fno-omit-frame-pointer -unroll-aggressive -finline-functions -axAVX -msse4.2 -no-fma ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${MPI} -g -unroll-aggressive -finline-functions -axAVX -msse4.2 ${CMAKE_CXX_FLAGS}") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/icc-14.0-opt-mvapich2-intel-1.9") - - diff --git a/cmake/Toolchain/chaos_5_x86_64_ib-ic15.cmake b/cmake/Toolchain/chaos_5_x86_64_ib-ic15.cmake deleted file mode 100644 index 6b1bea6f..00000000 --- a/cmake/Toolchain/chaos_5_x86_64_ib-ic15.cmake +++ /dev/null @@ -1,14 +0,0 @@ -set(ICC_VER 15.0.133) - -set(CMAKE_C_COMPILER 
mpiicc-${ICC_VER}) -set(CMAKE_CXX_COMPILER mpiicpc-${ICC_VER}) -set(CMAKE_LINKER mpiicpc-${ICC_VER}) - -set(MPI "-mpi=mvapich2-intel-1.9") - -set(CMAKE_C_FLAGS "${MPI} -g -fno-omit-frame-pointer -unroll-aggressive -finline-functions -axAVX -msse4.2 -no-fma ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${MPI} -g -qopt-report=5 -std=c++11 -unroll-aggressive -finline-functions -axAVX -msse4.2 ${CMAKE_CXX_FLAGS}") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/icc-14.0-opt-mvapich2-intel-1.9") - - diff --git a/cmake/Toolchain/chaos_5_x86_64_ib-pgi.cmake b/cmake/Toolchain/chaos_5_x86_64_ib-pgi.cmake deleted file mode 100644 index 4d9985b6..00000000 --- a/cmake/Toolchain/chaos_5_x86_64_ib-pgi.cmake +++ /dev/null @@ -1,10 +0,0 @@ - -set(CMAKE_C_COMPILER "mpipgcc") - -set(CMAKE_CXX_COMPILER "mpipgCC") - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g --c++11 -fast -Mipa=fast,inline") - -set(PKG_PATH "/usr/gapps/bdiv/${SYS_TYPE}/opt") - diff --git a/cmake/Toolchain/linux-gcc.cmake b/cmake/Toolchain/linux-gcc.cmake deleted file mode 100644 index 9bad1b47..00000000 --- a/cmake/Toolchain/linux-gcc.cmake +++ /dev/null @@ -1,11 +0,0 @@ -set(CMAKE_C_COMPILER "mpicc") -set(CMAKE_CXX_COMPILER "mpic++") -set(CMAKE_LINKER "mpic++") - -set(CMAKE_C_FLAGS "-g -mtune=native ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "-g -O0 ${CMAKE_CXX_FLAGS}") -#set(CMAKE_CXX_FLAGS "-g -std=c++11 -mtune=native ${CMAKE_CXX_FLAGS}") - -set(PKG_PATH "/usr/local") - - diff --git a/cmake/modules/Findchai.cmake b/cmake/modules/Findchai.cmake new file mode 100644 index 00000000..36689745 --- /dev/null +++ b/cmake/modules/Findchai.cmake @@ -0,0 +1,5 @@ +# This is a dummy file for FindCHAI that RAJA depends on +# +# Kripke will automatically pull in CHAI +set(CHAI_DIR "${PROJECT_SOURCE_DIR}/tpl/chai") +set( CHAI_FOUND "YES" ) diff --git a/host-configs/llnl-bgqos-clang.cmake b/host-configs/llnl-bgqos-clang.cmake new file mode 100644 index 00000000..6fb56b3d --- /dev/null +++ 
b/host-configs/llnl-bgqos-clang.cmake @@ -0,0 +1,24 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. +## +## + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_C_COMPILER "/usr/apps/gnu/clang/2017.06.06/llnl/bin/mpiclang" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/usr/apps/gnu/clang/2017.06.06/llnl/bin/mpiclang++" CACHE PATH "") +set(CMAKE_LINKER "/usr/apps/gnu/clang/2017.06.06/llnl/bin/mpiclang++" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "-stdlib=libc++" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_MPI_WRAPPER On CACHE BOOL "") + + diff --git a/host-configs/llnl-blueos-P100-nvcc-clang.cmake b/host-configs/llnl-blueos-P100-nvcc-clang.cmake new file mode 100644 index 00000000..b1a85bde --- /dev/null +++ b/host-configs/llnl-blueos-P100-nvcc-clang.cmake @@ -0,0 +1,31 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. 
+## +## + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_C_COMPILER "mpiclang" CACHE PATH "") +set(CMAKE_CXX_COMPILER "mpiclang++" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_CHAI On CACHE BOOL "") +set(ENABLE_CUDA On CACHE BOOL "") +set(ENABLE_OPENMP Off CACHE BOOL "") +set(ENABLE_MPI_WRAPPER On CACHE BOOL "") + +set(CMAKE_CUDA_FLAGS "-restrict -gencode=arch=compute_60,code=sm_60 " CACHE STRING "") +set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --expt-extended-lambda" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -lineinfo --expt-extended-lambda" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g -G --expt-extended-lambda" CACHE STRING "") +set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "") + + diff --git a/host-configs/llnl-blueos-V100-nvcc-clang.cmake b/host-configs/llnl-blueos-V100-nvcc-clang.cmake new file mode 100644 index 00000000..8c394fe5 --- /dev/null +++ b/host-configs/llnl-blueos-V100-nvcc-clang.cmake @@ -0,0 +1,31 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. 
+## +## + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_C_COMPILER "mpiclang" CACHE PATH "") +set(CMAKE_CXX_COMPILER "mpiclang++" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_CHAI On CACHE BOOL "") +set(ENABLE_CUDA On CACHE BOOL "") +set(ENABLE_OPENMP Off CACHE BOOL "") +set(ENABLE_MPI_WRAPPER On CACHE BOOL "") + +set(CMAKE_CUDA_FLAGS "-restrict -gencode=arch=compute_70,code=sm_70 " CACHE STRING "") +set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --expt-extended-lambda" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -lineinfo --expt-extended-lambda" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g -G --expt-extended-lambda" CACHE STRING "") +set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "") + + diff --git a/host-configs/llnl-toss3-clang4.cmake b/host-configs/llnl-toss3-clang4.cmake new file mode 100644 index 00000000..f5653d13 --- /dev/null +++ b/host-configs/llnl-toss3-clang4.cmake @@ -0,0 +1,24 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. 
+## +## + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_C_COMPILER "/usr/tce/bin/clang-4.0.0" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/usr/tce/bin/clang++-4.0.0" CACHE PATH "") +set(CMAKE_LINKER "/usr/tce/bin/clang++-4.0.0" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "-stdlib=libc++" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -ffast-math" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_MPI On CACHE BOOL "") + + diff --git a/host-configs/llnl-toss3-gcc7.1.cmake b/host-configs/llnl-toss3-gcc7.1.cmake new file mode 100644 index 00000000..9e4c2f17 --- /dev/null +++ b/host-configs/llnl-toss3-gcc7.1.cmake @@ -0,0 +1,24 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. +## +## + +set(RAJA_COMPILER "RAJA_COMPILER_GNU" CACHE STRING "") + +set(CMAKE_C_COMPILER "/usr/tce/packages/gcc/gcc-7.1.0/bin/gcc" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/usr/tce/packages/gcc/gcc-7.1.0/bin/g++" CACHE PATH "") +set(CMAKE_LINKER "/usr/tce/packages/gcc/gcc-7.1.0/bin/g++" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_MPI On CACHE BOOL "") + + diff --git a/host-configs/llnl-toss3-gcc8.1.cmake b/host-configs/llnl-toss3-gcc8.1.cmake new file mode 100644 index 00000000..893a24fa --- /dev/null +++ b/host-configs/llnl-toss3-gcc8.1.cmake @@ -0,0 +1,25 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. 
+## +## + +set(RAJA_COMPILER "RAJA_COMPILER_GNU" CACHE STRING "") + +set(CMAKE_C_COMPILER "/usr/tce/packages/gcc/gcc-8.1.0/bin/gcc" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/usr/tce/packages/gcc/gcc-8.1.0/bin/g++" CACHE PATH "") +set(CMAKE_LINKER "/usr/tce/packages/gcc/gcc-8.1.0/bin/g++" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_MPI On CACHE BOOL "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE Bool "") + diff --git a/host-configs/llnl-toss3-intel18.cmake b/host-configs/llnl-toss3-intel18.cmake new file mode 100644 index 00000000..3246ccf7 --- /dev/null +++ b/host-configs/llnl-toss3-intel18.cmake @@ -0,0 +1,24 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## All rights reserved. 
+## +## + +set(RAJA_COMPILER "RAJA_COMPILER_INTEL" CACHE STRING "") + +set(CMAKE_C_COMPILER "/usr/tce/bin/icc-18.0.2" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/usr/tce/bin/icpc-18.0.2" CACHE PATH "") +set(CMAKE_LINKER "/usr/tce/bin/icpc-18.0.2" CACHE PATH "") + +set(CMAKE_CXX_FLAGS "" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_MPI On CACHE BOOL "") + + diff --git a/scripts/plotSweepConcur.py b/scripts/plotSweepConcur.py deleted file mode 100755 index 9aec0d0c..00000000 --- a/scripts/plotSweepConcur.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python - -import sys -import matplotlib.pyplot as plt - -fig = plt.figure() -ax = fig.add_subplot(1,1,1) - - -data = [] -T = [] - -# Open trace files -for fname in sys.argv[1:]: - - # Extract the rank from the filename - fparts = fname.split(".") - fparts.reverse() - rank = int(fparts[0]) - - # Read the input file - #print "Reading data for rank %d" % rank - with open(fname, "rb") as fh: - for line in fh.readlines(): - line = line.rstrip() - fields = line.split(" ") - - if fields[0] == 'sweep_kernel': - color = 'blue' - - t0 = float(fields[1]) - t1 = float(fields[2]) - T.append(t0) - T.append(t1) - data.append( (t0, 1) ) - data.append( (t1, -1) ) - -t_min = min(T) -t_max = max(T) -print "Total time: %f seconds" % (t_max - t_min) - -# Sort data based on timestamp -data.sort(key=lambda tup: tup[0]) - -# Compute a curve that shows concurrency -ax_time = [] -ay_concur = [] -concur = 0 -t_last = t_min -ave_concur = 0.0 -for i in data: - concur += i[1] - ax_time.append(i[0]) - ay_concur.append(concur) - dt = (i[0] - t_last) - ave_concur += concur * dt / (t_max-t_min) - t_last = i[0] - -print "Max concurrency: %d" % max(ay_concur) -print "Ave concurrency: %f" % ave_concur - -plt.plot(ax_time, ay_concur) -#plt.hist(ay_concur) - 
-plt.autoscale() -plt.show() - diff --git a/scripts/plotSweepTrace.py b/scripts/plotSweepTrace.py deleted file mode 100755 index 7f1cb0b7..00000000 --- a/scripts/plotSweepTrace.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -import sys -import matplotlib.pyplot as plt -import matplotlib.patches as patches - -fig = plt.figure() -ax = fig.add_subplot(1,1,1) - - -# Open trace files -for fname in sys.argv[1:]: - # Extract the rank from the filename - fparts = fname.split(".") - fparts.reverse() - rank = int(fparts[0]) - - # Read the input file - print "Reading data for rank %d" % rank - with open(fname, "rb") as fh: - for line in fh.readlines(): - line = line.rstrip() - fields = line.split(" ") - - if fields[0] == 'sweep_kernel': - color = 'blue' - - t0 = float(fields[1]) - t1 = float(fields[2]) - #print "%f - %f" % (t0, t1) - - ax.add_patch(patches.Rectangle( (t0, rank), t1-t0, 1.0, facecolor=color)) - -plt.autoscale() - -plt.show() - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index e0534604..00000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -include_directories(.) - -add_subdirectory(Kripke) - -set(KRIPKE_LIBS ${KRIPKE_LIBS} PARENT_SCOPE) diff --git a/src/Kripke.h b/src/Kripke.h index 8809769f..1225ba16 100644 --- a/src/Kripke.h +++ b/src/Kripke.h @@ -33,60 +33,68 @@ #ifndef KRIPKE_H__ #define KRIPKE_H__ -#include -#include -#include -#include -#include +#include + +#include + +#include +#include +#include +#include +#include +#include // Make sure that there's openmp support, otherwise error out -#if KRIPKE_USE_OPENMP +#ifdef KRIPKE_USE_OPENMP #ifndef _OPENMP #error "OpenMP selected for build, but OpenMP is not available" #endif #endif +#ifdef KRIPKE_USE_MPI +#include +#endif + // Forward Decl struct Grid_Data; #define KRESTRICT __restrict__ +#ifdef KRIPKE_USE_MPI +#define KRIPKE_ABORT(...) \ + printf(__VA_ARGS__); \ + MPI_Abort(MPI_COMM_WORLD, 1); +#else +#define KRIPKE_ABORT(...) 
\ + printf(__VA_ARGS__); \ + exit(1); +#endif -/** - * Un-comment ONE of the 3 MPI send methods below. - * It decides how sweep messages are sent (intended to be Isend) - */ -#define KRIPKE_SWEEP_ISEND -//#define KRIPKE_SWEEP_SEND -//#define KRIPKE_SWEEP_SSEND +#define KRIPKE_ASSERT(EXPR, ...) \ + if(!(EXPR)){\ + KRIPKE_ABORT("Assertion Failed: " __VA_ARGS__); \ + } + -/** - * Set to the number of extra "Testany"s that are executed during the sweep, - * between each Sweep kernel. - * This is intended to "help" MPI flush outgoing async messages. - * (In an ideal world, this would be 0) - */ -#define KRIPKE_SWEEP_EXTRA_RECV 0 +#define KRIPKE_LAMBDA [=] RAJA_HOST_DEVICE +namespace Kripke { -// In Kripke/Sweep_Solver.cpp -int SweepSolver(Grid_Data *grid_data, bool block_jacobi); -void SweepSubdomains (std::vector subdomain_list, Grid_Data *grid_data, bool block_jacobi); + /** + * Index used to specify a local subdomain + */ + RAJA_INDEX_VALUE(SdomId, "SdomId"); + + + /** + * Index used to specify a global subdomain + */ + RAJA_INDEX_VALUE(GlobalSdomId, "GlobalSdomId"); + + +} -/** - * Tags for choosing which data nesting to be chosen - */ -enum Nesting_Order { - // Nestings for Psi and Phi - // D referes to directions OR moments, depending on context - NEST_DGZ, - NEST_DZG, - NEST_GDZ, - NEST_GZD, - NEST_ZDG, - NEST_ZGD -}; /** @@ -97,86 +105,58 @@ enum ParallelMethod { PMETHOD_BJ }; -/** - * Converts a nesting tag to a human-readable string. - */ -inline std::string nestingString(Nesting_Order nesting){ - switch(nesting){ - case NEST_DGZ: return("DGZ"); - case NEST_DZG: return("DZG"); - case NEST_GDZ: return("GDZ"); - case NEST_GZD: return("GZD"); - case NEST_ZDG: return("ZDG"); - case NEST_ZGD: return("ZGD"); - } - return("UNKNOWN"); -} - -/** - * Converts a string (eg. from command line) to a nesting tag. 
- */ -inline Nesting_Order nestingFromString(std::string const &str){ - for(int i = 0;i < 6;++ i){ - if(!strcasecmp(str.c_str(), nestingString((Nesting_Order)i).c_str())){ - return (Nesting_Order)i; - } - } - return (Nesting_Order)-1; -} /** - * Compares two vectors for differences. - * Used in testing suite. + * Import RAJA types into Kripke::Arch to make defining policies a lot + * cleaner */ -inline bool compareVector(std::string const &name, - std::vector const &a, - std::vector const &b, double tol, bool verbose){ - - if(a.size() != b.size()){ - if(verbose){ - printf("Vectors are different lengths: %ld, %ld\n", - (long)a.size(), (long)b.size()); - } - return true; - } +namespace Kripke { +namespace Arch { + + using RAJA::loop_exec; + using RAJA::seq_exec; + using RAJA::simd_exec; + using RAJA::seq_reduce; + using RAJA::atomic::auto_atomic; + using RAJA::atomic::seq_atomic; + using RAJA::ArgList; + using RAJA::KernelPolicy; + using RAJA::statement::Collapse; + using RAJA::statement::If; + using RAJA::statement::Param; + using RAJA::statement::Not; + using RAJA::statement::For; + using RAJA::statement::Hyperplane; + using RAJA::statement::Lambda; + using RAJA::statement::SetShmemWindow; + using RAJA::statement::Tile; + using RAJA::statement::tile_fixed; + +#ifdef KRIPKE_USE_OPENMP + using RAJA::omp_parallel_collapse_exec; + using RAJA::omp_parallel_for_exec; + using RAJA::omp_reduce; +#endif - bool is_diff = false; - for(size_t i = 0;i < a.size();++i){ - if(std::abs(a[i]-b[i]) > tol){ - is_diff = true; - if(verbose){ - printf("%s[%d]:%e, %e [%e]\n", - name.c_str(), (int)i, - a[i], b[i], std::abs(a[i]-b[i])); - is_diff = true; - } - else{ - break; - } - } - } +#ifdef KRIPKE_USE_CUDA + using RAJA::cuda_exec; + using RAJA::cuda_block_exec; + using RAJA::cuda_seq_syncthreads_exec; + using RAJA::cuda_thread_exec; + using RAJA::cuda_threadblock_exec; + using RAJA::cuda_reduce; + using RAJA::atomic::cuda_atomic; + using RAJA::statement::CudaKernel; + using 
RAJA::statement::CudaKernelAsync; + using RAJA::statement::CudaSyncThreads; + using RAJA::statement::Thread; +#endif + +} // namespace Arch +} // namespace Kripke - return is_diff; -} -/** - * Compares two scalars for differences. - * Used in testing suite. - */ -inline bool compareScalar(std::string const &name, - double a, double b, double tol, bool verbose){ - - if(std::abs(a-b) > tol){ - if(verbose){ - printf("%s:%e, %e [%e]\n", - name.c_str(), - a, b, std::abs(a-b)); - } - return true; - } - return false; -} #endif diff --git a/src/Kripke/Arch/LPlusTimes.h b/src/Kripke/Arch/LPlusTimes.h new file mode 100644 index 00000000..78d07d3c --- /dev/null +++ b/src/Kripke/Arch/LPlusTimes.h @@ -0,0 +1,365 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. 
The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_ARCH_LPLUSTIMES +#define KRIPKE_ARCH_LPLUSTIMES + +#include +#include + +namespace Kripke { +namespace Arch { + + +template +struct Policy_LPlusTimes; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // Direction + For<1, loop_exec, // Moment + For<2, loop_exec, // Group + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // Direction + For<1, loop_exec, // Moment + For<3, loop_exec, // Zone + For<2, loop_exec, // Group + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // Group + For<0, loop_exec, // Direction + For<1, loop_exec, // Moment + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // Group + For<3, loop_exec, // Zone + For<0, loop_exec, // Direction + For<1, loop_exec, // Moment + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + For<3, loop_exec, // Zone + For<0, loop_exec, // Direction + For<1, loop_exec, // Moment + For<2, loop_exec, // Group + Lambda<0> + > + > + > + > + >; +}; + + + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + For<3, loop_exec, // Zone + For<2, loop_exec, // Group + For<0, loop_exec, // Direction + For<1, loop_exec, 
// Moment + Lambda<0> + > + > + > + > + >; +}; + + + +#ifdef KRIPKE_USE_OPENMP + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Direction, Group + For<1, loop_exec, // Moment + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Direction, Zone + For<1, loop_exec, // Moment + For<2, loop_exec, // Group + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Group, Direciton + For<1, loop_exec, // Moment + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + >; +}; + + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Group, Zone, Direciton + For<1, loop_exec, // Moment + Lambda<0> + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Zone, Direction + For<1, loop_exec, // Moment + For<2, loop_exec, // Group + Lambda<0> + > + > + > + >; +}; + + + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Zone, Group, Direction + For<1, loop_exec, // Moment + Lambda<0> + > + > + >; +}; +#endif // KRIPKE_USE_OPENMP + + + +#ifdef KRIPKE_USE_CUDA + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // Direction + For<2, cuda_block_exec, // group + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // Moment + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // Direction + For<2, cuda_block_exec, // group + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // Moment + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, 
cuda_block_exec, // Direction + For<2, cuda_block_exec, // group + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // Moment + Lambda<0> + > + > + > + > + > + > + >; +}; + + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // Direction + For<2, cuda_block_exec, // group + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // Moment + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // Direction + For<2, cuda_block_exec, // group + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // Moment + Lambda<0> + > + > + > + > + > + > + >; +}; + + + +template<> +struct Policy_LPlusTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // Direction + For<2, cuda_block_exec, // group + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // Moment + Lambda<0> + > + > + > + > + > + > + >; +}; + +#endif // KRIPKE_USE_CUDA + + +} +} +#endif diff --git a/src/Kripke/Arch/LTimes.h b/src/Kripke/Arch/LTimes.h new file mode 100644 index 00000000..031c4b00 --- /dev/null +++ b/src/Kripke/Arch/LTimes.h @@ -0,0 +1,356 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. 
Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_ARCH_LTIMES +#define KRIPKE_ARCH_LTIMES + +#include +#include + +namespace Kripke { +namespace Arch { + +template +struct Policy_LTimes; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // moment + For<1, loop_exec, // direction + For<2, loop_exec, // group + For<3, loop_exec, // zone + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // moment + For<1, loop_exec, // direction + For<3, loop_exec, // zone + For<2, loop_exec, // group + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // group + For<0, loop_exec, // moment + For<1, loop_exec, // direction + For<3, loop_exec, // zone + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // group + For<3, loop_exec, // zone + For<0, loop_exec, // moment + For<1, loop_exec, // direction + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + For<3, loop_exec, // zone + For<0, loop_exec, // moment + For<1, loop_exec, // direction + For<2, loop_exec, // group + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + For<3, loop_exec, // zone + For<2, loop_exec, // group + For<0, loop_exec, // moment + For<1, loop_exec, // direction + Lambda<0> + > + > + > + > + >; +}; + + + + +#ifdef KRIPKE_USE_OPENMP +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Moment Group + For<1, loop_exec, // Direction + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Moment Zone + For<1, loop_exec, // Direction + For<2, loop_exec, // Group + Lambda<0> + > + > + > + >; +}; + +template<> 
+struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Group Moment + For<1, loop_exec, // Direction + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Group Zone Moment + For<1, loop_exec, // Direection + Lambda<0> + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Zone Moment + For<1, loop_exec, // Direction + For<2, loop_exec, // Group + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + Collapse, // Zone Group + For<0, loop_exec, // Moment + For<1, loop_exec, // Direction + Lambda<0> + > + > + > + >; +}; +#endif // KRIPKE_USE_OPENMP + + + +#ifdef KRIPKE_USE_CUDA +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_block_exec, // group + For<0, cuda_block_exec, // moment + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // direction + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_block_exec, // group + For<0, cuda_block_exec, // moment + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // direction + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_block_exec, // group + For<0, cuda_block_exec, // moment + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // direction + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_block_exec, // group + For<0, cuda_block_exec, // moment + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // direction + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy 
= + KernelPolicy< + CudaKernel< + For<2, cuda_block_exec, // group + For<0, cuda_block_exec, // moment + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // direction + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_LTimes> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_block_exec, // group + For<0, cuda_block_exec, // moment + For<3, cuda_thread_exec, // zone + Thread< + For<1, seq_exec, // direction + Lambda<0> + > + > + > + > + > + > + >; +}; +#endif // KRIPKE_USE_CUDA + + +} +} + +#endif diff --git a/src/Kripke/Arch/Population.h b/src/Kripke/Arch/Population.h new file mode 100644 index 00000000..442134d5 --- /dev/null +++ b/src/Kripke/Arch/Population.h @@ -0,0 +1,356 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. 
The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_ARCH_POPULATION +#define KRIPKE_ARCH_POPULATION + +#include +#include + +namespace Kripke { +namespace Arch { + + +template +struct Policy_Population; + +template<> +struct Policy_Population>{ + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // direction + For<1, loop_exec, // group + For<2, loop_exec, // zone + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // direction + For<2, loop_exec, // zone + For<1, loop_exec, // group + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // group + For<0, loop_exec, // direction + For<2, loop_exec, // zone + Lambda<0> + > + > + > + >; +}; + + +template<> +struct Policy_Population>{ + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // group + For<2, loop_exec, // zone + For<0, loop_exec, // direction + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // zone + For<0, loop_exec, // direction + For<1, loop_exec, // group + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = seq_reduce; + + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // zone + For<1, loop_exec, // group + For<0, 
loop_exec, // direction + Lambda<0> + > + > + > + >; +}; + + +#ifdef KRIPKE_USE_OPENMP + +template<> +struct Policy_Population>{ + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // Direction Group + For<2, loop_exec, // Zone + Lambda<0> + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // Direction Zone + For<1, loop_exec, // Group + Lambda<0> + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // Group Direction + For<2, loop_exec, // Zone + Lambda<0> + > + > + >; +}; + + +template<> +struct Policy_Population>{ + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // Group Zone + For<0, loop_exec, // Direction + Lambda<0> + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // Zone Direction + For<1, loop_exec, // Group + Lambda<0> + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = omp_reduce; + + using ExecPolicy = + KernelPolicy< + Collapse, // Zone Group + For<0, loop_exec, // Direction + Lambda<0> + > + > + >; +}; + + +#endif // KRIPKE_USE_OPENMP + + + +#ifdef KRIPKE_USE_CUDA +template<> +struct Policy_Population>{ + using ReducePolicy = cuda_reduce<1024>; + + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_thread_exec, // direction + For<1, cuda_thread_exec, // group + For<2, cuda_threadblock_exec<32>, // zone + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = cuda_reduce<1024>; + + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_thread_exec, // direction + For<2, cuda_threadblock_exec<32>, // zone + For<1, cuda_thread_exec, // group + Lambda<0> + > + > + > + > + >; + +}; + +template<> +struct 
Policy_Population>{ + using ReducePolicy = cuda_reduce<1024>; + + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<1, cuda_thread_exec, // group + For<0, cuda_thread_exec, // direction + For<2, cuda_threadblock_exec<32>, // zone + Lambda<0> + > + > + > + > + >; + +}; + + +template<> +struct Policy_Population>{ + using ReducePolicy = cuda_reduce<1024>; + + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<1, cuda_thread_exec, // group + For<2, cuda_threadblock_exec<32>, // zone + For<0, cuda_thread_exec, // direction + Lambda<0> + > + > + > + > + >; + +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = cuda_reduce<1024>; + + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_threadblock_exec<32>, // zone + For<0, cuda_thread_exec, // direction + For<1, cuda_thread_exec, // group + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_Population>{ + using ReducePolicy = cuda_reduce<1024>; + + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<2, cuda_threadblock_exec<32>, // zone + For<1, cuda_thread_exec, // group + For<0, cuda_thread_exec, // direction + Lambda<0> + > + > + > + > + >; + +}; +#endif // KRIPKE_USE_CUDA + + + +} +} + +#endif diff --git a/src/Kripke/Arch/Scattering.h b/src/Kripke/Arch/Scattering.h new file mode 100644 index 00000000..7bf05b04 --- /dev/null +++ b/src/Kripke/Arch/Scattering.h @@ -0,0 +1,364 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. 
Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_ARCH_SCATTERING +#define KRIPKE_ARCH_SCATTERING + +#include +#include + +namespace Kripke { +namespace Arch { + +template +struct Policy_Scattering; + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // moment + For<1, loop_exec, // dst group + For<2, loop_exec, // src group + For<3, loop_exec, // zone + Lambda<0> + > + > + > + > + >; +}; + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // moment + For<3, loop_exec, // zone + For<1, loop_exec, // dst group + For<2, loop_exec, // src group + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // dst group + For<2, loop_exec, // src group + For<0, loop_exec, // moment + For<3, loop_exec, // zone + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // dst group + For<2, loop_exec, // src group + For<3, loop_exec, // zone + For<0, loop_exec, // moment + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + For<3, loop_exec, // zone + For<0, loop_exec, // moment + For<1, loop_exec, // dst group + For<2, loop_exec, // src group + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + For<3, loop_exec, // zone + For<1, loop_exec, // dst group + For<2, loop_exec, // src group + For<0, loop_exec, // moment + Lambda<0> + > + > + > + > + >; +}; + + + +#ifdef KRIPKE_USE_OPENMP +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + Collapse, // Moment, DstGrp + For<2, loop_exec, // SrcGrp + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + Collapse, // Moment, Zone, DstGrp + For<2, loop_exec, // 
SrcGrp + Lambda<0> + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + Collapse, // DstGrp, Moment + For<2, loop_exec, // SrcGrp + For<3, loop_exec, // Zone + Lambda<0> + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + Collapse, // DstGrp, Zone + For<2, loop_exec, // SrcGrp + For<0, loop_exec, // Moment + Lambda<0> + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + Collapse, // Zone, Moment, DstGrp + For<2, loop_exec, // SrcGrp + Lambda<0> + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + Collapse, // Zone, DstGrp + For<2, loop_exec, // SrcGrp + For<0, loop_exec, // Moment + Lambda<0> + > + > + > + >; +}; +#endif // KRIPKE_USE_OPENMP + + +#ifdef KRIPKE_USE_CUDA +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // moment + For<1, cuda_block_exec, // DstGrp + For<3, cuda_thread_exec, // zone + Thread< + For<2, seq_exec, // SrcGrp + Lambda<0> + > + > + > + > + > + > + >; +}; + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // moment + For<1, cuda_block_exec, // DstGrp + For<3, cuda_thread_exec, // zone + Thread< + For<2, seq_exec, // SrcGrp + Lambda<0> + > + > + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // moment + For<1, cuda_block_exec, // DstGrp + For<3, cuda_thread_exec, // zone + Thread< + For<2, seq_exec, // SrcGrp + Lambda<0> + > + > + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // moment + For<1, cuda_block_exec, // DstGrp + For<3, cuda_thread_exec, // zone + Thread< + For<2, seq_exec, // SrcGrp + Lambda<0> + > + > + > 
+ > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // moment + For<1, cuda_block_exec, // DstGrp + For<3, cuda_thread_exec, // zone + Thread< + For<2, seq_exec, // SrcGrp + Lambda<0> + > + > + > + > + > + > + >; +}; + + +template<> +struct Policy_Scattering> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, // moment + For<1, cuda_block_exec, // DstGrp + For<3, cuda_thread_exec, // zone + Thread< + For<2, seq_exec, // SrcGrp + Lambda<0> + > + > + > + > + > + > + >; +}; +#endif //KRIPKE_USE_CUDA + + +} +} + +#endif diff --git a/src/Kripke/Arch/Source.h b/src/Kripke/Arch/Source.h new file mode 100644 index 00000000..297c7f31 --- /dev/null +++ b/src/Kripke/Arch/Source.h @@ -0,0 +1,141 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_ARCH_SOURCE +#define KRIPKE_ARCH_SOURCE + +#include +#include + +namespace Kripke { +namespace Arch { + +template +struct Policy_Source; + +template +struct Policy_Source> : + Policy_Source>{}; + +template +struct Policy_Source> : + Policy_Source>{}; + +template +struct Policy_Source> : + Policy_Source>{}; + +template +struct Policy_Source> : + Policy_Source>{}; + +template<> +struct Policy_Source> { + using ExecPolicy = + KernelPolicy< + Collapse, // Group, MixElem + Lambda<0> + > + >; +}; + +template<> +struct Policy_Source> { + using ExecPolicy = + KernelPolicy< + Collapse, // MixElem, Group + Lambda<0> + > + >; +}; + + + + +#ifdef KRIPKE_USE_OPENMP +template<> +struct Policy_Source> { + using ExecPolicy = + KernelPolicy< + Collapse, // Group, MixElem + Lambda<0> + > + >; +}; + +template<> +struct Policy_Source> { + using ExecPolicy = + KernelPolicy< + Collapse, // MixElem, Group + Lambda<0> + > + >; +}; +#endif // KRIPKE_USE_OPENMP + + +#ifdef KRIPKE_USE_CUDA +template<> +struct Policy_Source> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_thread_exec, // Group + For<1, cuda_threadblock_exec<32>, // MixElem + Lambda<0> + > + > + > + >; +}; + +template<> +struct Policy_Source> { + 
using ExecPolicy = + KernelPolicy< + CudaKernel< + For<1, cuda_threadblock_exec<32>, // MixElem + For<0, cuda_thread_exec, // Group + Lambda<0> + > + > + > + >; +}; +#endif // KRIPKE_USE_CUDA + + +} +} + +#endif diff --git a/src/Kripke/Arch/SweepSubdomains.h b/src/Kripke/Arch/SweepSubdomains.h new file mode 100644 index 00000000..6392eba7 --- /dev/null +++ b/src/Kripke/Arch/SweepSubdomains.h @@ -0,0 +1,400 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. 
+ * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_ARCH_SWEEPSUBDOMAINS +#define KRIPKE_ARCH_SWEEPSUBDOMAINS + +#include +#include + +namespace Kripke { +namespace Arch { + +template +struct Policy_SweepSubdomains; + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // direction + For<1, loop_exec, // group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<0, loop_exec, // direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<1, loop_exec, // group + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // group + For<0, loop_exec, // direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<1, loop_exec, // group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<0, loop_exec, // direction + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<0, loop_exec, // direction + For<1, loop_exec, // group + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<1, loop_exec, // group + For<0, loop_exec, // direction + Lambda<0> + > + > + > + > + > + >; +}; + + + + +#ifdef KRIPKE_USE_OPENMP + 
+ +template<> +struct Policy_SweepSubdomains> { + + + using ExecPolicy = + KernelPolicy< + Collapse, // direction, group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<0, omp_parallel_for_exec, // direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<1, loop_exec, // group + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + Collapse, // group, direction + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + For<1, omp_parallel_for_exec, // group + For<2, loop_exec, // k + For<3, loop_exec, // j + For<4, loop_exec, // i + For<0, loop_exec, // direction + Lambda<0> + > + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + Hyperplane<2, seq_exec, ArgList<3,4>, omp_parallel_collapse_exec, + For<0, loop_exec, // direction + For<1, loop_exec, // group + Lambda<0> + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + Hyperplane<2, seq_exec, ArgList<3,4>, omp_parallel_collapse_exec, + For<1, loop_exec, // group + For<0, loop_exec, // direction + Lambda<0> + > + > + > + >; +}; + +#endif // KRIPKE_USE_OPENMP + + +#ifdef KRIPKE_USE_CUDA +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, + For<1, cuda_block_exec, + + Hyperplane< + 2, cuda_seq_syncthreads_exec, + ArgList<3, 4>, cuda_thread_exec, + + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, + For<1, 
cuda_block_exec, + + Hyperplane< + 2, cuda_seq_syncthreads_exec, + ArgList<3, 4>, cuda_thread_exec, + + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, + For<1, cuda_block_exec, + + Hyperplane< + 2, cuda_seq_syncthreads_exec, + ArgList<3, 4>, cuda_thread_exec, + + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, + For<1, cuda_block_exec, + + Hyperplane< + 2, cuda_seq_syncthreads_exec, + ArgList<3, 4>, cuda_thread_exec, + + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, + For<1, cuda_block_exec, + + Hyperplane< + 2, cuda_seq_syncthreads_exec, + ArgList<3, 4>, cuda_thread_exec, + + Lambda<0> + > + > + > + > + >; +}; + + +template<> +struct Policy_SweepSubdomains> { + using ExecPolicy = + KernelPolicy< + CudaKernel< + For<0, cuda_block_exec, + For<1, cuda_block_exec, + + Hyperplane< + 2, cuda_seq_syncthreads_exec, + ArgList<3, 4>, cuda_thread_exec, + + Lambda<0> + > + > + > + > + >; +}; +#endif // KRIPKE_USE_CUDA + +} +} + +#endif diff --git a/src/Kripke/ArchLayout.h b/src/Kripke/ArchLayout.h new file mode 100644 index 00000000..c4c813f5 --- /dev/null +++ b/src/Kripke/ArchLayout.h @@ -0,0 +1,223 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. 
Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_ARCHLAYOUT_H__ +#define KRIPKE_ARCHLAYOUT_H__ + +#include +#include + +#include + +namespace Kripke { + +struct ArchT_Sequential {}; + +#ifdef KRIPKE_USE_OPENMP +struct ArchT_OpenMP {}; +#endif + +#ifdef KRIPKE_USE_CUDA +struct ArchT_CUDA {}; +#endif + + + +enum ArchV { + ArchV_Unknown = -1, + ArchV_Sequential, + +#ifdef KRIPKE_USE_OPENMP + ArchV_OpenMP, +#endif + +#ifdef KRIPKE_USE_CUDA + ArchV_CUDA, +#endif + + ArchV_num_values +}; + + +RAJA_INLINE +std::string archToString(ArchV av){ + switch(av){ + case ArchV_Sequential: return "Sequential"; + +#ifdef KRIPKE_USE_OPENMP + case ArchV_OpenMP: return "OpenMP"; +#endif + +#ifdef KRIPKE_USE_CUDA + case ArchV_CUDA: return "CUDA"; +#endif + + case ArchV_Unknown: + case ArchV_num_values: + default: return "unknown"; + } +} + +RAJA_INLINE +ArchV stringToArch(std::string const &str){ + for(int av = 0;av < (int)ArchV_num_values;++ av){ + if(!strcasecmp(archToString((ArchV)av).c_str(), str.c_str())){ + return (ArchV)av; + } + } + return ArchV_Unknown; +} + +struct LayoutT_DGZ {}; +struct LayoutT_DZG {}; +struct LayoutT_GDZ {}; +struct LayoutT_GZD {}; +struct LayoutT_ZDG {}; +struct LayoutT_ZGD {}; + +enum LayoutV { + LayoutV_Unknown = -1, + LayoutV_DGZ, + LayoutV_DZG, + LayoutV_GDZ, + LayoutV_GZD, + LayoutV_ZDG, + LayoutV_ZGD, + LayoutV_num_values +}; + +RAJA_INLINE +std::string layoutToString(LayoutV lv){ + switch(lv){ + case LayoutV_DGZ: return "DGZ"; + case LayoutV_DZG: return "DZG"; + case LayoutV_GDZ: return "GDZ"; + case LayoutV_GZD: return "GZD"; + case LayoutV_ZDG: return "ZDG"; + case LayoutV_ZGD: return "ZGD"; + case LayoutV_Unknown: + case LayoutV_num_values: + default: return "unknown"; + } +} + +RAJA_INLINE +LayoutV stringToLayout(std::string const &str){ + for(int lv = 0;lv < (int)LayoutV_num_values;++ lv){ + if(!strcasecmp(layoutToString((LayoutV)lv).c_str(), str.c_str())){ + return (LayoutV)lv; + } + } + return LayoutV_Unknown; +} + + +template +struct ArchLayoutT { + using arch_t = 
ARCH; + using layout_t = LAYOUT; +}; + +struct ArchLayoutV { + ArchV arch_v; + LayoutV layout_v; +}; + + +class ArchLayout : public Kripke::Core::BaseVar { +public: + ArchLayout() = default; + virtual ~ArchLayout() = default; + + ArchLayoutV al_v; +}; + + +template +RAJA_INLINE +void dispatchLayout(LayoutV layout_v, Function const &fcn, Args &&... args) +{ + switch(layout_v){ + case LayoutV_DGZ: fcn(LayoutT_DGZ{}, std::forward(args)...); break; + case LayoutV_DZG: fcn(LayoutT_DZG{}, std::forward(args)...); break; + case LayoutV_GDZ: fcn(LayoutT_GDZ{}, std::forward(args)...); break; + case LayoutV_GZD: fcn(LayoutT_GZD{}, std::forward(args)...); break; + case LayoutV_ZDG: fcn(LayoutT_ZDG{}, std::forward(args)...); break; + case LayoutV_ZGD: fcn(LayoutT_ZGD{}, std::forward(args)...); break; + default: KRIPKE_ABORT("Unknown layout_v=%d\n", (int)layout_v); break; + } +} + +template +RAJA_INLINE +void dispatchArch(ArchV arch_v, Function const &fcn, Args &&... args) +{ + switch(arch_v){ + case ArchV_Sequential: fcn(ArchT_Sequential{}, std::forward(args)...); break; +#ifdef KRIPKE_USE_OPENMP + case ArchV_OpenMP: fcn(ArchT_OpenMP{}, std::forward(args)...); break; +#endif + +#ifdef KRIPKE_USE_CUDA + case ArchV_CUDA: fcn(ArchT_CUDA{}, std::forward(args)...); break; +#endif + default: KRIPKE_ABORT("Unknown arch_v=%d\n", (int)arch_v); break; + } +} + + +template +struct DispatchHelper{ + + template + void operator()(layout_t, Function const &fcn, Args &&... args) const { + using al_t = ArchLayoutT; + fcn(al_t{}, std::forward(args)...); + } +}; + + +template +RAJA_INLINE +void dispatch(ArchLayoutV al_v, Function const &fcn, Args &&... 
args) +{ + dispatchArch(al_v.arch_v, [&](auto arch_t){ + DispatchHelper helper; + + dispatchLayout(al_v.layout_v, helper, fcn, std::forward(args)...); + }); +} + +} // namespace + +#endif + diff --git a/src/Kripke/CMakeLists.txt b/src/Kripke/CMakeLists.txt deleted file mode 100644 index fb685e10..00000000 --- a/src/Kripke/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -include_directories(..) - -add_library(lib_kripke - Directions.cpp - Grid.cpp - Input_Variables.cpp - Kernel.cpp - Layout.cpp - Subdomain.cpp - Sweep_Solver.cpp - ParallelComm.cpp - Timing.cpp - - Kernel/Kernel_3d_GDZ.cpp - Kernel/Kernel_3d_DGZ.cpp - Kernel/Kernel_3d_ZDG.cpp - Kernel/Kernel_3d_DZG.cpp - Kernel/Kernel_3d_ZGD.cpp - Kernel/Kernel_3d_GZD.cpp - - ParallelComm/BlockJacobiComm.cpp - ParallelComm/SweepComm.cpp - - Test/TestKernels.cpp -) - - -set(KRIPKE_LIBS ${KRIPKE_LIBS} lib_kripke PARENT_SCOPE) diff --git a/src/Kripke/Kernel/Kernel_3d_DZG.h b/src/Kripke/Core/BaseVar.cpp similarity index 69% rename from src/Kripke/Kernel/Kernel_3d_DZG.h rename to src/Kripke/Core/BaseVar.cpp index a3ba7ffb..b1eb1c33 100644 --- a/src/Kripke/Kernel/Kernel_3d_DZG.h +++ b/src/Kripke/Core/BaseVar.cpp @@ -30,25 +30,27 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. 
*/ -#ifndef KRIPKE_KERNEL_3D_DZG_H__ -#define KRIPKE_KERNEL_3D_DZG_H__ - -#include - -class Kernel_3d_DZG : public Kernel { - public: - virtual Nesting_Order nestingPsi(void) const; - virtual Nesting_Order nestingPhi(void) const; - virtual Nesting_Order nestingSigt(void) const; - virtual Nesting_Order nestingEll(void) const; - virtual Nesting_Order nestingEllPlus(void) const; - virtual Nesting_Order nestingSigs(void) const; - - virtual void LTimes(Grid_Data *grid_data); - virtual void LPlusTimes(Grid_Data *grid_data); - virtual void scattering(Grid_Data *grid_data); - virtual void source(Grid_Data *grid_data); - virtual void sweep(Subdomain *ga_set); -}; - -#endif +#include + +#include + +using namespace Kripke::Core; + +BaseVar::BaseVar() : m_parent(nullptr){ + +} + +void BaseVar::setParent(DataStore *parent){ + m_parent = parent; +} + +std::string BaseVar::getName() const { + if(m_parent){ + return m_parent->getVariableName(*this); + } + else{ + return "---"; + } +} + + diff --git a/src/Kripke/Kernel/Kernel_3d_DGZ.h b/src/Kripke/Core/BaseVar.h similarity index 70% rename from src/Kripke/Kernel/Kernel_3d_DGZ.h rename to src/Kripke/Core/BaseVar.h index 9aa15a5e..12920d5a 100644 --- a/src/Kripke/Kernel/Kernel_3d_DGZ.h +++ b/src/Kripke/Core/BaseVar.h @@ -30,25 +30,33 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. 
*/ -#ifndef KRIPKE_KERNEL_3D_DGZ_H__ -#define KRIPKE_KERNEL_3D_DGZ_H__ +#ifndef KRIPKE_CORE_BASE_VAR_H__ +#define KRIPKE_CORE_BASE_VAR_H__ -#include +#include -class Kernel_3d_DGZ : public Kernel { +namespace Kripke { +namespace Core { + +class DataStore; + +/** + * Variable base class for DataStore class + */ +class BaseVar { public: - virtual Nesting_Order nestingPsi(void) const; - virtual Nesting_Order nestingPhi(void) const; - virtual Nesting_Order nestingSigt(void) const; - virtual Nesting_Order nestingEll(void) const; - virtual Nesting_Order nestingEllPlus(void) const; - virtual Nesting_Order nestingSigs(void) const; - - virtual void LTimes(Grid_Data *grid_data); - virtual void LPlusTimes(Grid_Data *grid_data); - virtual void scattering(Grid_Data *grid_data); - virtual void source(Grid_Data *grid_data); - virtual void sweep(Subdomain *ga_set); + BaseVar(); + virtual ~BaseVar() = default; + + void setParent(DataStore *parent); + + std::string getName() const; + + private: + DataStore *m_parent; }; + +} } // namespace + #endif diff --git a/src/Kripke/Core/Comm.h b/src/Kripke/Core/Comm.h new file mode 100644 index 00000000..9b1b15f7 --- /dev/null +++ b/src/Kripke/Core/Comm.h @@ -0,0 +1,232 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. 
Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_CORE_COMM_H__ +#define KRIPKE_CORE_COMM_H__ + +#include +#include + +#ifdef KRIPKE_USE_MPI +#include +#endif + +namespace Kripke { +namespace Core { +/** + * An interprocess communicator. + * + * Used as an abstraction layer around MPI... 
mainly to allow compilation w/o + * MPI + */ +class Comm : public Kripke::Core::BaseVar { + public: + + +#ifdef KRIPKE_USE_MPI + RAJA_INLINE + static void init(int *argc, char ***argv){ + MPI_Init(argc, argv); + } +#else + RAJA_INLINE + static void init(int *, char ***){ + } +#endif + + RAJA_INLINE + static void finalize() { +#ifdef KRIPKE_USE_MPI + MPI_Finalize(); +#endif + } + + + RAJA_INLINE + static Comm getSelf() { +#ifdef KRIPKE_USE_MPI + return Comm(MPI_COMM_SELF); +#else + return Comm(); +#endif + } + +#ifdef KRIPKE_USE_MPI + RAJA_INLINE + Comm() : + m_comm(MPI_COMM_WORLD), + m_rank(0), + m_size(0) + { + int r, s; + MPI_Comm_rank(m_comm, &r); + MPI_Comm_size(m_comm, &s); + m_rank = r; + m_size = s; + } + + RAJA_INLINE + Comm(MPI_Comm c) : + m_comm(c), + m_rank(0), + m_size(0) + { + int r, s; + MPI_Comm_rank(m_comm, &r); + MPI_Comm_size(m_comm, &s); + m_rank = r; + m_size = s; + } +#else + RAJA_INLINE + Comm() : + m_rank(0), + m_size(1) + {} +#endif + + + virtual ~Comm() = default; + + RAJA_INLINE + size_t size() const { + return m_size; + } + + RAJA_INLINE + size_t rank() const { + return m_rank; + } + + RAJA_INLINE +#ifdef KRIPKE_USE_MPI + Comm split(int color, int key) const { + MPI_Comm split_comm; + MPI_Comm_split(m_comm, color, key, &split_comm); + return Comm(split_comm); +#else + Comm split(int , int ) const { + return Comm(); +#endif + } + + /** + * Allreduce SUM a single value. 
+ * Without MPI, this is a NOP + */ + RAJA_INLINE + long allReduceSumLong(long value) const { +#ifdef KRIPKE_USE_MPI + MPI_Allreduce(MPI_IN_PLACE, &value, 1, MPI_LONG, MPI_SUM, m_comm); +#endif + return value; + } + + /** + * Allreduce SUM an array, in-place + * Without MPI, this is a NOP + */ + RAJA_INLINE +#ifdef KRIPKE_USE_MPI + void allReduceSumLong(long *value, size_t len) const { + MPI_Allreduce(MPI_IN_PLACE, value, len, MPI_LONG, MPI_SUM, m_comm); + } +#else + void allReduceSumLong(long *, size_t ) const {} +#endif + + + /** + * Allreduce SUM an array, in-place + * Without MPI, this is a NOP + */ + RAJA_INLINE +#ifdef KRIPKE_USE_MPI + void allReduceSumInt(int *value, size_t len) const { + MPI_Allreduce(MPI_IN_PLACE, value, len, MPI_INT, MPI_SUM, m_comm); + } +#else + void allReduceSumInt(int *, size_t) const {} +#endif + + /** + * Allreduce SUM a single value. + * Without MPI, this is a NOP + */ + RAJA_INLINE + double allReduceSumDouble(double value) const { +#ifdef KRIPKE_USE_MPI + MPI_Allreduce(MPI_IN_PLACE, &value, 1, MPI_DOUBLE, MPI_SUM, m_comm); +#endif + return value; + } + + /** + * Allreduce SUM an array, in-place. + * Without MPI, this is a NOP + */ +#ifdef KRIPKE_USE_MPI + RAJA_INLINE + void allReduceSumDouble(double *value, size_t len) const { + MPI_Allreduce(MPI_IN_PLACE, value, len, MPI_DOUBLE, MPI_SUM, m_comm); + } +#else + RAJA_INLINE + void allReduceSumDouble(double *, size_t) const { + } +#endif + + /** + * Prefix scan SUM a single value. 
+ * Without MPI, this is a NOP + */ + RAJA_INLINE + long scanSumLong(long value) const { +#ifdef KRIPKE_USE_MPI + MPI_Scan(MPI_IN_PLACE, &value, 1, MPI_LONG, MPI_SUM, m_comm); +#endif + return value; + } + + private: +#ifdef KRIPKE_USE_MPI + MPI_Comm m_comm; +#endif + size_t m_rank; + size_t m_size; +}; + + + +} } // namespace + +#endif diff --git a/src/Kripke/Core/DataStore.cpp b/src/Kripke/Core/DataStore.cpp new file mode 100644 index 00000000..e0ee0c1f --- /dev/null +++ b/src/Kripke/Core/DataStore.cpp @@ -0,0 +1,66 @@ + +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + +DataStore::DataStore(){} + +DataStore::~DataStore(){ + + + while(m_vars.size()){ + auto it = m_vars.begin(); + deleteVariable(it->first); + } + +} + +void DataStore::addVariable(std::string const &name, + Kripke::Core::BaseVar *var) +{ + if(m_vars.find(name) != m_vars.end()){ + throw std::domain_error("Variable '" + name + "' already exists"); + } + + m_vars[name] = var; + + var->setParent(this); +} + + +void DataStore::deleteVariable(std::string const &name){ + auto it = m_vars.find(name); + if(it == m_vars.end()){ + throw std::domain_error("Variable '" + name + "' does not exist"); + } + + // destroy object + //printf("Deleting %s\n", name.c_str()); + delete it->second; + + // remove from map + m_vars.erase(it); +} + + +std::vector DataStore::getVariableList() const{ + std::vector var_list; + + for(auto &i : m_vars){ + var_list.push_back(i.first); + } + + return var_list; +} + + +std::string DataStore::getVariableName(BaseVar const &var) const{ + for(auto &i : m_vars){ + if(i.second == &var){ + return i.first; + } + } + return "==="; +} diff --git a/src/Kripke/Core/DataStore.h b/src/Kripke/Core/DataStore.h new file mode 100644 index 00000000..46c09a13 --- /dev/null +++ b/src/Kripke/Core/DataStore.h @@ -0,0 +1,118 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. 
DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_CORE_DATASTORE_H__ +#define KRIPKE_CORE_DATASTORE_H__ + +#include +#include +#include +#include +#include + +namespace Kripke { +namespace Core { + +/** + * Container to store variables by name + */ +class DataStore { + public: + DataStore(); + ~DataStore(); + DataStore(DataStore const &) = delete; + DataStore &operator=(DataStore const &) = delete; + + void addVariable(std::string const &name, Kripke::Core::BaseVar *); + + template + RAJA_INLINE + T &newVariable(std::string const &name, CTOR_ARGS &&... ctor_args){ + T *new_var = new T(ctor_args...); + addVariable(name, new_var); + return *new_var; + } + + void deleteVariable(std::string const &name); + + template + RAJA_INLINE + T &getVariable(std::string const &name){ + + // Perform lookup by name + auto it = m_vars.find(name); + if(it == m_vars.end()){ + throw std::domain_error("Cannot find '" + name + "' in DataStore"); + } + + // Cast from BaseVar* and check for correctness + T *var_ptr = dynamic_cast(it->second); + KRIPKE_ASSERT(var_ptr != nullptr, "Error casting '%s'", name.c_str()); + + return *var_ptr; + } + + template + RAJA_INLINE + T const &getVariable(std::string const &name) const{ + return const_cast(this)-> template getVariable(name); + } + + std::string getVariableName(BaseVar const &var) const; + + + template + RAJA_INLINE + bool isVariableType(std::string const &name) const{ + + // Perform lookup by name + auto it = m_vars.find(name); + if(it == m_vars.end()){ + return false; + } + + // Cast from BaseVar* to see if it's correct type + T *var_ptr = dynamic_cast(it->second); + + return var_ptr != nullptr; + } + + std::vector getVariableList() const; + + private: + std::map m_vars; + +}; + +} } // namespace + +#endif diff --git a/src/Kripke/Core/DomainVar.cpp b/src/Kripke/Core/DomainVar.cpp new file mode 100644 index 00000000..b33ae6ea --- /dev/null +++ b/src/Kripke/Core/DomainVar.cpp @@ -0,0 +1,71 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore 
National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + + +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +void DomainVar::setup_initChunks(Kripke::Core::PartitionSpace const &pspace, + Kripke::Core::SPACE space) +{ + + size_t num_subdomains = pspace.getNumSubdomains(); + size_t num_chunks = pspace.getNumSubdomains(space); + + + // Map subdomains to chunks + m_subdomain_to_chunk.resize(num_subdomains); + for(SdomId sdom_id{0};sdom_id < (int)num_subdomains;++ sdom_id){ + size_t chunk_id = pspace.subdomainToSpace(space, sdom_id); + m_subdomain_to_chunk[*sdom_id] = chunk_id; + } + + // Map chunks to subdomains + m_chunk_to_subdomain.resize(num_chunks); + m_work_list.resize(num_chunks); + for(size_t chunk_id = 0;chunk_id < num_chunks;++ chunk_id){ + SdomId sdom_id = pspace.spaceToSubdomain(space, chunk_id); + m_chunk_to_subdomain[chunk_id] = *sdom_id; + m_work_list[chunk_id] = sdom_id; + } + +} + + +void DomainVar::setup_initChunks(Kripke::Core::DomainVar const &clone_from){ + m_subdomain_to_chunk = clone_from.m_subdomain_to_chunk; + m_chunk_to_subdomain = clone_from.m_chunk_to_subdomain; + m_work_list = clone_from.m_work_list; +} diff --git a/src/Kripke/Core/DomainVar.h b/src/Kripke/Core/DomainVar.h new file mode 100644 index 00000000..7e732afe --- /dev/null +++ b/src/Kripke/Core/DomainVar.h @@ -0,0 +1,98 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. 
Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_CORE_DOMAIN_VAR_H__ +#define KRIPKE_CORE_DOMAIN_VAR_H__ + +#include +#include +#include + +namespace Kripke { +namespace Core { + + /** + * Base class for variables that are defined over a PartitionSpace's + * subdomains + */ + class DomainVar : public Kripke::Core::BaseVar { + public: + DomainVar() = default; + virtual ~DomainVar() = default; + + // Do not allow assignment or copy construction + DomainVar(DomainVar const &) = delete; + DomainVar& operator=(DomainVar const &) = delete; + + RAJA_INLINE + size_t getNumSubdomains() const { + return m_subdomain_to_chunk.size(); + } + + RAJA_INLINE + std::vector const &getWorkList() const { + return m_work_list; + } + + + RAJA_INLINE + void dump() const { + printf("DomainVar:\n"); + + printf(" m_subdomain_to_chunk: "); + for(auto x : m_subdomain_to_chunk){printf("%lu ", (unsigned long)x);} + printf("\n"); + + printf(" m_chunk_to_subdomain: "); + for(auto x : m_chunk_to_subdomain){printf("%lu ", (unsigned long)x);} + printf("\n"); + + printf(" m_work_list: "); + for(auto x : m_work_list){printf("%d ", (int)*x);} + printf("\n"); + } + + protected: + + void setup_initChunks(Kripke::Core::PartitionSpace const &pspace, + Kripke::Core::SPACE space); + + void setup_initChunks(Kripke::Core::DomainVar const &clone_from); + + std::vector m_subdomain_to_chunk; + std::vector m_chunk_to_subdomain; + std::vector m_work_list; + }; + +} } // namespace + +#endif diff --git a/src/Kripke/Core/Field.h b/src/Kripke/Core/Field.h new file mode 100644 index 00000000..a18e01e6 --- /dev/null +++ b/src/Kripke/Core/Field.h @@ -0,0 +1,313 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. 
+ * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_CORE_FIELD_H__ +#define KRIPKE_CORE_FIELD_H__ + +#include +#include +#include +#include +#include +#include + +#ifdef KRIPKE_USE_CHAI +#define DEBUG +#include +#undef DEBUG +#endif + +namespace Kripke { +namespace Core { + /** + * Base class for Field which provides storage allocation + */ + template + class FieldStorage : public Kripke::Core::DomainVar { + public: + using ElementType = ELEMENT; + +#ifndef KRIPKE_USE_CHAI + using ElementPtr = ELEMENT*; +#else + using ElementPtr = chai::ManagedArray; +#endif + + using Layout1dType = RAJA::TypedLayout>; + using View1dType = RAJA::View; + + + /** + * Copies the chunk decomposition of spanned_set and allocates one data + * chunk per chunk, sized by spanned_set.size() for the matching + * subdomain. The address of spanned_set is stored in m_set, so the Set + * has to stay alive while getSet() may be called. + */ + explicit FieldStorage(Kripke::Core::Set const &spanned_set) : + m_set(&spanned_set) + { + + // initialize our decomposition to match that of the specified set + setup_initChunks(spanned_set); + + // allocate all of our chunks, and create layouts for each one + size_t num_chunks = m_chunk_to_subdomain.size(); + m_chunk_to_size.resize(num_chunks, 0); +#ifndef KRIPKE_USE_CHAI + m_chunk_to_data.resize(num_chunks, nullptr); +#else + m_chunk_to_data.resize(num_chunks); +#endif + + for(size_t chunk_id = 0;chunk_id < num_chunks;++ chunk_id){ + + // Get the size of the subdomain from the set + SdomId sdom_id(m_chunk_to_subdomain[chunk_id]); + size_t sdom_size = spanned_set.size(sdom_id); + + m_chunk_to_size[chunk_id] = sdom_size; +#ifndef KRIPKE_USE_CHAI + m_chunk_to_data[chunk_id] = new ElementType[sdom_size]; +#else + m_chunk_to_data[chunk_id].allocate(sdom_size, chai::CPU, + [=](chai::Action action, chai::ExecutionSpace space, size_t bytes){ + /*printf("CHAI[%s, %d]: ", BaseVar::getName().c_str(), (int)chunk_id); + switch(action){ + case chai::ACTION_ALLOC: printf("ALLOC "); break; + case chai::ACTION_FREE: printf("FREE "); break; + case chai::ACTION_MOVE: printf("MOVE "); break; + default: printf("UNKNOWN "); + } + + switch(space){ + case chai::CPU: printf("CPU "); break; +#ifdef KRIPKE_USE_CUDA + case chai::GPU: printf("GPU "); break; +#endif + default: 
printf("UNK "); + } + + printf("%lu bytes\n", (unsigned long) bytes); +*/ + } + + ); +#endif + } + } + + /** + * Frees every chunk with delete[] when KRIPKE_USE_CHAI is not defined. + */ + virtual ~FieldStorage(){ +#ifndef KRIPKE_USE_CHAI + for(auto i : m_chunk_to_data){ + delete[] i; + } +#endif + } + + // Disallow copy construction + FieldStorage(FieldStorage const &) = delete; + + /** + * Returns the number of elements in this subdomain. + */ + RAJA_INLINE + size_t size(Kripke::SdomId sdom_id) const { + size_t chunk_id = m_subdomain_to_chunk[*sdom_id]; + return m_chunk_to_size[chunk_id]; + } + + + /** + * Returns a 1d view over the data of the chunk that holds this subdomain, + * with an extent equal to the chunk's size. + */ + RAJA_INLINE + View1dType getView1d(Kripke::SdomId sdom_id) const { + + size_t chunk_id = m_subdomain_to_chunk[*sdom_id]; + + ElementPtr ptr = m_chunk_to_data[chunk_id]; + size_t sdom_size = m_chunk_to_size[chunk_id]; + + return View1dType(ptr, Layout1dType(sdom_size)); + } + + /** + * Returns a raw pointer to the chunk data for this subdomain, after + * asserting that sdom_id is below the number of subdomains. + */ + RAJA_INLINE + ElementType *getData(Kripke::SdomId sdom_id) const { + KRIPKE_ASSERT(*sdom_id < (int)m_subdomain_to_chunk.size(), + "sdom_id(%d) >= num_subdomains(%d)", + (int)*sdom_id, + (int)(int)m_subdomain_to_chunk.size()); + size_t chunk_id = m_subdomain_to_chunk[*sdom_id]; + +#ifndef KRIPKE_USE_CHAI + return m_chunk_to_data[chunk_id]; +#else + // use pointer conversion to get host pointer + ElementType *ptr = m_chunk_to_data[chunk_id]; + + // return host pointer + return(ptr); + +#endif + } + + + /** + * Returns the Set this storage was constructed over. + */ + RAJA_INLINE + Kripke::Core::Set const &getSet() const { + return *m_set; + } + + protected: + Kripke::Core::Set const *m_set; + std::vector m_chunk_to_size; + std::vector m_chunk_to_data; + }; + + /** + * Defines a multi-dimensional data field defined over a Set + */ + template + class Field : public Kripke::Core::FieldStorage { + public: + + using Parent = Kripke::Core::FieldStorage; + + using ElementType = ELEMENT; +#ifndef KRIPKE_USE_CHAI + using ElementPtr = ELEMENT*; +#else + using ElementPtr = chai::ManagedArray; +#endif + + static constexpr size_t NumDims = sizeof...(IDX_TYPES); + + using DefaultLayoutType = RAJA::TypedLayout>; + using DefaultViewType = RAJA::View; + + 
/** + * Builds the storage through Parent, asserts that NumDims equals the + * dimensionality of spanned_set, and creates one permuted layout per + * chunk from the Set's per-dimension sizes. + */ + template + Field(Kripke::Core::Set const &spanned_set, Order) : + Parent(spanned_set) + { + + KRIPKE_ASSERT(NumDims == spanned_set.getNumDimensions(), + "Number of dimensions must match between Field<%d> and Set<%d>\n", + (int)NumDims, (int)spanned_set.getNumDimensions()); + + auto perm = LayoutInfo::getPermutation(); + + // create layouts for each chunk + size_t num_chunks = Parent::m_chunk_to_subdomain.size(); + m_chunk_to_layout.resize(num_chunks); + for(size_t chunk_id = 0;chunk_id < num_chunks;++ chunk_id){ + + // Create a layout using dim sizes from the Set, and permutation + // defined by the layout function + SdomId sdom_id(Parent::m_chunk_to_subdomain[chunk_id]); + std::array sizes; + for(size_t dim = 0;dim < NumDims;++ dim){ + sizes[dim] = spanned_set.dimSize(sdom_id, dim); + } + + RAJA::Layout &layout = + m_chunk_to_layout[chunk_id]; + layout = RAJA::make_permuted_layout(sizes, perm); + } + } + + virtual ~Field(){ + + } + + + + /** + * Returns a view of this subdomain's chunk using the layout stored for + * that chunk. + */ + RAJA_INLINE + DefaultViewType getView(Kripke::SdomId sdom_id) const { + + size_t chunk_id = Parent::m_subdomain_to_chunk[*sdom_id]; + + auto ptr = Parent::m_chunk_to_data[chunk_id]; + auto layout = m_chunk_to_layout[chunk_id]; + + return DefaultViewType(ptr, layout); + } + + + /** + * Returns a view of this subdomain's chunk whose layout is the stored + * layout passed through RAJA::make_stride_one. + */ + template + RAJA_INLINE + auto getViewOrder(Kripke::SdomId sdom_id) const -> + ViewType + { + size_t chunk_id = Parent::m_subdomain_to_chunk[*sdom_id]; + + ElementPtr ptr = Parent::m_chunk_to_data[chunk_id]; + + using LInfo = LayoutInfo; + using LType = typename LInfo::Layout; + + LType layout = RAJA::make_stride_one(m_chunk_to_layout[chunk_id]); + + return ViewType(ptr, layout); + } + + + + /** + * Prints the name, set pointer, chunk sizes, chunk data and the + * DomainVar maps to stdout. + * NOTE(review): elements are printed with %e -- presumably only meant + * for floating-point ELEMENT types; confirm before calling it on other + * Fields. + */ + RAJA_INLINE + void dump() const { + printf("Field<>:\n"); + printf(" name: %s\n", BaseVar::getName().c_str()); + printf(" m_set: %p\n", Parent::m_set); + + printf(" m_chunk_to_size: "); + for(auto x : Parent::m_chunk_to_size){printf("%lu ", (unsigned long)x);} + printf("\n"); + +#ifndef KRIPKE_USE_CHAI + printf(" m_chunk_to_data: "); + for(auto x : 
Parent::m_chunk_to_data){printf("%p ", x);} + printf("\n"); +#endif + + for(size_t chunk_id = 0;chunk_id < Parent::m_chunk_to_data.size();++ chunk_id){ + + SdomId sdom_id(DomainVar::m_chunk_to_subdomain[chunk_id]); + + ElementType *ptr = Parent::getData(sdom_id); + + printf("Chunk %d Data: ", (int)chunk_id); + for(size_t i = 0;i < Parent::m_chunk_to_size[chunk_id];++ i){ + printf(" %e", ptr[i]); + } + printf("\n"); + } + + Kripke::Core::DomainVar::dump(); + } + + protected: + std::vector m_chunk_to_layout; + }; + +} } // namespace + +#endif + diff --git a/src/Kripke/Core/PartitionSpace.cpp b/src/Kripke/Core/PartitionSpace.cpp new file mode 100644 index 00000000..f3f7dd5d --- /dev/null +++ b/src/Kripke/Core/PartitionSpace.cpp @@ -0,0 +1,308 @@ +#include + +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + +/** + * Builds the per-space communicators for a P x Q x Rx x Ry x Rz rank + * decomposition of base_comm. + * Asserts that the product of the five extents equals base_comm.size(). + */ +PartitionSpace::PartitionSpace(Kripke::Core::Comm &base_comm, + size_t P, size_t Q, size_t Rx, size_t Ry, size_t Rz) : + m_comm_all(base_comm), + m_local_num_sdom{{0,0,0,0,0,0,0}}, + m_global_num_sdom{{0,0,0,0,0,0,0}}, + m_global_sdom_lower{{0,0,0,0,0,0,0}}, + m_proc_layout(P, Q, Rx, Ry, Rz), + m_proc_xyz_layout(Rx, Ry, Rz), + m_global_sdom_layout(0,0,0,0,0) +{ + size_t num_ranks = P*Q*Rx*Ry*Rz; + + // Check that our number of ranks is compatible + KRIPKE_ASSERT(num_ranks == base_comm.size(), + "Number of MPI ranks must match decomposition, expected %lu ranks\n", + (unsigned long)num_ranks); + + // Assign communicators for P,Q,R + m_comm_space[SPACE_PQR] = base_comm; + m_comm_space[SPACE_R] = base_comm; + + // Compute our rank in pqxyz space + std::array rank{{0,0,0,0,0}}; + m_proc_layout.toIndices(base_comm.rank(), + rank[0], rank[1], rank[2], rank[3], rank[4]); + + // Project out dimensions to get our rank coloring in x,y,z + for(size_t space = 0;space < 5;++ space){ + + // Project out space + std::array proj = rank; + proj[space] = 0; + + // Get the coloring of this processor in space + int color = 
m_proc_layout(proj[0], proj[1], proj[2], proj[3], proj[4]); + + // Split the communicator + m_comm_space[space] = base_comm.split(color, rank[space]); + }; + + // Project out the R color and rank + int rank_r = m_proc_layout(0, 0, rank[2], rank[3], rank[4]); + int color_r = m_proc_layout(rank[0], rank[1], 0, 0, 0); + + // Split our R communicator + m_comm_space[SPACE_R] = base_comm.split(color_r, rank_r); + + // Project out the PR color and rank + int rank_pr = m_proc_layout(rank[0], 0, rank[2], rank[3], rank[4]); + int color_pr = m_proc_layout(0, rank[1], 0, 0, 0); + + // Split our PR communicator + m_comm_space[SPACE_PR] = base_comm.split(color_pr, rank_pr); +} + + +/** + * Records the local subdomain counts and layouts for every SPACE from the + * per-rank counts SP, SQ, Sx, Sy and Sz, then uses each space's communicator + * to compute the global subdomain count and this rank's lower offset into it. + */ +void PartitionSpace::setup_createSubdomains( + size_t SP, size_t SQ, size_t Sx, size_t Sy, size_t Sz){ + + size_t num_sdom = SP * SQ * Sx * Sy * Sz; + + m_local_num_sdom[SPACE_PQR] = num_sdom; + m_local_num_sdom[SPACE_P] = SP; + m_local_num_sdom[SPACE_Q] = SQ; + m_local_num_sdom[SPACE_RX] = Sx; + m_local_num_sdom[SPACE_RY] = Sy; + m_local_num_sdom[SPACE_RZ] = Sz; + m_local_num_sdom[SPACE_R] = Sx * Sy * Sz; + m_local_num_sdom[SPACE_PR] = SP * Sx * Sy * Sz; + m_local_num_sdom[SPACE_NULL] = 1; + + m_local_sdom_space_layout[SPACE_P] = RAJA::Layout<5>(SP, 0, 0, 0, 0); + m_local_sdom_space_layout[SPACE_Q] = RAJA::Layout<5>(0, SQ, 0, 0, 0); + m_local_sdom_space_layout[SPACE_RX] = RAJA::Layout<5>(0, 0, Sx, 0, 0); + m_local_sdom_space_layout[SPACE_RY] = RAJA::Layout<5>(0, 0, 0, Sy, 0); + m_local_sdom_space_layout[SPACE_RZ] = RAJA::Layout<5>(0, 0, 0, 0, Sz); + m_local_sdom_space_layout[SPACE_R] = RAJA::Layout<5>(0, 0, Sx, Sy, Sz); + m_local_sdom_space_layout[SPACE_PR] = RAJA::Layout<5>(SP, 0, Sx, Sy, Sz); + m_local_sdom_space_layout[SPACE_PQR] = RAJA::Layout<5>(SP, SQ, Sx, Sy, Sz); + m_local_sdom_space_layout[SPACE_NULL] = RAJA::Layout<5>(0, 0, 0, 0, 0); + + + // Compute global subdomain layout (and our local lower indices) + for(size_t space = 0;space < NUM_SPACES;++ space){ + + // Get the communicator for 
this space + Kripke::Core::Comm const &comm = m_comm_space[space]; + + // Compute the total number of subdomains in this space's partition + m_global_num_sdom[space] = comm.allReduceSumLong(m_local_num_sdom[space]); + + // Compute our lower offset into that global count + m_global_sdom_lower[space] = comm.scanSumLong(m_local_num_sdom[space]) - + m_local_num_sdom[space]; + + } + + m_global_sdom_layout = RAJA::Layout<5>(m_global_num_sdom[SPACE_P], + m_global_num_sdom[SPACE_Q], + m_global_num_sdom[SPACE_RX], + m_global_num_sdom[SPACE_RY], + m_global_num_sdom[SPACE_RZ]); + + +} + +/** + * Creates Set and Field objects that describe the subdomain decomposition. + * + * Registers "Set/SdomId", "Set/GlobalSdomIdLinear", "SdomId2GlobalSdomId", + * "GlobalSdomId2SdomId" and "GlobalSdomId2Rank" in data_store, fills the + * mappings for the local subdomains, and sum-reduces the two global tables + * over m_comm_all. + * NOTE(review): the two global tables are passed to kConst with a zero value + * first; presumably that zero-fills them so the sum-reduction yields each + * owner's entry -- confirm against Kripke::Kernel::kConst. + * + * @param data_store The DataStore in which to create the objects + */ +void PartitionSpace::createSubdomainData(Kripke::Core::DataStore &data_store) const { + + PartitionSpace &pspace = data_store.getVariable("pspace"); + + // Create a Set that has exactly 1 element for each subdomain + auto &set_sdomid = data_store.newVariable("Set/SdomId", + *this, + getNumSubdomains(SPACE_PQR)); + + // Create a linearized version of the above + auto &set_global_sdomid = + data_store.newVariable("Set/GlobalSdomIdLinear", pspace, set_sdomid); + + // Create a Field to store mappings from local subdomains to global + auto &field_local_to_global = + data_store.newVariable( + "SdomId2GlobalSdomId", set_sdomid, camp::list{}); + + auto &field_global_to_local = + data_store.newVariable( + "GlobalSdomId2SdomId", set_global_sdomid, camp::list{}); + Kripke::Kernel::kConst(field_global_to_local, SdomId{0}); + + auto &field_global_to_rank = + data_store.newVariable( + "GlobalSdomId2Rank", set_global_sdomid, camp::list{}); + Kripke::Kernel::kConst(field_global_to_rank, 0); + + + size_t rank = m_comm_all.rank(); + + for(SdomId sdom_id : set_sdomid.getWorkList()){ + auto local_to_global = field_local_to_global.getView(sdom_id); + auto global_to_local = field_global_to_local.getView(sdom_id); + auto global_to_rank = 
field_global_to_rank.getView(sdom_id); + + for(SdomId local{0};local < set_sdomid.size(sdom_id);++ local){ + //GlobalSdomId global(*local + set_sdomid.lower(sdom_id)); + + // Get local subdomain coordinates + SdomCoord local_coord = sdomIdToCoord(local); + + // Offset local coordinate to global coordinates + SdomCoord global_coord = coordToGlobalCoord(local_coord); + + // Convert global coordinates to a GlobalSubdomainId + GlobalSdomId global = coordToGlobalSdomId(global_coord); + + local_to_global(local) = global; + global_to_local(global) = local; + global_to_rank(global) = rank; + } + + // Perform collective to gather global addresses of all subdomains + + m_comm_all.allReduceSumLong(field_global_to_rank.getData(sdom_id), + field_global_to_rank.size(sdom_id)); + + /* NOTE(review): the (int*) cast presumably relies on SdomId being stored as an int -- confirm. */ + m_comm_all.allReduceSumInt((int*)field_global_to_local.getData(sdom_id), + field_global_to_local.size(sdom_id)); + } +} + + +/** + * Returns the number of subdomains this rank holds in the given space. + */ +size_t PartitionSpace::getNumSubdomains(Kripke::Core::SPACE space) const{ + return m_local_num_sdom[space]; +} + +/** + * Returns the number of subdomains in the given space summed over that + * space's communicator. + */ +size_t PartitionSpace::getGlobalNumSubdomains(Kripke::Core::SPACE space) const{ + return m_global_num_sdom[space]; +} + + +/** + * Converts a local subdomain id into its five coordinates using the local + * SPACE_PQR layout. + */ +PartitionSpace::SdomCoord PartitionSpace::sdomIdToCoord(Kripke::SdomId sdom_id) const{ + + SdomCoord coord; + + m_local_sdom_space_layout[SPACE_PQR].toIndices(*sdom_id, + coord[0], coord[1], coord[2], coord[3], coord[4]); + + return coord; +} +/** + * Converts five local coordinates back into a local subdomain id using the + * local SPACE_PQR layout. + */ +Kripke::SdomId PartitionSpace::coordToSdomId(SdomCoord coord) const{ + + SdomId sdom_id(m_local_sdom_space_layout[SPACE_PQR]( + coord[0], coord[1], coord[2], coord[3], coord[4])); + + return sdom_id; +} + +/** + * Offsets each local coordinate by this rank's lower index in the matching + * base space (P, Q, RX, RY, RZ). + */ +PartitionSpace::SdomCoord PartitionSpace::coordToGlobalCoord(SdomCoord local_coord) const{ + SdomCoord global_coord{{ + (ptrdiff_t)(local_coord[0] + m_global_sdom_lower[SPACE_P]), + (ptrdiff_t)(local_coord[1] + m_global_sdom_lower[SPACE_Q]), + (ptrdiff_t)(local_coord[2] + m_global_sdom_lower[SPACE_RX]), + (ptrdiff_t)(local_coord[3] + m_global_sdom_lower[SPACE_RY]), + (ptrdiff_t)(local_coord[4] + 
m_global_sdom_lower[SPACE_RZ]) }}; + + return global_coord; +} +/** + * Converts a global subdomain id into its five global coordinates using the + * global subdomain layout. + */ +PartitionSpace::SdomCoord PartitionSpace::globalSdomIdToCoord(Kripke::GlobalSdomId global_sdom_id) const{ + + SdomCoord coord; + + m_global_sdom_layout.toIndices(*global_sdom_id, + coord[0], coord[1], coord[2], coord[3], coord[4]); + + return coord; +} +/** + * Converts five global coordinates into a global subdomain id using the + * global subdomain layout. + */ +Kripke::GlobalSdomId PartitionSpace::coordToGlobalSdomId(SdomCoord global_coord) const{ + + GlobalSdomId global_sdom_id(m_global_sdom_layout( + global_coord[0], global_coord[1], global_coord[2], global_coord[3], global_coord[4])); + + return global_sdom_id; +} + +/** + * Maps a local subdomain id to its index within the given space, by + * decomposing it with the SPACE_PQR layout and re-linearizing the indices + * with that space's layout. + */ +size_t PartitionSpace::subdomainToSpace( + Kripke::Core::SPACE space, SdomId sdom_id) const +{ + // Map the subdomain id back to the base spaces, P, Q, Rx, Ry, Rz + std::array idx; + m_local_sdom_space_layout[SPACE_PQR].toIndices(*sdom_id, + idx[0], idx[1], idx[2], idx[3], idx[4]); + + size_t space_id = m_local_sdom_space_layout[space]( + idx[0], idx[1], idx[2], idx[3], idx[4]); + + return space_id; +} + + +/** + * Maps an index within the given space to a local subdomain id, by + * decomposing it with that space's layout and re-linearizing the indices + * with the SPACE_PQR layout. + */ +SdomId PartitionSpace::spaceToSubdomain( + Kripke::Core::SPACE space, size_t space_id) const +{ + // build up indices in the P, Q, Rx, Ry, Rz space + std::array idx{{0, 0, 0, 0, 0}}; + m_local_sdom_space_layout[space].toIndices(space_id, + idx[0], idx[1], idx[2], idx[3], idx[4]); + + // convert those indices to a subdomain + SdomId sdom_id{m_local_sdom_space_layout[SPACE_PQR](idx[0], idx[1], idx[2], idx[3], idx[4])}; + + return sdom_id; +} + + +/** + * Prints a table of communicator sizes and local/global subdomain counts per + * space to stdout. Only the rank that is 0 in m_comm_all prints. + */ +void PartitionSpace::print() const{ + if(m_comm_all.rank() == 0){ + printf(" Decomposition Space: Procs: Subdomains (local/global):\n"); + printf(" --------------------- ---------- --------------------------\n"); + printf(" (P) Energy: %-10d %d / %d\n", + (int)m_comm_space[SPACE_P].size(), + (int)m_local_num_sdom[SPACE_P], + (int)m_global_num_sdom[SPACE_P]); + printf(" (Q) Direction: %-10d %d / %d\n", + (int)m_comm_space[SPACE_Q].size(), + (int)m_local_num_sdom[SPACE_Q], + (int)m_global_num_sdom[SPACE_Q]); + printf(" (R) Space: %-10d %d / %d\n", + 
(int)m_comm_space[SPACE_R].size(), + (int)m_local_num_sdom[SPACE_R], + (int)m_global_num_sdom[SPACE_R]); + printf(" (Rx,Ry,Rz) R in XYZ: %dx%dx%d %dx%dx%d / %dx%dx%d\n", + (int)m_comm_space[SPACE_RX].size(), + (int)m_comm_space[SPACE_RY].size(), + (int)m_comm_space[SPACE_RZ].size(), + + (int)m_local_num_sdom[SPACE_RX], + (int)m_local_num_sdom[SPACE_RY], + (int)m_local_num_sdom[SPACE_RZ], + + (int)m_global_num_sdom[SPACE_RX], + (int)m_global_num_sdom[SPACE_RY], + (int)m_global_num_sdom[SPACE_RZ]); + + /* The global total printed here is the product of the five base-space global counts. */ + printf(" (PQR) TOTAL: %-10d %d / %d\n", + (int)m_comm_all.size(), + (int)getNumSubdomains(), + (int)(m_global_num_sdom[SPACE_P] * + m_global_num_sdom[SPACE_Q] * + m_global_num_sdom[SPACE_RX] * + m_global_num_sdom[SPACE_RY] * + m_global_num_sdom[SPACE_RZ])); + + } +} diff --git a/src/Kripke/Core/PartitionSpace.h b/src/Kripke/Core/PartitionSpace.h new file mode 100644 index 00000000..44b67274 --- /dev/null +++ b/src/Kripke/Core/PartitionSpace.h @@ -0,0 +1,129 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_CORE_PARTITION_SPACE_H__ +#define KRIPKE_CORE_PARTITION_SPACE_H__ + +#include +#include +#include +#include + +namespace Kripke { +namespace Core { + +/** + * Identifies the spaces a PartitionSpace keeps communicators and subdomain + * counts for: the five base spaces (P, Q, RX, RY, RZ), the combinations + * R, PR and PQR, and SPACE_NULL. NUM_SPACES is the number of spaces and is + * used to size the per-space arrays. + */ +enum SPACE { + SPACE_P = 0, + SPACE_Q, + SPACE_RX, + SPACE_RY, + SPACE_RZ, + SPACE_R, + SPACE_PR, + SPACE_PQR, + SPACE_NULL, + NUM_SPACES +}; + +/** + * Defines a decomposition of the phase space by subdomains. 
+ */ +class PartitionSpace : public Kripke::Core::BaseVar { + public: + using SdomCoord = std::array; + + PartitionSpace(Kripke::Core::Comm &base_comm, + size_t P, size_t Q, size_t Rx, size_t Ry, size_t Rz); + + virtual ~PartitionSpace() = default; + + void setup_createSubdomains( + size_t SP, size_t SQ, size_t Sx, size_t Sy, size_t Sz); + + void createSubdomainData(Kripke::Core::DataStore &data_store) const; + + size_t getNumSubdomains(Kripke::Core::SPACE space = SPACE_PQR) const; + size_t getGlobalNumSubdomains(Kripke::Core::SPACE space = SPACE_PQR) const; + + SdomCoord sdomIdToCoord(Kripke::SdomId sdom_id) const; + Kripke::SdomId coordToSdomId(SdomCoord coord) const; + + SdomCoord coordToGlobalCoord(SdomCoord local_coord) const; + SdomCoord globalSdomIdToCoord(Kripke::GlobalSdomId global_sdom_id) const; + Kripke::GlobalSdomId coordToGlobalSdomId(SdomCoord global_coord) const; + + + + /** + * Returns the communicator stored for the given space. + */ + Kripke::Core::Comm const &getComm(SPACE space) const { + return m_comm_space[space]; + } + + size_t subdomainToSpace(Kripke::Core::SPACE space, SdomId sdom_id) const; + SdomId spaceToSubdomain(Kripke::Core::SPACE space, size_t sdom_space) const; + + void print() const; + + private: + Kripke::Core::Comm m_comm_all; + + // Parallel decomposition of comm_all + Kripke::Core::Comm m_comm_space[NUM_SPACES]; + + // Decomposition of ranks into subdomains + std::array m_local_num_sdom; + std::array m_global_num_sdom; + std::array m_global_sdom_lower; + + + std::array, NUM_SPACES> m_local_sdom_space_layout; + + RAJA::Layout<5> m_proc_layout; + RAJA::Layout<3> m_proc_xyz_layout; + RAJA::Layout<5> m_global_sdom_layout; + +}; + + + +/** + * Forward declaration of Field, used by the aliases below. + */ +template +class Field; + +} // namespace Core + +using Field_SdomId2GlobalSdomId = Kripke::Core::Field; +using Field_GlobalSdomId2Rank = Kripke::Core::Field; +using Field_GlobalSdomId2SdomId = Kripke::Core::Field; + + +} // namespace + +#endif diff --git a/src/Kripke/Core/Set.cpp b/src/Kripke/Core/Set.cpp new file mode 100644 index 00000000..f96ff3e5 --- /dev/null 
+++ b/src/Kripke/Core/Set.cpp @@ -0,0 +1,173 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#include + + +using namespace Kripke; +using namespace Kripke::Core; + + + +/***************************************************************************** + * + * Kripke::Core::Set + * + *****************************************************************************/ + + +/** + * Constructs a Set with a global size of zero. + */ +Set::Set() : + m_global_size(0) +{ + +} + + +/** + * Default dimSize: ignores the dimension argument and returns size(sdom_id). + */ +size_t Set::dimSize(Kripke::SdomId sdom_id, size_t ) const{ + return size(sdom_id); +} + + +/***************************************************************************** + * + * Kripke::RangeSet + * + *****************************************************************************/ + +/** + * Constructs a RangeSet over the given space of pspace, with one local size + * per subdomain taken from local_sizes. + */ +RangeSet::RangeSet(Kripke::Core::PartitionSpace const &pspace, Kripke::Core::SPACE space, + std::vector const &local_sizes) : + m_space(space) +{ + setup_setupByLocalSize(pspace, local_sizes); +} + + +/** + * Initializes the chunks for m_space, stores the per-chunk sizes, and + * computes the global size and each chunk's global lower offset using the + * communicator of m_space. + * + * Asserts that local_sizes has one entry per chunk. + */ +void RangeSet::setup_setupByLocalSize(Kripke::Core::PartitionSpace const &pspace, + std::vector const &local_sizes) +{ + + Comm const &comm = pspace.getComm(m_space); + + // Figure out number of subdomains and chunks + setup_initChunks(pspace, m_space); + size_t num_chunks = m_chunk_to_subdomain.size(); + + KRIPKE_ASSERT(local_sizes.size() == num_chunks, + "Space %d has %lu subdomains, but provided %lu subdomains", + (int)m_space, + (unsigned long)m_chunk_to_subdomain.size(), + (unsigned long)local_sizes.size()); + + + + // Compute global size + long total_local = 0; + for(size_t s : local_sizes){ + total_local += s; + } + m_global_size = comm.allReduceSumLong(total_local); + + + // Copy in local subdomain sizes + m_chunk_to_size = local_sizes; + + + // Compute global offsets for each chunk. + // scanSumLong is still called exactly once, as before; the running offset + // avoids writing m_chunk_to_lower[0] when there are no chunks at all. + m_chunk_to_lower.resize(num_chunks); + size_t lower = comm.scanSumLong(total_local) - total_local; + for(size_t i = 0;i < num_chunks;++ i){ + m_chunk_to_lower[i] = lower; + lower += m_chunk_to_size[i]; + } + +} + + +/***************************************************************************** + * + * Kripke::LocalRangeSet + * + 
*****************************************************************************/ + +/** + * Constructs a Set with a single chunk (SPACE_NULL) of local_size elements. + * The global size and this rank's lower offset are computed over the + * SPACE_PQR communicator. + */ +LocalRangeSet::LocalRangeSet(Kripke::Core::PartitionSpace const &pspace, + size_t local_size) +{ + + Comm const &comm = pspace.getComm(SPACE_PQR); + + // Figure out number of subdomains and chunks + setup_initChunks(pspace, SPACE_NULL); + size_t num_chunks = m_chunk_to_subdomain.size(); + + KRIPKE_ASSERT(num_chunks == 1, "Something's wrong, SPACE_NULL should have 1"); + + // Compute global size + m_global_size = comm.allReduceSumLong(local_size); + + // Copy in local subdomain size + m_chunk_to_size = {local_size}; + + // Compute global offsets for each chunk + m_chunk_to_lower.resize(num_chunks); + /* Index 0 is the only chunk, per the assertion above. */ + m_chunk_to_lower[0] = comm.scanSumLong(local_size) - local_size; +} + +/***************************************************************************** + * + * Kripke::GlobalRangeSet + * + *****************************************************************************/ +/** + * Constructs a Set whose single chunk holds global_size elements. + */ +GlobalRangeSet::GlobalRangeSet(Kripke::Core::PartitionSpace const &pspace, + size_t global_size) +{ + setup_setGlobalSize(pspace, global_size); +} + + +/** + * Constructs a Set whose single chunk holds parent_set.globalSize() elements. + */ +GlobalRangeSet::GlobalRangeSet(Kripke::Core::PartitionSpace const &pspace, Kripke::Core::Set &parent_set) +{ + setup_setGlobalSize(pspace, parent_set.globalSize()); +} + + +/** + * Initializes the chunks for SPACE_NULL and sets the chunk size and the + * global size to global_size, with a lower offset of 0. + */ +void GlobalRangeSet::setup_setGlobalSize(Kripke::Core::PartitionSpace const &pspace, + size_t global_size) +{ + + setup_initChunks(pspace, SPACE_NULL); + + // Kripke::Core::Set + m_chunk_to_size.resize(1, global_size); + m_chunk_to_lower.resize(1, 0); + m_global_size = global_size; +} + + diff --git a/src/Kripke/Core/Set.h b/src/Kripke/Core/Set.h new file mode 100644 index 00000000..100ccd15 --- /dev/null +++ b/src/Kripke/Core/Set.h @@ -0,0 +1,234 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. 
+ * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_CORE_SET_H__ +#define KRIPKE_CORE_SET_H__ + +#include +#include +#include +#include +#include + +namespace Kripke { + +namespace Core { + /** + * Base class for defining an ordered set used for dimensioning Fields + */ + class Set : public Kripke::Core::DomainVar { + public: + /** + * Constructs a Set with a global size of zero. + */ + Set(); + virtual ~Set() = default; + + // Don't allow copy construction + Set(Set const &) = delete; + + /** + * Returns the number of elements in this subdomain. 
+ */ + RAJA_INLINE + size_t size(Kripke::SdomId sdom_id) const { + size_t chunk_id = m_subdomain_to_chunk[*sdom_id]; + return m_chunk_to_size[chunk_id]; + } + + + /** + * Returns the range of a subdomain using a RAJA::RangeSegment + */ + RAJA_INLINE + RAJA::RangeSegment range(Kripke::SdomId sdom_id) const { + return RAJA::RangeSegment(0, size(sdom_id)); + } + + + /** + * Returns the first global index for this subdomain. + */ + RAJA_INLINE + size_t lower(Kripke::SdomId sdom_id) const { + size_t chunk_id = m_subdomain_to_chunk[*sdom_id]; + return m_chunk_to_lower[chunk_id]; + } + + /** + * Returns the global number of unique elements in this set. + */ + RAJA_INLINE + size_t globalSize() const { + return m_global_size; + } + + /** + * Returns the dimensionality of this Set. + */ + virtual size_t getNumDimensions() const = 0; + + /** + * Returns the size of this Set along the specified dimension + */ + virtual size_t dimSize(Kripke::SdomId sdom_id, size_t dim) const; + + protected: + std::vector m_chunk_to_size; + std::vector m_chunk_to_lower; + size_t m_global_size; + }; + + + /** + * One-dimensional Set over the subdomains of a given SPACE, with a + * caller-supplied local size for each subdomain. + */ + class RangeSet : public Kripke::Core::Set { + public: + RangeSet(Kripke::Core::PartitionSpace const &pspace, Kripke::Core::SPACE space, + std::vector const &local_sizes); + + virtual ~RangeSet() = default; + + RAJA_INLINE + virtual size_t getNumDimensions() const{return 1;} + + private: + void setup_setupByLocalSize(Kripke::Core::PartitionSpace const &pspace, + std::vector const &local_sizes); + Kripke::Core::SPACE m_space; + }; + + + /** + * One-dimensional Set that is initialized over SPACE_NULL and holds + * local_size elements in its single chunk. + */ + class LocalRangeSet : public Kripke::Core::Set { + public: + LocalRangeSet(Kripke::Core::PartitionSpace const &pspace, size_t local_size); + + virtual ~LocalRangeSet() = default; + + RAJA_INLINE + virtual size_t getNumDimensions() const{return 1;} + }; + + + /** + * One-dimensional Set that is initialized over SPACE_NULL and whose chunk + * size and global size are both the given global size. + */ + class GlobalRangeSet : public Kripke::Core::Set { + public: + GlobalRangeSet(Kripke::Core::PartitionSpace const &pspace, size_t global_size); + GlobalRangeSet(Kripke::Core::PartitionSpace const &pspace, 
Kripke::Core::Set &parent_set); + + virtual ~GlobalRangeSet() = default; + + RAJA_INLINE + virtual size_t getNumDimensions() const{return 1;} + + private: + void setup_setGlobalSize(Kripke::Core::PartitionSpace const &pspace, size_t global_size); + }; + + + /** + * Set formed from NUM_SETS other Sets: its size in each subdomain and its + * global size are the products of the spanned Sets' sizes, and dimSize() + * reports the size of the spanned Set for that dimension. + */ + template + class ProductSet : public Kripke::Core::Set { + public: + + using LayoutType = RAJA::Layout; + + /** + * Initializes the chunks for the given space and records the addresses + * of the spanned Sets. The number of Sets passed must equal NUM_SETS + * (checked with static_assert). + */ + template + ProductSet(Kripke::Core::PartitionSpace &pspace, Kripke::Core::SPACE space, + SPAN const &... spanned_sets){ + static_assert(sizeof...(SPAN) == NUM_SETS, + "Must provide same number of sets as dimensionality of ProductSet"); + + setup_initChunks(pspace, space); + setup_setSpannedSets({{(&spanned_sets)...}}); + + } + + virtual ~ProductSet() = default; + + virtual size_t getNumDimensions() const{ + return s_num_sets; + } + + /** + * Returns the size of this Set along the specified dimension + */ + virtual size_t dimSize(Kripke::SdomId sdom_id, size_t dim) const{ + return m_spanned_sets[dim]->size(sdom_id); + } + + /** + * Returns a layout built by RAJA::make_permuted_layout from the + * per-dimension sizes of this subdomain. + */ + RAJA_INLINE + LayoutType getLayout(Kripke::SdomId sdom_id) const { + + std::array sizes; + for(size_t dim = 0;dim < NUM_SETS;++ dim){ + sizes[dim] = dimSize(sdom_id, dim); + } + + //auto perm = camp::make_idx_seq::array(); + auto perm = RAJA::as_array>::get(); + + return RAJA::make_permuted_layout(sizes, perm); + } + + private: + + /** + * Helper function to expand variadic arguments to the constructor. 
+ */ + void setup_setSpannedSets( + std::array const &spanned_sets){ + m_spanned_sets = spanned_sets; + + size_t num_chunks = m_chunk_to_subdomain.size(); + m_chunk_to_size.resize(num_chunks, 1); + m_chunk_to_lower.resize(num_chunks, 0); + for(size_t chunk_id = 0;chunk_id < num_chunks;++ chunk_id){ + Kripke::SdomId sdom_id(m_chunk_to_subdomain[chunk_id]); + for(size_t set_id = 0;set_id < NUM_SETS;++ set_id){ + m_chunk_to_size[chunk_id] *= spanned_sets[set_id]->size(sdom_id); + } + } + + // Compute global size + m_global_size = 1; + for(size_t set_id = 0;set_id < NUM_SETS;++ set_id){ + m_global_size *= spanned_sets[set_id]->globalSize(); + } + } + + static const size_t s_num_sets = NUM_SETS; + + std::array m_spanned_sets; + + + }; + + + +} } // namespace + +#endif + diff --git a/src/Kripke/Core/VarLayout.h b/src/Kripke/Core/VarLayout.h new file mode 100644 index 00000000..f62baad3 --- /dev/null +++ b/src/Kripke/Core/VarLayout.h @@ -0,0 +1,204 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#ifndef KRIPKE_CORE_VARLAYOUT_H__ +#define KRIPKE_CORE_VARLAYOUT_H__ + +namespace Kripke { +namespace Core { + +/* + * Helper class that finds type T in a camp::list, and gives the index into + * the type list. + * + * N is the size of the type list + * Order is a camp::list that contains the type list + * T is the type to search for in Order + * + * The resulting member 'value' contains the offset into Order of T. + * value is -1 when T is not contained in Order. + * + */ +template +struct GetOrderHelper; + +template +struct GetOrderHelper, T>{ + static const camp::idx_t value = GetOrderHelper, T>::value; +}; + +template +struct GetOrderHelper, OrderT>{ + static const camp::idx_t value = (N-1) - sizeof...(OrderTRest); +}; + +template +constexpr camp::idx_t getOrder(){ + return GetOrderHelper::value, Order, T>::value; +} + + + +/* + * A helper class that determines which index of a Field is stride-one. + * + * This is the same as asking which of the Fields types has the highest + * offset in the index Order list. 
+ * + * 'i' is the number of types types + * 'Order' is the ordering of all index types + * 'Types' is the index types appearing in the Field class + */ +template +struct ExtractStrideOne; + +template +struct ExtractStrideOne>{ + using LTypes = camp::list; + using T = camp::at_v; + using next_t = ExtractStrideOne; + + static constexpr camp::idx_t our_value = getOrder(); + + static constexpr camp::idx_t value = + next_t::value > our_value ? next_t::value : our_value; + + static constexpr camp::idx_t arg = + next_t::value > our_value ? next_t::arg : i; +}; + +template +struct ExtractStrideOne<0, Order, camp::list>{ + using LTypes = camp::list; + using T = camp::at_v; + + static constexpr camp::idx_t value = getOrder(); + static constexpr camp::idx_t arg = 0; +}; + + + + +template +struct ArgsToOrder { + + static constexpr camp::idx_t num_types = sizeof...(T); + using type = camp::idx_seq()...>; + + using array_t = std::array; + + + static constexpr camp::idx_t stride_one = + ExtractStrideOne<((camp::idx_t)sizeof...(T))-1, Order, camp::list >::arg; + + template + static array_t toArray_expanded(bool debug, camp::idx_seq, camp::idx_seq){ + using pair_t = std::pair; + using parray_t = std::array; + + parray_t p{{pair_t{RangeInts, OrderInts}...}}; + + std::sort(p.begin(), p.end(), + [=](pair_t const & a, pair_t const & b){ + return a.second < b.second; + }); + + if(debug){ + array_t a{{(p[RangeInts].second)...}}; + return a; + } + else{ + array_t a{{(p[RangeInts].first)...}}; + return a; + } + } + + static array_t toArray(bool debug = false){ + return toArray_expanded(debug, camp::make_idx_seq_t{}, camp::idx_seq()...>{}); + } + + + static void print(){ + array_t a = toArray(true); + array_t b = toArray(false); + + printf("A:"); + for(camp::idx_t i = 0;i < (camp::idx_t)sizeof...(T);++i){ + printf("%d ", (int)a[i]); + } + printf(" B:"); + for(camp::idx_t i = 0;i < (camp::idx_t)sizeof...(T);++i){ + printf("%d ", (int)b[i]); + } + printf(" [stride-one arg=%d]\n", 
(int)stride_one); + } + +}; + + + +/* + * Default layout is canonical ordering. + * + * This class is specialized for fields that needs data layouts to change. + */ + +template +struct LayoutInfo { + + using args_to_order_t = ArgsToOrder; + + // Default stride-one-index is the right-most index + constexpr static ptrdiff_t num_dims = sizeof...(IndexTypes); + constexpr static ptrdiff_t stride_one_dim = args_to_order_t::stride_one; + + //using Layout = RAJA::TypedLayout>; + using Layout = RAJA::TypedLayout, stride_one_dim>; + + static std::array getPermutation(){ + return args_to_order_t::toArray(); + } +}; + + +template +using LayoutType = typename LayoutInfo::Layout; + +template +using ViewType = RAJA::View, ElementPtr>; + + + +} // namespace Core +} // namespace Kripke + +#endif diff --git a/src/Kripke/Directions.cpp b/src/Kripke/Directions.cpp deleted file mode 100644 index e1a8fa4b..00000000 --- a/src/Kripke/Directions.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. 
Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { - /* - GaussLegendre returns the n point Gauss-Legendre quadrature rule for - the integral between x1 and x2. - */ - void GaussLegendre(double x1, double x2, std::vector &x, - std::vector &w, double eps) - { - int n = x.size(); - int m, j, i; - double z1, z, xm, xl, pp, p3, p2, p1; - - m=(n+1)/2; - xm=0.5*(x2+x1); - xl=0.5*(x2-x1); - for(i=1; i<=m; i++){ - z=cos(M_PI*(i-0.25)/(n+0.5)); - do { - p1=1.0; - p2=0.0; - for(j=1; j<=n; j++){ - p3=p2; - p2=p1; - p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j; - } - pp=n*(z*p1-p2)/(z*z-1.0); - z1=z; - z=z1-p1/pp; - } while(fabs(z-z1) > eps); - x[i-1]=xm-xl*z; - x[n-i]=xm+xl*z; - w[i-1]=2.0*xl/((1.0-z*z)*pp*pp); - - w[n-i]=w[i-1]; - } - } - - - bool dirSortFcn(Directions const &a, Directions const &b){ - return b.octant < a.octant; - } -} - -/** - * Initializes the quadrature set information for a Grid_Data object. - * This guarantees that each pair have a single originating octant. 
- */ -void InitDirections(Grid_Data *grid_data, Input_Variables *input_vars) -{ - std::vector &directions = grid_data->directions; - - // Get set description from user - int num_directions_per_octant = input_vars->num_directions/8; - int num_directions = input_vars->num_directions; - - // allocate storage - directions.resize(num_directions); - - // Are we running a REAL quadrature set? - int num_polar = input_vars->quad_num_polar; - int num_azimuth = input_vars->quad_num_azimuthal; - - std::vector polar_cos; - std::vector polar_weight; - if(num_polar > 0){ - // make sure the user specified the correct number of quadrature points - if(num_polar % 4 != 0){ - printf("Must have number of polar angles be a multiple of 4\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } - if(num_azimuth % 2 != 0){ - printf("Must have number of azimuthal angles be a multiple of 2\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } - if(num_polar*num_azimuth != num_directions){ - printf("You need to specify %d total directions, not %d\n", - num_polar*num_azimuth, num_directions); - MPI_Abort(MPI_COMM_WORLD, 1); - } - - // Compute gauss legendre weights - polar_cos.resize(num_polar); - polar_weight.resize(num_polar); - GaussLegendre(-1.0, 1.0, polar_cos, polar_weight, DBL_EPSILON); - - // compute azmuhtal angles and weights - std::vector az_angle(num_azimuth); - std::vector az_weight(num_azimuth); - double dangle = 2.0*M_PI/((double) num_azimuth); - - for(int i=0; i 0.) ? 1 : -1; - directions[d].jd = (ycos > 0.) ? 1 : -1; - directions[d].kd = (zcos > 0.) ? 1 : -1; - - directions[d].octant = 0; - if(directions[d].id == -1){ - directions[d].octant += 1; - } - if(directions[d].jd == -1){ - directions[d].octant += 2; - } - if(directions[d].kd == -1){ - directions[d].octant += 4; - } - - directions[d].xcos = std::abs(xcos); - directions[d].ycos = std::abs(ycos); - directions[d].zcos = std::abs(zcos); - directions[d].w = w; - - ++ d; - } - } - - // Sort by octant.. 
so each set has same directions - std::sort(directions.begin(), directions.end(), dirSortFcn); - } - else{ - // Do (essentialy) an S2 quadrature.. but with repeated directions - - // Compute x,y,z cosine values - double mu = cos(M_PI/4); - double eta = sqrt(1-mu*mu) * cos(M_PI/4); - double xi = sqrt(1-mu*mu) * sin(M_PI/4); - int d = 0; - for(int octant = 0;octant < 8;++ octant){ - double omegas[3]; - omegas[0] = octant & 0x1; - omegas[1] = (octant>>1) & 0x1; - omegas[2] = (octant>>2) & 0x1; - - for(int sd=0; sd 0.) ? 1 : -1; - directions[d].jd = (omegas[1] > 0.) ? 1 : -1; - directions[d].kd = (omegas[2] > 0.) ? 1 : -1; - - // Store quadrature point's weight - directions[d].w = 4.0*M_PI / (double)num_directions; - directions[d].xcos = mu; - directions[d].ycos = eta; - directions[d].zcos = xi; - } - } - } -} - - - - diff --git a/src/Kripke/Generate.cpp b/src/Kripke/Generate.cpp new file mode 100644 index 00000000..a7a5dd09 --- /dev/null +++ b/src/Kripke/Generate.cpp @@ -0,0 +1,119 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +void Kripke::generateProblem(Kripke::Core::DataStore &data_store, + InputVariables const &input_vars) +{ + + Comm default_comm; + + if(default_comm.rank() == 0){ + printf("\nGenerating Problem\n"); + printf("==================\n\n"); + } + + // Create and start a timing object + data_store.addVariable("timing", new Kripke::Timing()); + KRIPKE_TIMER(data_store, Generate); + + + // Create parallel and subdomain decomposition + Generate::generateDecomp(data_store, input_vars); + + // Create energy discretization + Generate::generateEnergy(data_store, input_vars); + + // Create angular discretization, quadrature set and L/L+ matrices + Generate::generateQuadrature(data_store, input_vars); + + // Create a spatial mesh, and paint it with materials + Generate::generateSpace(data_store, input_vars); + + // Create cross sections and transfer matrix + Generate::generateData(data_store, input_vars); + + + + // Display all of the fields that were created, and what their sizes are + if(default_comm.rank() == 0){ + + // Collect variables that are Fields of doubles + 
std::vector field_names; + for(auto const &var_name : data_store.getVariableList()){ + if(data_store.isVariableType>(var_name)){ + field_names.push_back(var_name); + } + } + std::sort(field_names.begin(), field_names.end()); + + printf("\n"); + printf(" Memory breakdown of Field variables:\n"); + printf(" Field Variable Num Elements Megabytes\n"); + printf(" -------------- ------------ ---------\n"); + + unsigned long total_size = 0; + for(auto const &field_name : field_names){ + + unsigned long field_size = data_store.getVariable>(field_name).getSet().globalSize(); + total_size += field_size; + + printf(" %-24s %12lu %12.3lf\n", + field_name.c_str(), + field_size, + (double)field_size*8.0/1024.0/1024.0); + } + + printf(" -------- ------------ ---------\n"); + printf(" TOTAL %12lu %12.3lf\n", + total_size, + (double)total_size*8.0/1024.0/1024.0); + + printf("\n"); + printf(" Generation Complete!\n"); + } +} diff --git a/src/Kripke/Kernel.cpp b/src/Kripke/Generate.h similarity index 62% rename from src/Kripke/Kernel.cpp rename to src/Kripke/Generate.h index 6d6c7acb..0ff9acf2 100644 --- a/src/Kripke/Kernel.cpp +++ b/src/Kripke/Generate.h @@ -30,40 +30,42 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. 
*/ -#include -#include -#include +#ifndef KRIPKE_GENERATE_H__ +#define KRIPKE_GENERATE_H__ -#include -#include -#include -#include -#include -#include +#include +#include +#include +namespace Kripke { + + /** + * Takes an Input_Variables object and generates a problem in the DataStore + */ + void generateProblem(Kripke::Core::DataStore &data_store, + InputVariables const &input_variables); + + + namespace Generate { + + + void generateDecomp(Kripke::Core::DataStore &data_store, + InputVariables const &input_variables); + + void generateEnergy(Kripke::Core::DataStore &data_store, + InputVariables const &input_variables); + + void generateQuadrature(Kripke::Core::DataStore &data_store, + InputVariables const &input_variables); + + void generateSpace(Kripke::Core::DataStore &data_store, + InputVariables const &input_variables); + + void generateData(Kripke::Core::DataStore &data_store, + InputVariables const &input_variables); -/** - * Factory to create a kernel object for the specified nesting - */ -Kernel *createKernel(Nesting_Order nest, int num_dims){ - if(num_dims == 3){ - switch(nest){ - case NEST_GDZ: - return new Kernel_3d_GDZ(); - case NEST_DGZ: - return new Kernel_3d_DGZ(); - case NEST_ZDG: - return new Kernel_3d_ZDG(); - case NEST_DZG: - return new Kernel_3d_DZG(); - case NEST_ZGD: - return new Kernel_3d_ZGD(); - case NEST_GZD: - return new Kernel_3d_GZD(); - } } - MPI_Abort(MPI_COMM_WORLD, 1); - return NULL; } +#endif diff --git a/src/Kripke/Generate/Data.cpp b/src/Kripke/Generate/Data.cpp new file mode 100644 index 00000000..4486502e --- /dev/null +++ b/src/Kripke/Generate/Data.cpp @@ -0,0 +1,134 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. 
+ * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +void Kripke::Generate::generateData(Kripke::Core::DataStore &data_store, + InputVariables const &input_vars) +{ + + PartitionSpace &pspace = data_store.getVariable("pspace"); + + + // Create a set to span angular the flux + Set const &dir_set = data_store.getVariable("Set/Direction"); + Set const &group_set = data_store.getVariable("Set/Group"); + Set const &zone_set = data_store.getVariable("Set/Zone"); + ProductSet<3> *flux_set = new ProductSet<3>(pspace, SPACE_PQR, + dir_set, group_set, zone_set); + + data_store.addVariable("Set/Flux", flux_set); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + // Create Solution and RHS fields + createField(data_store, "psi", al_v, *flux_set); + createField(data_store, "rhs", al_v, *flux_set); + + + // Create a set to span moments of the angular flux + Set const &moment_set = data_store.getVariable("Set/Moment"); + ProductSet<3> *fluxmoment_set = new ProductSet<3>(pspace, SPACE_PR, + moment_set, group_set, zone_set); + + data_store.addVariable("Set/FluxMoment", fluxmoment_set); + + + // Create flux moment and source moment fields + createField(data_store, "phi", al_v, *fluxmoment_set); + createField(data_store, "phi_out", al_v, *fluxmoment_set); + + + // Create "plane data" to hold face-centered values while sweeping + Set const &zonei_set = data_store.getVariable("Set/ZoneI"); + Set const &zonej_set = data_store.getVariable("Set/ZoneJ"); + Set const &zonek_set = data_store.getVariable("Set/ZoneK"); + Set const &iplane_set = data_store.newVariable>("Set/IPlane", pspace, SPACE_PQR, dir_set, group_set, zonej_set, zonek_set); + Set const &jplane_set = data_store.newVariable>("Set/JPlane", pspace, SPACE_PQR, dir_set, group_set, zonei_set, zonek_set); + Set const &kplane_set = data_store.newVariable>("Set/KPlane", pspace, SPACE_PQR, dir_set, group_set, zonei_set, 
zonej_set); + createField(data_store, "i_plane", al_v, iplane_set); + createField(data_store, "j_plane", al_v, jplane_set); + createField(data_store, "k_plane", al_v, kplane_set); + + // Create a set to span scattering transfer matrix + Set const &material_set = data_store.getVariable("Set/Material"); + Set const &legendre_set = data_store.getVariable("Set/Legendre"); + Set const &global_group_set = data_store.getVariable("Set/GlobalGroup"); + ProductSet<4> *sigs_set = new ProductSet<4>(pspace, SPACE_NULL, + material_set, legendre_set, global_group_set, global_group_set); + + data_store.addVariable("Set/SigmaS", sigs_set); + + + // Create storage for the scattering transfer matrix + createField(data_store, "data/sigs", al_v, *sigs_set); + auto &field_sigs = data_store.getVariable("data/sigs"); + + // Zero out entire matrix + Kripke::Kernel::kConst(field_sigs, 0.0); + + // Assign basic diagonal data to matrix + for(auto sdom_id : field_sigs.getWorkList()){ + + // Assign diagonal to the user input for each material + // Assume each group has same behavior + auto sigs = field_sigs.getView(sdom_id); + int global_num_groups = global_group_set.size(sdom_id); + Legendre n{0}; + for(Material mat{0};mat < 3;++ mat){ + RAJA::forall( + RAJA::TypedRangeSegment(0, global_num_groups), + [=](GlobalGroup g){ + sigs(mat, n, g, g) = input_vars.sigs[*mat]; + }); + } + } + + + +} + + + + diff --git a/src/Kripke/Generate/Decomp.cpp b/src/Kripke/Generate/Decomp.cpp new file mode 100644 index 00000000..c0837585 --- /dev/null +++ b/src/Kripke/Generate/Decomp.cpp @@ -0,0 +1,83 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. 
+ * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +void Kripke::Generate::generateDecomp(Kripke::Core::DataStore &data_store, + InputVariables const &input_vars) +{ + // Create a "Comm World" + auto &comm = data_store.newVariable("comm"); + + // Create our ArchLayout object to describe how we are going to + // execute, and what data layouts we want + auto &al_var = data_store.newVariable("al"); + al_var.al_v = input_vars.al_v; + + // Create our partitioning over MPI + auto &pspace = data_store.newVariable("pspace", + comm, + 1, + 1, + input_vars.npx, + input_vars.npy, + input_vars.npz); + + // Create our local partition over subdomains + pspace.setup_createSubdomains( + input_vars.num_groupsets, + input_vars.num_dirsets, + input_vars.num_zonesets_dim[0], + input_vars.num_zonesets_dim[1], + input_vars.num_zonesets_dim[2]); + + // Create utility Sets and Fields that describe our global subdomain layout + pspace.createSubdomainData(data_store); + pspace.print(); + + +} + + diff --git a/src/Kripke/Directions.h b/src/Kripke/Generate/Energy.cpp similarity index 61% rename from src/Kripke/Directions.h rename to src/Kripke/Generate/Energy.cpp index ff2afeed..f2d7f758 100644 --- a/src/Kripke/Directions.h +++ b/src/Kripke/Generate/Energy.cpp @@ -30,32 +30,40 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. 
*/ -#ifndef KRIPKE_DIRECTIONS_H__ -#define KRIPKE_DIRECTIONS_H__ +#include -#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +void Kripke::Generate::generateEnergy(Kripke::Core::DataStore &data_store, + InputVariables const &input_vars) +{ + + PartitionSpace &pspace = data_store.getVariable("pspace"); + + // Create sets for energy discretization + size_t ngrp_per_sdom = input_vars.num_groups / + pspace.getGlobalNumSubdomains(SPACE_P); + + std::vector local_grps(pspace.getNumSubdomains(SPACE_P), + ngrp_per_sdom); + + RangeSet *grp_set = new RangeSet(pspace, SPACE_P, local_grps); + data_store.addVariable("Set/Group", grp_set); + + GlobalRangeSet *global_grp_set = new GlobalRangeSet(pspace, *grp_set); + data_store.addVariable("Set/GlobalGroup", global_grp_set); + + +} -class Grid_Data; -struct Input_Variables; -/** - * Contains information needed for one quadrature set direction. - */ -struct Directions{ - double xcos; /* Absolute value of the x-direction cosine. */ - double ycos; /* Absolute value of the y-direction cosine. */ - double zcos; /* Absolute value of the z-direction cosine. */ - double w; /* weight for the quadrature rule.*/ - int id; /* direction flag (= 1 if x-direction - cosine is positive; = -1 if not). */ - int jd; /* direction flag (= 1 if y-direction - cosine is positive; = -1 if not). */ - int kd; /* direction flag (= 1 if z-direction - cosine is positive; = -1 if not). */ - int octant; -}; - - -void InitDirections(Grid_Data *grid_data, Input_Variables *input_vars); - -#endif diff --git a/src/Kripke/Generate/Quadrature.cpp b/src/Kripke/Generate/Quadrature.cpp new file mode 100644 index 00000000..a9e1d1ca --- /dev/null +++ b/src/Kripke/Generate/Quadrature.cpp @@ -0,0 +1,546 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. 
+ * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +namespace { + + /** + * Contains information needed for one quadrature set direction. + */ + struct QuadraturePoint { + double xcos; /* Absolute value of the x-direction cosine. */ + double ycos; /* Absolute value of the y-direction cosine. */ + double zcos; /* Absolute value of the z-direction cosine. 
*/ + double w; /* weight for the quadrature rule.*/ + int id; /* direction flag (= 1 if x-direction + cosine is positive; = -1 if not). */ + int jd; /* direction flag (= 1 if y-direction + cosine is positive; = -1 if not). */ + int kd; /* direction flag (= 1 if z-direction + cosine is positive; = -1 if not). */ + int octant; + }; + + + /* + GaussLegendre returns the n point Gauss-Legendre quadrature rule for + the integral between x1 and x2. + */ + void GaussLegendre(double x1, double x2, std::vector &x, + std::vector &w, double eps) + { + int n = x.size(); + int m, j, i; + double z1, z, xm, xl, pp, p3, p2, p1; + + m=(n+1)/2; + xm=0.5*(x2+x1); + xl=0.5*(x2-x1); + for(i=1; i<=m; i++){ + z=cos(M_PI*(i-0.25)/(n+0.5)); + do { + p1=1.0; + p2=0.0; + for(j=1; j<=n; j++){ + p3=p2; + p2=p1; + p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j; + } + pp=n*(z*p1-p2)/(z*z-1.0); + z1=z; + z=z1-p1/pp; + } while(fabs(z-z1) > eps); + x[i-1]=xm-xl*z; + x[n-i]=xm+xl*z; + w[i-1]=2.0*xl/((1.0-z*z)*pp*pp); + + w[n-i]=w[i-1]; + } + } + + + bool dirSortFcn(QuadraturePoint const &a, QuadraturePoint const &b){ + return b.octant < a.octant; + } + + double FactFcn(int n) + { + double fact = 1.0; + for(int i = n;i > 0 ;--i){ + fact *= (double)i; + } + return(fact); + } + + inline double PnmFcn(int n, int m, double x) + { + /*----------------------------------------------------------------- + * It is assumed that 0 <= m <= n and that abs(x) <= 1.0. + * No error checking is done, however. 
+ *---------------------------------------------------------------*/ + double fact, pnn=0, pmm, pmmp1, somx2; + + int i, nn; + + if(std::abs(x) > 1.0){ + KRIPKE_ABORT("Bad input to PnmFcn: abs(x) > 1.0, x = %e\n", x); + } + else if((x > 1.0) && (x <= 1.0)){ + x = 1.0; + } + else if((-1.0 <= x ) && (x < -1.0)){ + x = -1.0; + } + + pmm=1.0; + if(m > 0){ + somx2=sqrt((1.0-x)*(1.0+x)); + fact=1.0; + for(i=1; i<=m; i++){ + pmm *= -fact*somx2; + fact += 2.0; + } + } + if(n == m){ + return(pmm); + } + else { + pmmp1=x*(2*m+1)*pmm; + if(n == (m+1)){ + return(pmmp1); + } + else { + for(nn=m+2; nn<=n; nn++){ + pnn=(x*(2*nn-1)*pmmp1-(nn+m-1)*pmm)/(nn-m); + pmm=pmmp1; + pmmp1=pnn; + } + return(pnn); + } + } + } + + inline double YnmFcn(int n, int m, double mu, double eta, double xi) + { + double fac1, fac2, anm, ynm, pnm, dm0, taum, tmp, phi, phi_tmp; + double floor=1.e-20; + int nn, mm; + + /* Calculate the correct phi for omega=(mu,eta,xi) */ + tmp = fabs(eta/(mu+floor)); + phi_tmp = atan(tmp); + if( (mu>0) && (eta>0) ){ + phi = phi_tmp; + } + else if( (mu<0) && (eta>0) ){ + phi = M_PI - fabs(phi_tmp); + } + else if( (mu<0) && (eta<0) ){ + phi = M_PI + fabs(phi_tmp); + } + else { + phi = 2.0*M_PI - fabs(phi_tmp); + } + + /* Begin evaluation of Ynm(omega) */ + nn = n - std::abs(m); + fac1 = (double) FactFcn(nn); + nn = n + std::abs(m); + fac2 = (double) FactFcn(nn); + mm = std::abs(m); + pnm = PnmFcn(n, mm, xi); + tmp = ((double) m)*phi; + if(m >= 0){ + taum = cos(tmp); + } + else {taum = sin(-tmp); } + if(m == 0){ + dm0 = 1.0; + } + else {dm0 = 0.0; } + + tmp = ((2*n+1)*fac1)/(2.0*(1.0+dm0)*M_PI*fac2); + anm = sqrt( tmp ); + ynm = anm*pnm*taum; + return(ynm); + } +} + + + +/** + * Initializes the quadrature set information for a Grid_Data object. + * This guarantees that each pair have a single originating octant. 
+ */ +static +std::vector +createQuadratureSet(InputVariables const &input_vars) +{ + std::vector directions; + + // Get set description from user + int num_directions_per_octant = input_vars.num_directions/8; + int num_directions = input_vars.num_directions; + + // allocate storage + directions.resize(num_directions); + + // Are we running a REAL quadrature set? + int num_polar = input_vars.quad_num_polar; + int num_azimuth = input_vars.quad_num_azimuthal; + + std::vector polar_cos; + std::vector polar_weight; + if(num_polar > 0){ + // make sure the user specified the correct number of quadrature points + KRIPKE_ASSERT(num_polar % 4 == 0, + "Must have number of polar angles be a multiple of 4\n"); + + KRIPKE_ASSERT(num_azimuth % 2 == 0, + "Must have number of azimuthal angles be a multiple of 2\n"); + + KRIPKE_ASSERT(num_polar*num_azimuth == num_directions, + "You need to specify %d total directions, not %d\n", + num_polar*num_azimuth, num_directions); + + // Compute gauss legendre weights + polar_cos.resize(num_polar); + polar_weight.resize(num_polar); + GaussLegendre(-1.0, 1.0, polar_cos, polar_weight, DBL_EPSILON); + + // compute azmuhtal angles and weights + std::vector az_angle(num_azimuth); + std::vector az_weight(num_azimuth); + double dangle = 2.0*M_PI/((double) num_azimuth); + + for(int i=0; i 0.) ? 1 : -1; + directions[d].jd = (ycos > 0.) ? 1 : -1; + directions[d].kd = (zcos > 0.) ? 1 : -1; + + directions[d].octant = 0; + if(directions[d].id == -1){ + directions[d].octant += 1; + } + if(directions[d].jd == -1){ + directions[d].octant += 2; + } + if(directions[d].kd == -1){ + directions[d].octant += 4; + } + + directions[d].xcos = std::abs(xcos); + directions[d].ycos = std::abs(ycos); + directions[d].zcos = std::abs(zcos); + directions[d].w = w; + + ++ d; + } + } + + // Sort by octant.. so each set has same directions + std::sort(directions.begin(), directions.end(), dirSortFcn); + } + else{ + // Do (essentialy) an S2 quadrature.. 
but with repeated directions + + // Compute x,y,z cosine values + double mu = cos(M_PI/4); + double eta = sqrt(1-mu*mu) * cos(M_PI/4); + double xi = sqrt(1-mu*mu) * sin(M_PI/4); + int d = 0; + for(int octant = 0;octant < 8;++ octant){ + double omegas[3]; + omegas[0] = octant & 0x1; + omegas[1] = (octant>>1) & 0x1; + omegas[2] = (octant>>2) & 0x1; + + for(int sd=0; sd 0.) ? 1 : -1; + directions[d].jd = (omegas[1] > 0.) ? 1 : -1; + directions[d].kd = (omegas[2] > 0.) ? 1 : -1; + + // Store quadrature point's weight + directions[d].w = 4.0*M_PI / (double)num_directions; + directions[d].xcos = mu; + directions[d].ycos = eta; + directions[d].zcos = xi; + } + } + } + + return directions; +} + + + + +void Kripke::Generate::generateQuadrature(Kripke::Core::DataStore &data_store, + InputVariables const &input_vars) +{ + + PartitionSpace &pspace = data_store.getVariable("pspace"); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + // Create sets for angular discretization + size_t ndir_per_sdom = input_vars.num_directions / + pspace.getGlobalNumSubdomains(SPACE_Q); + + std::vector local_dirs(pspace.getNumSubdomains(SPACE_Q), + ndir_per_sdom); + + RangeSet *dir_set = new RangeSet(pspace, SPACE_Q, local_dirs); + + data_store.addVariable("Set/Direction", dir_set); + + + size_t legendre_order = input_vars.legendre_order; + size_t num_moments = (legendre_order+1)*(legendre_order+1); + + GlobalRangeSet *moment_set = new GlobalRangeSet(pspace, num_moments); + data_store.addVariable("Set/Moment", moment_set); + + GlobalRangeSet *legendre_set = new GlobalRangeSet(pspace, legendre_order+1); + data_store.addVariable("Set/Legendre", legendre_set); + + // Create a mapping from moments to their Legendre scattering coefficients + auto &field_moment_to_legendre = createField( + data_store, "moment_to_legendre", al_v, *moment_set); + + // create a global mapping... 
just easier this way + std::vector moment_list(moment_set->globalSize()); + int nm = 0; + for(int n = 0;n < (int)legendre_order+1;++ n){ + for(int m = -n;m <= n; ++ m){ + moment_list[nm] = Legendre{n}; + ++ nm; + } + } + KRIPKE_ASSERT(nm == (int)moment_set->globalSize()); + + // fill in the global + for(SdomId sdom_id : field_moment_to_legendre.getWorkList()){ + auto moment_to_legendre = field_moment_to_legendre.getView(sdom_id); + + RAJA::forall( + RAJA::TypedRangeSegment(0, moment_set->size(sdom_id)), + [=](Moment nm){ + moment_to_legendre(nm) = moment_list[(*nm) + moment_set->lower(sdom_id)]; + }); + } + + + // Create the quadrature set + auto quadrature_points = createQuadratureSet(input_vars); + + // Create and populate fields for quadrature set data + auto &field_xcos = createField(data_store, "quadrature/xcos", al_v, *dir_set); + auto &field_ycos = createField(data_store, "quadrature/ycos", al_v, *dir_set); + auto &field_zcos = createField(data_store, "quadrature/zcos", al_v, *dir_set); + auto &field_w = createField(data_store, "quadrature/w", al_v, *dir_set); + auto &field_id = createField(data_store, "quadrature/id", al_v, *dir_set); + auto &field_jd = createField(data_store, "quadrature/jd", al_v, *dir_set); + auto &field_kd = createField(data_store, "quadrature/kd", al_v, *dir_set); + auto &field_octant = createField(data_store, "quadrature/octant", al_v, *dir_set); + + for(SdomId sdom_id : field_xcos.getWorkList()){ + int num_directions = dir_set->size(sdom_id); + int direction_lower = dir_set->lower(sdom_id); + + auto xcos = field_xcos.getView(sdom_id); + auto ycos = field_ycos.getView(sdom_id); + auto zcos = field_zcos.getView(sdom_id); + auto w = field_w.getView(sdom_id); + auto id = field_id.getView(sdom_id); + auto jd = field_jd.getView(sdom_id); + auto kd = field_kd.getView(sdom_id); + auto octant = field_octant.getView(sdom_id); + + RAJA::forall( + RAJA::TypedRangeSegment(0, num_directions), + [=](Direction d){ + QuadraturePoint const &point_d = 
quadrature_points[(*d)+direction_lower]; + xcos(d) = point_d.xcos; + ycos(d) = point_d.ycos; + zcos(d) = point_d.zcos; + w(d) = point_d.w; + id(d) = point_d.id; + jd(d) = point_d.jd; + kd(d) = point_d.kd; + octant(d) = point_d.octant; + }); + } + + + // Create a set to describe the L and L+ matrices + auto &set_ell = data_store.newVariable>("Set/Ell", + pspace, SPACE_Q, *moment_set, *dir_set); + + auto &set_ell_plus = data_store.newVariable>("Set/EllPlus", + pspace, SPACE_Q, *dir_set, *moment_set); + + // Allocate and initialize the L and L+ matrices + auto &field_ell = createField(data_store, "ell", al_v, set_ell); + auto &field_ell_plus = createField(data_store, "ell_plus", al_v, set_ell_plus); + + for(SdomId sdom_id : field_xcos.getWorkList()){ + auto ell = field_ell.getView(sdom_id); + auto ell_plus = field_ell_plus.getView(sdom_id); + + int num_directions = dir_set->size(sdom_id); + int direction_lower = dir_set->lower(sdom_id); + + double SQRT4PI = std::sqrt(4*M_PI); + Moment nm{0}; + for(int n=0; n < (int)legendre_order+1; n++){ + for(int m=-n; m<=n; m++){ + RAJA::forall( + RAJA::TypedRangeSegment(0, num_directions), + [=](Direction d){ + + QuadraturePoint const &point_d = quadrature_points[(*d)+direction_lower]; + // Get quadrature point info + double xcos = (point_d.id)*(point_d.xcos); + double ycos = (point_d.jd)*(point_d.ycos); + double zcos = (point_d.kd)*(point_d.zcos); + double w = point_d.w; + + double ynm = YnmFcn(n, m, xcos, ycos, zcos); + + // Compute element of L and L+ + ell(nm, d) = w*ynm/SQRT4PI; + ell_plus(d,nm) = ynm*SQRT4PI; + }); + nm ++; + } + } + } + + + // Create fields to store subdomain adjacency information for boundary comm + // used in sweeps and block jacobi solves + auto &set_dimension = + data_store.newVariable("Set/Dimension", pspace, 3); + + auto &set_adjacency = data_store.newVariable>("Set/Adjacency", + pspace, SPACE_PQR, set_dimension); + + auto &field_upwind = createField(data_store, "upwind", al_v, set_adjacency); + auto 
&field_downwind = createField(data_store, "downwind", al_v, set_adjacency); + + for(SdomId sdom_id : field_upwind.getWorkList()){ + // Get local subdomain coordinates + auto local_coord = pspace.sdomIdToCoord(sdom_id); + + // Offset local coordinate to global coordinates + auto global_coord = pspace.coordToGlobalCoord(local_coord); + + std::array sweep_dir = + {{ + field_id.getView(sdom_id), + field_jd.getView(sdom_id), + field_kd.getView(sdom_id) + }}; + + // Compute upwind and downwind coordinate + auto upwind = field_upwind.getView(sdom_id); + auto downwind = field_downwind.getView(sdom_id); + for(Dimension dim{0};dim < 3;++ dim){ + + // Compute upwind and downwind coordinates for this dimensions neighbor + auto global_upwind = global_coord; + auto global_downwind = global_coord; + global_upwind [*dim+SPACE_RX] -= sweep_dir[*dim](Direction{0}); + global_downwind[*dim+SPACE_RX] += sweep_dir[*dim](Direction{0}); + + // Is this an upwind boundary condition? + if(global_upwind[*dim+SPACE_RX] < 0 || + global_upwind[*dim+SPACE_RX] >= (ptrdiff_t)pspace.getGlobalNumSubdomains((Kripke::Core::SPACE)(*dim+SPACE_RX))) + { + upwind(dim) = GlobalSdomId{-1}; + } + // Not a BC, so compute the subdomain id + else{ + upwind(dim) = pspace.coordToGlobalSdomId(global_upwind); + } + + + // Is this an downwind boundary condition? + if(global_downwind[*dim+SPACE_RX] < 0 || + global_downwind[*dim+SPACE_RX] >= (ptrdiff_t)pspace.getGlobalNumSubdomains((Kripke::Core::SPACE)(*dim+SPACE_RX))) + { + downwind(dim) = GlobalSdomId{-1}; + } + // Not a BC, so compute the subdomain id + else{ + downwind(dim) = pspace.coordToGlobalSdomId(global_downwind); + } + } + + } +} + + diff --git a/src/Kripke/Generate/Space.cpp b/src/Kripke/Generate/Space.cpp new file mode 100644 index 00000000..fec06281 --- /dev/null +++ b/src/Kripke/Generate/Space.cpp @@ -0,0 +1,393 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. 
DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + +namespace { + +struct ZoneMixture { + double fraction[3]; + + size_t numMixed() const { + + return ( fraction[0] > 0.0 ? 1 : 0 ) + + ( fraction[1] > 0.0 ? 1 : 0 ) + + ( fraction[2] > 0.0 ? 
1 : 0 ); + + } +}; + +} + + +void Kripke::Generate::generateSpace(Kripke::Core::DataStore &data_store, + InputVariables const &input_vars) +{ + PartitionSpace &pspace = data_store.getVariable("pspace"); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + // Create set for X mesh + size_t nx_per_sdom = input_vars.nx / + pspace.getGlobalNumSubdomains(SPACE_RX); + + KRIPKE_ASSERT(nx_per_sdom * pspace.getGlobalNumSubdomains(SPACE_RX) == (size_t)input_vars.nx, + "Number of zones in X must evenly divide into the number of subdomains\n"); + + std::vector local_nx(pspace.getNumSubdomains(SPACE_RX), + nx_per_sdom); + + auto &set_zonei = data_store.newVariable( + "Set/ZoneI", pspace, SPACE_RX, local_nx); + + + + + // Create set for Y mesh + size_t ny_per_sdom = input_vars.ny / + pspace.getGlobalNumSubdomains(SPACE_RY); + + KRIPKE_ASSERT(ny_per_sdom * pspace.getGlobalNumSubdomains(SPACE_RY) == (size_t)input_vars.ny, + "Number of zones in Y must evenly divide into the number of subdomains\n"); + + std::vector local_ny(pspace.getNumSubdomains(SPACE_RY), + ny_per_sdom); + + auto &set_zonej = data_store.newVariable( + "Set/ZoneJ", pspace, SPACE_RY, local_ny); + + + + // Create set for Z mesh + size_t nz_per_sdom = input_vars.nz / + pspace.getGlobalNumSubdomains(SPACE_RZ); + + KRIPKE_ASSERT(nz_per_sdom * pspace.getGlobalNumSubdomains(SPACE_RZ) == (size_t)input_vars.nz, + "Number of zones in Z must evenly divide into the number of subdomains\n"); + + std::vector local_nz(pspace.getNumSubdomains(SPACE_RZ), + nz_per_sdom); + + auto &set_zonek = data_store.newVariable( + "Set/ZoneK", pspace, SPACE_RZ, local_nz); + + + + // Create a total set of zones in the problem + auto &set_zone = data_store.newVariable>("Set/Zone", + pspace, SPACE_R, set_zonei, set_zonej, set_zonek); + + + // Create a 1d linearized set of zones + auto &set_zone_linear = data_store.newVariable>("Set/ZoneLinear", + pspace, SPACE_R, set_zone); + + // Create a set of the number of materials + 
data_store.newVariable("Set/Material", pspace, 3); + + + + /* Set grid deltas for a uniform mesh (full-space, no reflecting BC's) + * x: -60.0 to 60.0 + * y: -100.0 to 100.0 + * z: -60.0 to 60.0 + */ + double const x_min = -60.0; + double const x_max = 60.0; + + double const y_min = -100.0; + double const y_max = 100.0; + + double const z_min = -60.0; + double const z_max = 60.0; + + + auto &field_dx = createField(data_store, "dx", al_v, set_zonei); + double dx = (x_max-x_min) / set_zonei.globalSize(); + Kripke::Kernel::kConst(field_dx, dx); + + auto &field_dy = createField(data_store, "dy", al_v, set_zonej); + double dy = (y_max-y_min) / set_zonej.globalSize(); + Kripke::Kernel::kConst(field_dy, dy); + + auto &field_dz = createField(data_store, "dz", al_v, set_zonek); + double dz = (z_max-z_min) / set_zonek.globalSize(); + Kripke::Kernel::kConst(field_dz, dz); + + + // Create a zone volume field (this is simple considering our uniform grid) + double zone_volume = dx*dy*dz; + auto &field_volume = createField(data_store, "volume", al_v, set_zone_linear); + Kripke::Kernel::kConst(field_volume, zone_volume); + + + /* + * Define a function describing the material region distribution in space + */ + auto material_fcn = [](double x, double y, double z) -> Material { + + // Problem is defined for one octant, with reflecting boundaries + // We "unreflect" it here by taking abs values + x = std::abs(x); + y = std::abs(y); + z = std::abs(z); + + // Central 20x20x20 box is Region 1 + if(x <= 10.0 && y <= 10.0 && z <= 10.0){ + return Material{0}; + } + + // Leg 1 of Region 2 + if(x <= 10.0 && y <= 60.0 && z <= 10.0){ + return Material{1}; + } + + // Leg 2 of Region 2 + if(x <= 40.0 && y >= 50.0 && y <= 60.0 && z <= 10.0){ + return Material{1}; + } + + // Leg 3 of Region 2 + if(x >= 30.0 && x <= 40.0 && y >= 50.0 && y <= 60.0 && z <= 40.0){ + return Material{1}; + } + + // Leg 4 of Region 2 + if(x >= 30.0 && x <= 40.0 && y >= 50.0 && z >= 30.0 && z <= 40.0){ + return 
Material{1}; + } + + // Rest is filled with region 3 + return Material{2}; + }; + + + + + + /* + * For each subdomain in space, build up dynamic arrays to hold material + * mixture information + */ + + // number of subsamples per spatial dimension + int num_subsamples = input_vars.num_material_subsamples; + double sample_vol_frac = 1.0 / (double)(num_subsamples*num_subsamples*num_subsamples); + + auto sdom_list = set_zone.getWorkList(); + std::vector> mix; + + for(SdomId sdom_id : sdom_list){ + + double x0 = x_min + dx*set_zonei.lower(sdom_id); + double y0 = y_min + dy*set_zonej.lower(sdom_id); + double z0 = z_min + dz*set_zonek.lower(sdom_id); + + std::vector sdom_mix(set_zone.size(sdom_id)); + auto zone_layout = set_zone.getLayout(sdom_id); + + // iterate over the zones, assume uniform mesh for our coordinate + // calculations + for (int i = 0; i < (int)set_zonei.size(sdom_id); i ++) { + for (int j = 0; j < (int)set_zonej.size(sdom_id); j ++) { + for (int k = 0; k < (int)set_zonek.size(sdom_id); k ++) { + + int zone = zone_layout(i,j,k); + + double xi = x0 + dx*i; + double yi = y0 + dy*j; + double zi = z0 + dz*k; + + // subsample probe the geometry to get our materials + sdom_mix[zone] = {{0.0, 0.0, 0.0}}; // fraction of both materials + + for(int si = 0;si < num_subsamples;++ si){ + for(int sj = 0;sj < num_subsamples;++ sj){ + for(int sk = 0;sk < num_subsamples;++ sk){ + + double x = xi + dx*(si+1)/(num_subsamples+1); + double y = yi + dy*(sj+1)/(num_subsamples+1); + double z = zi + dz*(sk+1)/(num_subsamples+1); + + Material mat = material_fcn(x, y, z); + sdom_mix[zone].fraction[*mat] += sample_vol_frac; + + } + } + } + } + } + } + + mix.push_back(sdom_mix); + } // sdom_id + + + // Go through and count number of mixed elements + std::vector sdom_to_num_mixed; + for(auto &sdom_mix : mix){ + size_t n = 0; + for(auto &z : sdom_mix){ + n += z.numMixed(); + } + sdom_to_num_mixed.push_back(n); + } + + // Create a new set that describes the number of mixed zones per 
sdom + auto &set_mixelem = data_store.newVariable( + "Set/MixElem", pspace, SPACE_R, sdom_to_num_mixed); + + // Create fields to store mixture information + auto &field_mixed_to_zone = createField( + data_store, "mixelem_to_zone", al_v, set_mixelem); + + auto &field_mixed_to_material = createField( + data_store, "mixelem_to_material", al_v, set_mixelem); + + auto &field_mixed_to_fraction = createField( + data_store, "mixelem_to_fraction", al_v, set_mixelem); + + auto &field_zone_to_num_mixelem = createField( + data_store, "zone_to_num_mixelem", al_v, set_zone_linear); + + auto &field_zone_to_mixelem = createField( + data_store, "zone_to_mixelem", al_v, set_zone_linear); + + + // Populate mixture fields with our dynamic data + RAJA::ReduceSum total_volume_red[3]; + for(size_t i = 0;i < sdom_list.size();++ i){ + SdomId sdom_id = sdom_list[i]; + + int num_zones = set_zone.size(sdom_id); + int num_mixelems = set_mixelem.size(sdom_id); + + auto const &sdom_mix = mix[i]; + + auto mixed_to_zone = field_mixed_to_zone.getView(sdom_id); + auto mixed_to_material = field_mixed_to_material.getView(sdom_id); + auto mixed_to_fraction = field_mixed_to_fraction.getView(sdom_id); + auto zone_to_num_mixelem = field_zone_to_num_mixelem.getView(sdom_id); + auto zone_to_mixelem = field_zone_to_mixelem.getView(sdom_id); + + RAJA::ReduceSum mixelem(MixElem{0}); + RAJA::forall( + RAJA::TypedRangeSegment(0, num_zones), + [=](Zone z){ + ZoneMixture const &zone_mix = sdom_mix[*z]; + int num_zone_mix = 0; + + zone_to_mixelem(z) = mixelem; + + double zone_frac = 0.0; + for(Material m{0};m < 3;++ m){ + if(zone_mix.fraction[*m] > 0.0){ + MixElem me = mixelem; + + mixed_to_zone(me) = z; + mixed_to_material(me) = m; + mixed_to_fraction(me) = zone_mix.fraction[*m]; + zone_frac += zone_mix.fraction[*m]; + total_volume_red[*m] += zone_mix.fraction[*m] * zone_volume; + num_zone_mix ++; + mixelem += MixElem{1}; + } + } + KRIPKE_ASSERT(zone_frac == 1.0, "Zone fraction wrong: %e", zone_frac); + 
zone_to_num_mixelem(z) = num_zone_mix; + }); + + KRIPKE_ASSERT((*((MixElem)mixelem)) == num_mixelems, "Mismatch in mixture info"); + } + + // Display the total volume + Kripke::Core::Comm default_comm; + auto const &r_comm = pspace.getComm(SPACE_R); + double total_volume[3]; + total_volume[0] = total_volume_red[0]; + total_volume[1] = total_volume_red[1]; + total_volume[2] = total_volume_red[2]; + r_comm.allReduceSumDouble(total_volume, 3); + if(default_comm.rank() == 0){ + printf("\n Material Volumes=[%e, %e, %e]\n", total_volume[0], + total_volume[1],total_volume[2]); + } + + + // Allocate storage for our zonal total cross-section + auto &set_group = data_store.getVariable("Set/Group"); + auto &set_sigt_zonal = data_store.newVariable>( + "Set/SigmaTZonal", pspace, SPACE_PR, set_group, set_zone); + auto &field_sigt = createField( + data_store, "sigt_zonal", al_v, set_sigt_zonal); + Kripke::Kernel::kConst(field_sigt, 0.0); + + for(SdomId sdom_id : field_sigt.getWorkList()){ + + auto mixelem_to_zone = field_mixed_to_zone.getView(sdom_id); + auto mixelem_to_material = field_mixed_to_material.getView(sdom_id); + auto mixelem_to_fraction = field_mixed_to_fraction.getView(sdom_id); + auto sigt = field_sigt.getView(sdom_id); + + int num_groups = set_group.size(sdom_id); + int num_mixelem = set_mixelem.size(sdom_id); + + for(Group g{0};g < num_groups;++ g){ + + RAJA::forall( + RAJA::TypedRangeSegment(0, num_mixelem), + [=](MixElem mixelem){ + Zone z = mixelem_to_zone(mixelem); + Material mat = mixelem_to_material(mixelem); + + sigt(g, z) += mixelem_to_fraction(mixelem) * input_vars.sigt[*mat]; + }); + } + + } + +} + + + + diff --git a/src/Kripke/Grid.cpp b/src/Kripke/Grid.cpp deleted file mode 100644 index 6fac0f8b..00000000 --- a/src/Kripke/Grid.cpp +++ /dev/null @@ -1,547 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. 
- * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#include - -#include -#include -#include -#include -#include -#include - -#ifdef KRIPKE_USE_SILO -#include -#include -#include -#endif - -/** - * Grid_Data constructor -*/ -Grid_Data::Grid_Data(Input_Variables *input_vars) -{ - sweep_trace = input_vars->sweep_trace; - trace_file = NULL; - trace_offset = 0.0; - - // Create object to describe processor and subdomain layout in space - // and their adjacencies - Layout *layout = createLayout(input_vars); - - // create the kernel object based on nesting - kernel = createKernel(input_vars->nesting, 3); - - // Create quadrature set (for all directions) - int total_num_directions = input_vars->num_directions; - InitDirections(this, input_vars); - - num_direction_sets = input_vars->num_dirsets; - num_directions_per_set = total_num_directions/num_direction_sets; - num_group_sets = input_vars->num_groupsets; - num_groups_per_set = input_vars->num_groups/ num_group_sets; - num_zone_sets = 1; - for(int dim = 0;dim < 3;++ dim){ - num_zone_sets *= input_vars->num_zonesets_dim[dim]; - } - - legendre_order = input_vars->legendre_order; - total_num_moments = (legendre_order+1)*(legendre_order+1); - - int num_subdomains = num_direction_sets*num_group_sets*num_zone_sets; - - Nesting_Order nest = input_vars->nesting; - - /* Set ncalls */ - niter = input_vars->niter; - - // setup mapping of moments to legendre coefficients - moment_to_coeff.resize(total_num_moments); - int nm = 0; - for(int n = 0;n < legendre_order+1;++ n){ - for(int m = -n;m <= n; ++ m){ - moment_to_coeff[nm] = n; - ++ nm; - } - } - - // setup cross-sections - int total_num_groups = num_group_sets*num_groups_per_set; - sigma_tot.resize(total_num_groups, 0.0); - - // Setup scattering transfer matrix for 3 materials - - sigs = new SubTVec(kernel->nestingSigs(), total_num_groups*total_num_groups, legendre_order+1, 3); - - // Set to isotropic scattering given user inputs - sigs->clear(0.0); - for(int mat = 0;mat < 3;++ mat){ - for(int g = 0;g < total_num_groups;++ 
g){ - int idx_g_gp = g*total_num_groups + g; - (*sigs)(idx_g_gp, 0, mat) = input_vars->sigs[mat]; - } - } - - // just allocate pointer vectors, we will allocate them below - ell.resize(num_direction_sets, NULL); - ell_plus.resize(num_direction_sets, NULL); - phi.resize(num_zone_sets, NULL); - phi_out.resize(num_zone_sets, NULL); - - // Initialize Subdomains - zs_to_sdomid.resize(num_zone_sets); - subdomains.resize(num_subdomains); - for(int gs = 0;gs < num_group_sets;++ gs){ - for(int ds = 0;ds < num_direction_sets;++ ds){ - for(int zs = 0;zs < num_zone_sets;++ zs){ - // Compupte subdomain id - int sdom_id = layout->setIdToSubdomainId(gs, ds, zs); - - // Setup the subdomain - Subdomain &sdom = subdomains[sdom_id]; - sdom.setup(sdom_id, input_vars, gs, ds, zs, directions, kernel, layout); - - // Create ell and ell_plus, if this is the first of this ds - bool compute_ell = false; - if(ell[ds] == NULL){ - ell[ds] = new SubTVec(kernel->nestingEll(), total_num_moments, sdom.num_directions, 1); - ell_plus[ds] = new SubTVec(kernel->nestingEllPlus(), total_num_moments, sdom.num_directions, 1); - - compute_ell = true; - } - - // Create phi and phi_out, if this is the first of this zs - if(phi[zs] == NULL){ - phi[zs] = new SubTVec(nest, total_num_groups, total_num_moments, sdom.num_zones); - phi_out[zs] = new SubTVec(nest, total_num_groups, total_num_moments, sdom.num_zones); - } - - // setup zs to sdom mapping - if(gs == 0 && ds == 0){ - zs_to_sdomid[zs] = sdom_id; - } - - // Set the variables for this subdomain - sdom.setVars(ell[ds], ell_plus[ds], phi[zs], phi_out[zs]); - - if(compute_ell){ - // Compute the L and L+ matrices - sdom.computeLLPlus(legendre_order); - } - } - } - } - delete layout; - - - - // Now compute number of elements allocated globally, - // and get each materials volume - long long vec_size[4] = {0,0,0,0}; - double vec_volume[3] = {0.0, 0.0, 0.0}; - for(int sdom_id = 0;sdom_id < subdomains.size();++sdom_id){ - Subdomain &sdom = subdomains[sdom_id]; - 
vec_size[0] += sdom.psi->elements; - vec_size[1] += sdom.psi->elements; - } - for(int zs = 0;zs < num_zone_sets;++ zs){ - vec_size[2] += phi[zs]->elements; - vec_size[3] += phi_out[zs]->elements; - int sdom_id = zs_to_sdomid[zs]; - for(int mat = 0;mat < 3;++ mat){ - vec_volume[mat] += subdomains[sdom_id].reg_volume[mat]; - } - } - - long long global_size[4]; - MPI_Reduce(vec_size, global_size, 4, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); - - double global_volume[3]; - MPI_Reduce(vec_volume, global_volume, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - - int mpi_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - if(mpi_rank == 0){ - printf("Unknown counts: psi=%ld, rhs=%ld, phi=%ld, phi_out=%ld\n", - (long)global_size[0], (long)global_size[1], (long)global_size[2], (long)global_size[3]); - printf("Region volumes: Reg1=%e, Reg2=%e, Reg3=%e\n", - global_volume[0], global_volume[1], global_volume[2]); - } -} - -Grid_Data::~Grid_Data(){ - delete kernel; - for(int zs = 0;zs < num_zone_sets;++ zs){ - delete phi[zs]; - delete phi_out[zs]; - } - for(int ds = 0;ds < num_direction_sets;++ ds){ - delete ell[ds]; - delete ell_plus[ds]; - } - delete sigs; -} - -/** - * Randomizes all variables and matrices for testing suite. - */ -void Grid_Data::randomizeData(void){ - for(int i = 0;i < sigma_tot.size();++i){ - sigma_tot[i] = drand48(); - } - - for(int i = 0;i < directions.size();++i){ - directions[i].xcos = drand48(); - directions[i].ycos = drand48(); - directions[i].zcos = drand48(); - } - - - for(int s = 0;s < subdomains.size();++ s){ - subdomains[s].randomizeData(); - } - - for(int zs = 0;zs < num_zone_sets;++ zs){ - phi[zs]->randomizeData(); - phi_out[zs]->randomizeData(); - } - - for(int ds = 0;ds < num_direction_sets;++ ds){ - ell[ds]->randomizeData(); - ell_plus[ds]->randomizeData(); - } - - sigs->randomizeData(); -} - - -/** - * Returns the integral of psi.. 
to look at convergence - */ -double Grid_Data::particleEdit(void){ - // sum up particles for psi and rhs - double part = 0.0; - for(int sdom_id = 0;sdom_id < subdomains.size();++ sdom_id){ - Subdomain &sdom = subdomains[sdom_id]; - - int num_zones = sdom.num_zones; - int num_directions = sdom.num_directions; - int num_groups= sdom.num_groups; - Directions *dirs = sdom.directions; - - for(int z = 0;z < num_zones;++ z){ - double vol = sdom.volume[z]; - for(int d = 0;d < num_directions;++ d){ - double w = dirs[d].w; - for(int g = 0;g < num_groups;++ g){ - part += w * (*sdom.psi)(g,d,z) * vol; - } - } - } - } - - // reduce - double part_global; - MPI_Reduce(&part, &part_global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - - return part_global; -} - - -/** - * Copies all variables and matrices for testing suite. - * Correctly copies data from one nesting to another. - */ -void Grid_Data::copy(Grid_Data const &b){ - sigma_tot = b.sigma_tot; - directions = b.directions; - - subdomains.resize(b.subdomains.size()); - for(int s = 0;s < subdomains.size();++ s){ - subdomains[s].copy(b.subdomains[s]); - } - - for(int zs = 0;zs < num_zone_sets;++ zs){ - phi[zs]->copy(*b.phi[zs]); - phi_out[zs]->copy(*b.phi_out[zs]); - } - - for(int ds = 0;ds < ell.size();++ ds){ - ell[ds]->copy(*b.ell[ds]); - ell_plus[ds]->copy(*b.ell_plus[ds]); - } - - sigs->copy(*b.sigs); -} - -/** - * Compares all variables and matrices for testing suite. - * Correctly compares data from one nesting to another. 
- */ -bool Grid_Data::compare(Grid_Data const &b, double tol, bool verbose){ - bool is_diff = false; - - for(int i = 0;i < directions.size();++i){ - std::stringstream dirname; - dirname << "directions[" << i << "]"; - - is_diff |= compareScalar(dirname.str()+".xcos", - directions[i].xcos, b.directions[i].xcos, tol, verbose); - - is_diff |= compareScalar(dirname.str()+".ycos", - directions[i].ycos, b.directions[i].ycos, tol, verbose); - - is_diff |= compareScalar(dirname.str()+".zcos", - directions[i].zcos, b.directions[i].zcos, tol, verbose); - } - - for(int s = 0;s < subdomains.size();++ s){ - is_diff |= subdomains[s].compare( - b.subdomains[s], tol, verbose); - - } - is_diff |= compareVector("sigma_tot", sigma_tot, b.sigma_tot, tol, verbose); - - for(int zs = 0;zs < num_zone_sets;++ zs){ - is_diff |= phi[zs]->compare("phi", *b.phi[zs], tol, verbose); - is_diff |= phi_out[zs]->compare("phi_out", *b.phi_out[zs], tol, verbose); - } - - for(int ds = 0;ds < ell.size();++ ds){ - is_diff |= ell[ds]->compare("ell", *b.ell[ds], tol, verbose); - is_diff |= ell_plus[ds]->compare("ell_plus", *b.ell_plus[ds], tol, verbose); - } - - is_diff |= sigs->compare("sigs", *b.sigs, tol, verbose); - - return is_diff; -} - - -#ifdef KRIPKE_USE_SILO - -enum MultivarType { - MULTI_MESH, - MULTI_MAT, - MULTI_VAR -}; - -namespace { - /** - Writes a multimesh or multivar to the root file. 
- */ - - void siloWriteMulti(DBfile *root, MultivarType mv_type, - std::string const &fname_base, std::string const &var_name, - std::vector sdom_id_list, int var_type = 0) - { - int mpi_size; - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - int num_sdom = sdom_id_list.size(); - - // setup names and types - std::vector var_types(mpi_size*num_sdom, var_type); - std::vector var_names(mpi_size*num_sdom); - int var_idx = 0; - for(int rank = 0;rank < mpi_size;++ rank){ - for(int idx = 0;idx < num_sdom;++ idx){ - int sdom_id = sdom_id_list[idx]; - std::stringstream name; - name << fname_base << "/rank_" << rank << ".silo:/sdom" << sdom_id << "/" << var_name; - var_names[var_idx] = strdup(name.str().c_str()); - var_idx ++; - } - } - - if(mv_type == MULTI_MESH){ - DBPutMultimesh(root, var_name.c_str(), mpi_size*num_sdom, - &var_names[0], &var_types[0], NULL); - } - else if(mv_type == MULTI_MAT){ - DBPutMultimat(root, var_name.c_str(), mpi_size*num_sdom, - &var_names[0], NULL); - } - else{ - DBPutMultivar(root, var_name.c_str(), mpi_size*num_sdom, - &var_names[0], &var_types[0] , NULL); - } - - // cleanup - for(int i = 0;i < mpi_size*num_sdom; ++i){ - free(var_names[i]); - } - } - - void siloWriteRectMesh(DBfile *silo_file, - std::string const &mesh_name, - int const *nzones, - double const *zeros, - double const *deltas_x, - double const *deltas_y, - double const *deltas_z) - { - static char const *coordnames[3] = {"X", "Y", "Z"}; - double const *deltas[3] = {deltas_x, deltas_y, deltas_z}; - double *coords[3]; - for(int dim = 0;dim < 3;++ dim){ - coords[dim] = new double[nzones[dim]]; - coords[dim][0] = zeros[dim]; - for(int z = 0;z < nzones[dim];++ z){ - coords[dim][1+z] = coords[dim][z] + deltas[dim][z]; - } - } - int nnodes[3] = { - nzones[0]+1, - nzones[1]+1, - nzones[2]+1 - }; - - DBPutQuadmesh(silo_file, mesh_name.c_str(), const_cast(coordnames), coords, nnodes, 3, DB_DOUBLE, - DB_COLLINEAR, NULL); - - // cleanup - delete[] coords[0]; - delete[] coords[1]; - delete[] 
coords[2]; - } - - -} //namespace - - -void Grid_Data::writeSilo(std::string const &fname_base){ - - // Recompute Phi... so we can write out phi0 - kernel->LTimes(this); - - int mpi_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - - if(mpi_rank == 0){ - // Create a root file - std::string fname_root = fname_base + ".silo"; - DBfile *root = DBCreate(fname_root.c_str(), - DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5); - - // Write out multimesh and multivars - siloWriteMulti(root, MULTI_MESH, fname_base, "mesh", zs_to_sdomid, DB_QUAD_RECT); - siloWriteMulti(root, MULTI_MAT, fname_base, "material", zs_to_sdomid); - siloWriteMulti(root, MULTI_VAR, fname_base, "phi0", zs_to_sdomid, DB_QUADVAR); - - // Close root file - DBClose(root); - - // Create a subdirectory to hold processor info - mkdir(fname_base.c_str(), 0750); - } - - // Sync up, so everyone sees the subdirectory - MPI_Barrier(MPI_COMM_WORLD); - - // Create our processor file - std::stringstream ss_proc; - ss_proc << fname_base << "/rank_" << mpi_rank << ".silo"; - DBfile *proc = DBCreate(ss_proc.str().c_str(), - DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5); - - // Write out data for each subdomain - int num_zone_sets = zs_to_sdomid.size(); - for(int sdom_idx = 0;sdom_idx < num_zone_sets;++ sdom_idx){ - int sdom_id = zs_to_sdomid[sdom_idx]; - Subdomain &sdom = subdomains[sdom_id]; - - // Create a directory for the subdomain - std::stringstream dirname; - dirname << "/sdom" << sdom_id; - DBMkDir(proc, dirname.str().c_str()); - - // Set working directory - DBSetDir(proc, dirname.str().c_str()); - - // Write the mesh - siloWriteRectMesh(proc, "mesh", sdom.nzones, sdom.zeros, - &sdom.deltas[0][1], &sdom.deltas[1][1], &sdom.deltas[2][1]); - - - // Write the material - { - int num_zones = sdom.num_zones; - int num_mixed = sdom.mixed_material.size(); - int matnos[3] = {1, 2, 3}; - std::vector matlist(num_zones, 0); - std::vector mix_next(num_mixed, 0); - std::vector mix_mat(num_mixed, 0); - - // setup matlist and mix_next arrays - 
int last_z = -1; - for(int m = 0;m < num_mixed;++ m){ - mix_mat[m] = sdom.mixed_material[m] + 1; - int z = sdom.mixed_to_zones[m]; - if(matlist[z] == 0){ - matlist[z] = -(1+m); - } - // if we are still on the same zone, make sure the last mix points - // here - if(z == last_z){ - mix_next[m-1] = m+1; - } - last_z = z; - } - - DBPutMaterial(proc, "material", "mesh", 3, matnos, - &matlist[0], sdom.nzones, 3, - &mix_next[0], &mix_mat[0], &sdom.mixed_to_zones[0], &sdom.mixed_fraction[0], num_mixed, - DB_DOUBLE, NULL); - } - - // Write phi0 - { - - int num_zones = sdom.num_zones; - std::vector phi0(num_zones); - - // extract phi0 from phi for the 0th group - for(int z = 0;z < num_zones;++ z){ - phi0[z] = (*sdom.phi)(0,0,z); - } - - DBPutQuadvar1(proc, "phi0", "mesh", &phi0[0], - sdom.nzones, 3, NULL, 0, DB_DOUBLE, DB_ZONECENT, NULL); - } - } - - // Close processor file - DBClose(proc); -} -#endif - - diff --git a/src/Kripke/Grid.h b/src/Kripke/Grid.h deleted file mode 100644 index ee85c18d..00000000 --- a/src/Kripke/Grid.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. 
Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#ifndef KRIPKE_GRID_DATA_H__ -#define KRIPKE_GRID_DATA_H__ - -#include -#include -#include -#include -#include -#include - -// Foreward Decl -struct Input_Variables; -struct Grid_Data; -struct SubTVec; - - -/** - * Contains all grid parameters and variables. 
- */ -struct Grid_Data { -public: - explicit Grid_Data(Input_Variables *input_vars); - ~Grid_Data(); - - void randomizeData(void); - void copy(Grid_Data const &b); - bool compare(Grid_Data const &b, double tol, bool verbose); - double particleEdit(void); -#ifdef KRIPKE_USE_SILO - void writeSilo(std::string const &fname); -#endif - - Timing timing; - - int niter; - - double source_value; - - std::vector sigma_tot; // Cross section data - - int num_group_sets; // Number of group-sets - int num_groups_per_set; // How many groups in each set - int num_direction_sets; // Number of direction-sets - int num_directions_per_set; // Number of directions per dir set - int num_zone_sets; // Number of zone sets - int legendre_order; // Legendra expansion order ( >= 0 ) - int total_num_moments; // Number of spherical harmonic moments - - std::vector moment_to_coeff; // Map from harmonic moments to legendre coefficients - - std::vector directions; // Quadrature point data, for all directions - Kernel *kernel; // Layout-specific math kernels - - std::vector subdomains; // Group/Angle/Zone set data - std::vector zs_to_sdomid; // map of zonesets to subdomains with ds=gs=0 - - // Variables: - SubTVec *sigs; // scattering lookup table for each material - // G=g->gp, D=legendre coeff, Z=matidx - - // Per directionset ell and ell_plus matrices (Subdomain point into these arrays) - std::vector ell; // L matrix in nm_offset coordinates - std::vector ell_plus; // L+ matrix in nm_offset coordinates - - // Per zoneset phi and phi_out (Subdomains point into these arrays) - std::vector phi; // Moments of psi - std::vector phi_out; // Scattering source - - bool sweep_trace; - FILE *trace_file; - double trace_offset; -}; - -#endif diff --git a/src/Kripke/Input_Variables.cpp b/src/Kripke/InputVariables.cpp similarity index 87% rename from src/Kripke/Input_Variables.cpp rename to src/Kripke/InputVariables.cpp index 3e349040..df4b39aa 100644 --- a/src/Kripke/Input_Variables.cpp +++ 
b/src/Kripke/InputVariables.cpp @@ -30,15 +30,16 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. */ -#include +#include -#include +#include + +using namespace Kripke; /** * Setup the default input choices */ -Input_Variables::Input_Variables() : - run_name("kripke"), +InputVariables::InputVariables() : nx(16), ny(16), nz(16), num_directions(96), num_groups(32), @@ -46,16 +47,16 @@ Input_Variables::Input_Variables() : quad_num_polar(0), quad_num_azimuthal(0), - nesting(NEST_DGZ), + al_v(ArchLayoutV{KRIPKE_ARCHV_DEFAULT, KRIPKE_LAYOUTV_DEFAULT}), npx(1), npy(1), npz(1), num_dirsets(8), num_groupsets(2), - layout_pattern(0), niter(10), parallel_method(PMETHOD_SWEEP), - sweep_trace(false) + num_material_subsamples(4), + run_name("kripke") { num_zonesets_dim[0] = 1; num_zonesets_dim[1] = 1; @@ -73,10 +74,11 @@ Input_Variables::Input_Variables() : /** * Checks validity of inputs, returns 'true' on error. */ -bool Input_Variables::checkValues(void) const{ +bool InputVariables::checkValues(void) const{ // make sure any output only goes to root - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + Kripke::Core::Comm comm; + int rank = comm.rank(); if(num_zonesets_dim[0] <= 0 || num_zonesets_dim[1] <= 0 || num_zonesets_dim[2] <= 0){ if(!rank) @@ -84,18 +86,6 @@ bool Input_Variables::checkValues(void) const{ return true; } - if(layout_pattern < 0 || layout_pattern > 1){ - if(!rank) - printf("Layout(%d) must be either 0 or 1\n", layout_pattern); - return true; - } - - if(nesting < 0){ - if(!rank) - printf("Invalid nesting selected\n"); - return true; - } - if(num_groups < 1){ if(!rank) printf("Number of groups (%d) needs to be at least 1\n", num_groups); diff --git a/src/Kripke/Input_Variables.h b/src/Kripke/InputVariables.h similarity index 88% rename from src/Kripke/Input_Variables.h rename to src/Kripke/InputVariables.h index 3f017197..cd1cacbf 100644 --- a/src/Kripke/Input_Variables.h +++ b/src/Kripke/InputVariables.h @@ -33,14 +33,15 @@ #ifndef 
KRIPKE_INPUT_VARIABLES_H__ #define KRIPKE_INPUT_VARIABLES_H__ -#include +#include +#include /** * This structure defines the input parameters to setup a problem. */ -struct Input_Variables { - Input_Variables(); +struct InputVariables { + InputVariables(); bool checkValues(void) const; @@ -53,28 +54,23 @@ struct Input_Variables { int quad_num_azimuthal; // Number of azimuthal quadrature points (0 for dummy) // On-Node Options - Nesting_Order nesting; // Data layout and loop ordering (of Psi) + Kripke::ArchLayoutV al_v; // Data layout and architecture selection // Parallel Decomp int npx, npy, npz; // The number of processors in x,y,z int num_dirsets; // Number of direction sets int num_groupsets; // Number of energy group sets int num_zonesets_dim[3]; // Number of zoneset in x, y, z - int layout_pattern; // Which subdomain/task layout to use // Physics and Solver Options int niter; // number of solver iterations to run ParallelMethod parallel_method; double sigt[3]; // total cross section for 3 materials double sigs[3]; // total scattering cross section for 3 materials + int num_material_subsamples; // number of subsamples in each dimension for mesh painting // Output Options std::string run_name; // Name to use when generating output files -#ifdef KRIPKE_USE_SILO - std::string silo_basename; // name prefix for silo output files -#endif - bool sweep_trace; // Output per-rank sweep trace file - }; #endif diff --git a/src/Kripke/Kernel.h b/src/Kripke/Kernel.h index a7158cda..09d68468 100644 --- a/src/Kripke/Kernel.h +++ b/src/Kripke/Kernel.h @@ -34,35 +34,78 @@ #define KRIPKE_KERNEL_H__ #include +#include +#include -struct Grid_Data; -struct SubTVec; -struct Subdomain; +namespace Kripke { -/** - * This is the Kernel base-class and interface definition. - * This abstracts the storage of Psi, Phi, L, L+ from the rest of the code, - * providing data-layout specific routines. 
- */ -class Kernel { - public: - virtual Nesting_Order nestingPsi(void) const = 0; - virtual Nesting_Order nestingPhi(void) const = 0; - virtual Nesting_Order nestingSigt(void) const = 0; - virtual Nesting_Order nestingEll(void) const = 0; - virtual Nesting_Order nestingEllPlus(void) const = 0; - virtual Nesting_Order nestingSigs(void) const = 0; - - // Computational Kernels - virtual void LTimes(Grid_Data *grid_data) = 0; - virtual void LPlusTimes(Grid_Data *grid_data) = 0; - virtual void scattering(Grid_Data *grid_data) = 0; - virtual void source(Grid_Data *grid_data) = 0; - virtual void sweep(Subdomain *ga_set) = 0; -}; - - -// Factory to create correct kernel object -Kernel *createKernel(Nesting_Order, int num_dims); + namespace Kernel { + + void LPlusTimes(Kripke::Core::DataStore &data_store); + + + void LTimes(Kripke::Core::DataStore &data_store); + + + double population(Kripke::Core::DataStore &data_store); + + + void scattering(Kripke::Core::DataStore &data_store); + + + void source(Kripke::Core::DataStore &data_store); + + + void sweepSubdomain(Kripke::Core::DataStore &data_store, Kripke::SdomId sdom_id); + + + template + RAJA_INLINE + void kConst(FieldType &field, Kripke::SdomId sdom_id, typename FieldType::ElementType value){ + auto view1d = field.getView1d(sdom_id); + int num_elem = field.size(sdom_id); + RAJA::forall( + RAJA::RangeSegment(0, num_elem), + [=](RAJA::Index_type i){ + view1d(i) = value; + }); + } + + template + RAJA_INLINE + void kConst(FieldType &field, typename FieldType::ElementType value){ + for(Kripke::SdomId sdom_id : field.getWorkList()){ + kConst(field, sdom_id, value); + } + } + + + + + template + RAJA_INLINE + void kCopy(FieldType &field_dst, Kripke::SdomId sdom_id_dst, + FieldType &field_src, Kripke::SdomId sdom_id_src){ + auto view_src = field_src.getView1d(sdom_id_src); + auto view_dst = field_dst.getView1d(sdom_id_dst); + int num_elem = field_src.size(sdom_id_src); + + RAJA::forall( + RAJA::RangeSegment(0, num_elem), + 
[=](RAJA::Index_type i){ + view_dst(i) = view_src(i); // copy each source element into the destination + }); + } + + template + RAJA_INLINE + void kCopy(FieldType &field_dst, FieldType &field_src){ + for(Kripke::SdomId sdom_id : field_dst.getWorkList()){ + kCopy(field_dst, sdom_id, field_src, sdom_id); + } + } + + } +} #endif diff --git a/src/Kripke/Kernel/Kernel_3d_DGZ.cpp b/src/Kripke/Kernel/Kernel_3d_DGZ.cpp deleted file mode 100644 index d21e6a29..00000000 --- a/src/Kripke/Kernel/Kernel_3d_DGZ.cpp +++ /dev/null @@ -1,367 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. 
- * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include -#include -#include - -Nesting_Order Kernel_3d_DGZ::nestingPsi(void) const { - return NEST_DGZ; -} - -Nesting_Order Kernel_3d_DGZ::nestingPhi(void) const { - return NEST_DGZ; -} - -Nesting_Order Kernel_3d_DGZ::nestingSigt(void) const { - return NEST_DGZ; -} - -Nesting_Order Kernel_3d_DGZ::nestingEll(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_DGZ::nestingEllPlus(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_DGZ::nestingSigs(void) const { - return NEST_DGZ; -} - - -void Kernel_3d_DGZ::LTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Zero Phi - for(int ds = 0;ds < grid_data->num_zone_sets;++ ds){ - grid_data->phi[ds]->clear(0.0); - } - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_groups = sdom.phi->groups; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gz = num_groups*num_zones; - int num_locgz = num_local_groups*num_zones; - - // Get pointers - double const * KRESTRICT ell = sdom.ell->ptr(); - double const * KRESTRICT psi = sdom.psi->ptr(); - double * KRESTRICT phi = sdom.phi->ptr(); - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT ell_nm = ell + nm*num_local_directions; - double * KRESTRICT phi_nm = phi + nm*num_gz + group0*num_zones; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT psi_d = psi + d*num_locgz; - double const ell_nm_d = ell_nm[d]; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int gz = 
0;gz < num_locgz; ++ gz){ - phi_nm[gz] += ell_nm_d * psi_d[gz]; - } - } - } - } -} - -void Kernel_3d_DGZ::LPlusTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int num_groups = sdom.phi_out->groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_groups_zones = num_local_groups*num_zones; - - // Zero RHS - sdom.rhs->clear(0.0); - - // Get pointers - double const * KRESTRICT phi_out = sdom.phi_out->ptr() + group0*num_zones; - double const * KRESTRICT ell_plus = sdom.ell_plus->ptr(); - double * KRESTRICT rhs = sdom.rhs->ptr(); - - for (int d = 0; d < num_local_directions; d++) { - double * KRESTRICT rhs_d = rhs + d*num_groups_zones; - double const * KRESTRICT ell_plus_d = ell_plus + d*num_moments; - - for(int nm = 0;nm < num_moments;++nm){ - double const ell_plus_d_nm = ell_plus_d[nm]; - double const * KRESTRICT phi_out_nm = phi_out + nm*num_groups*num_zones; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int gz = 0;gz < num_groups_zones; ++ gz){ - rhs_d[gz] += ell_plus_d_nm * phi_out_nm[gz]; - } - } - } - } -} - -/** - Compute scattering source term phi_out from flux moments in phi. 
- phi_out(gp,z,nm) = sum_g { sigs(g, n, gp) * phi(g,z,nm) } -*/ -void Kernel_3d_DGZ::scattering(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double const * KRESTRICT sigs = grid_data->sigs->ptr(); - - int const * KRESTRICT moment_to_coeff = &grid_data->moment_to_coeff[0]; - double const * KRESTRICT phi = grid_data->phi[zs]->ptr(); - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // Zero out source terms - grid_data->phi_out[zs]->clear(0.0); - - // grab dimensions - //int num_mixed = sdom.mixed_to_zones.size(); - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - int num_gz = num_groups*num_zones; - - for(int nm = 0;nm < num_moments;++ nm){ - // map nm to n - int n = moment_to_coeff[nm]; - double const * KRESTRICT sigs_n = sigs + n*3*num_groups*num_groups; - double const * KRESTRICT phi_nm = phi + nm*num_gz; - double * KRESTRICT phi_out_nm = phi_out + nm*num_gz; - - for(int g = 0;g < num_groups;++ g){ - double const * KRESTRICT sigs_n_g = sigs_n + g*3*num_groups; - double const * KRESTRICT phi_nm_g = phi_nm + g*num_zones; - - for(int gp = 0;gp < num_groups;++ gp){ - double const * KRESTRICT sigs_n_g_gp = sigs_n_g + gp*3; - double * KRESTRICT phi_out_nm_gp = phi_out_nm + gp*num_zones; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - double phi_out_nm_gp_z = 0.0; - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int 
mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - phi_out_nm_gp_z += sigs_n_g_gp[material] * phi_nm_g[zone] * fraction; - } - phi_out_nm_gp[zone] += phi_out_nm_gp_z; - } - } - } - } - } -} - - -/** - * Add an isotropic source, with flux of 1, to every zone with Region 1 - * (or material 0). - * - * Since it's isotropic, we're just adding this to nm=0. - */ -void Kernel_3d_DGZ::source(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get the phi and phi out references - SubTVec &phi_out = *grid_data->phi_out[zs]; - - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT mixed_to_zones = &sdom.mixed_to_zones[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double * KRESTRICT phi_out_nm0 = phi_out.ptr(); - - // grab dimensions - int num_mixed = sdom.mixed_to_zones.size(); - int num_zones = sdom.num_zones; - int num_groups = phi_out.groups; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int g = 0;g < num_groups;++ g){ - double * KRESTRICT phi_out_nm0_g = phi_out_nm0 + g*num_zones; - - for(int mix = 0;mix < num_mixed;++ mix){ - int zone = mixed_to_zones[mix]; - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - if(material == 0){ - phi_out_nm0_g[zone] += 1.0 * fraction; - } - } - } - } -} - - -// Macros for offsets with fluxes on cell faces -#define I_PLANE_INDEX(j, k) ((k)*(local_jmax) + (j)) -#define J_PLANE_INDEX(i, k) ((k)*(local_imax) + (i)) -#define K_PLANE_INDEX(i, j) ((j)*(local_imax) + (i)) -#define Zonal_INDEX(i, j, k) ((i) + (local_imax)*(j) \ - + (local_imax)*(local_jmax)*(k)) - -void Kernel_3d_DGZ::sweep(Subdomain *sdom) { - int num_directions = sdom->num_directions; - int 
num_groups = sdom->num_groups; - int num_zones = sdom->num_zones; - - Directions *direction = sdom->directions; - - int local_imax = sdom->nzones[0]; - int local_jmax = sdom->nzones[1]; - int local_kmax = sdom->nzones[2]; - - double const * KRESTRICT dx = &sdom->deltas[0][0]; - double const * KRESTRICT dy = &sdom->deltas[1][0]; - double const * KRESTRICT dz = &sdom->deltas[2][0]; - - double const * KRESTRICT sigt = sdom->sigt->ptr(); - double * KRESTRICT psi = sdom->psi->ptr(); - double const * KRESTRICT rhs = sdom->rhs->ptr(); - - double * KRESTRICT psi_lf = sdom->plane_data[0]->ptr(); - double * KRESTRICT psi_fr = sdom->plane_data[1]->ptr(); - double * KRESTRICT psi_bo = sdom->plane_data[2]->ptr(); - - int num_gz = num_groups * num_zones; - int num_gz_i = local_jmax * local_kmax * num_groups; - int num_gz_j = local_imax * local_kmax * num_groups; - int num_gz_k = local_imax * local_jmax * num_groups; - int num_z_i = local_jmax * local_kmax; - int num_z_j = local_imax * local_kmax; - int num_z_k = local_imax * local_jmax; - - // All directions have same id,jd,kd, since these are all one Direction Set - // So pull that information out now - Grid_Sweep_Block const &extent = sdom->sweep_block; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int d = 0; d < num_directions; ++d) { - double xcos = 2.0 * direction[d].xcos; - double ycos = 2.0 * direction[d].ycos; - double zcos = 2.0 * direction[d].zcos; - - double * KRESTRICT psi_d = psi + d*num_gz; - double const * KRESTRICT rhs_d = rhs + d*num_gz; - - double * KRESTRICT psi_lf_d = psi_lf + d*num_gz_i; - double * KRESTRICT psi_fr_d = psi_fr + d*num_gz_j; - double * KRESTRICT psi_bo_d = psi_bo + d*num_gz_k; - - for (int g = 0; g < num_groups; ++g) { - double const * KRESTRICT sigt_g = sigt + g*num_zones; - double * KRESTRICT psi_d_g = psi_d + g*num_zones; - double const * KRESTRICT rhs_d_g = rhs_d + g*num_zones; - - double * KRESTRICT psi_lf_d_g = psi_lf_d + g*num_z_i; - double * KRESTRICT psi_fr_d_g 
= psi_fr_d + g*num_z_j; - double * KRESTRICT psi_bo_d_g = psi_bo_d + g*num_z_k; - - for (int k = extent.start_k; k != extent.end_k; k += extent.inc_k) { - double zcos_dzk = zcos / dz[k + 1]; - - for (int j = extent.start_j; j != extent.end_j; j += extent.inc_j) { - double ycos_dyj = ycos / dy[j + 1]; - - for (int i = extent.start_i; i != extent.end_i; i += extent.inc_i) { - double xcos_dxi = xcos / dx[i + 1]; - - int z_idx = Zonal_INDEX(i, j, k); - int z_i = I_PLANE_INDEX(j, k); - int z_j = J_PLANE_INDEX(i, k); - int z_k = K_PLANE_INDEX(i, j); - - /* Calculate new zonal flux */ - double psi_d_g_z = (rhs_d_g[z_idx] - + psi_lf_d_g[z_i] * xcos_dxi - + psi_fr_d_g[z_j] * ycos_dyj - + psi_bo_d_g[z_k] * zcos_dzk) - / (xcos_dxi + ycos_dyj + zcos_dzk + sigt_g[z_idx]); - - psi_d_g[z_idx] = psi_d_g_z; - - /* Apply diamond-difference relationships */ - psi_lf_d_g[z_i] = 2.0 * psi_d_g_z - psi_lf_d_g[z_i]; - psi_fr_d_g[z_j] = 2.0 * psi_d_g_z - psi_fr_d_g[z_j]; - psi_bo_d_g[z_k] = 2.0 * psi_d_g_z - psi_bo_d_g[z_k]; - } - } - } - } // group - } // direction - -} - - diff --git a/src/Kripke/Kernel/Kernel_3d_DZG.cpp b/src/Kripke/Kernel/Kernel_3d_DZG.cpp deleted file mode 100644 index 03e6e074..00000000 --- a/src/Kripke/Kernel/Kernel_3d_DZG.cpp +++ /dev/null @@ -1,374 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. 
Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#include -#include -#include - -Nesting_Order Kernel_3d_DZG::nestingPsi(void) const { - return NEST_DZG; -} - -Nesting_Order Kernel_3d_DZG::nestingPhi(void) const { - return NEST_DZG; -} - -Nesting_Order Kernel_3d_DZG::nestingSigt(void) const { - return NEST_DZG; -} - -Nesting_Order Kernel_3d_DZG::nestingEll(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_DZG::nestingEllPlus(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_DZG::nestingSigs(void) const { - return NEST_DZG; -} - - -void Kernel_3d_DZG::LTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Zero Phi - for(int ds = 0;ds < grid_data->num_zone_sets;++ ds){ - grid_data->phi[ds]->clear(0.0); - } - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_groups = sdom.phi->groups; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gz = num_groups*num_zones; - int num_locgz = num_local_groups*num_zones; - - // Get pointers - double const * KRESTRICT ell = sdom.ell->ptr(); - double const * KRESTRICT psi = sdom.psi->ptr(); - double * KRESTRICT phi = sdom.phi->ptr(); - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT ell_nm = ell + nm*num_local_directions; - double * KRESTRICT phi_nm = phi + nm*num_gz; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT psi_d = psi + d*num_locgz; - double const ell_nm_d = ell_nm[d]; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int z = 0;z < num_zones;++ z){ - double const * KRESTRICT psi_d_z = psi_d + z*num_local_groups; - double * KRESTRICT phi_nm_z = phi_nm + z*num_groups + group0; - - for(int g = 0;g < num_local_groups; ++ g){ - 
phi_nm_z[g] += ell_nm_d * psi_d_z[g]; - } - } - } - } - } -} - -void Kernel_3d_DZG::LPlusTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_groups = sdom.phi_out->groups; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gz = num_groups*num_zones; - int num_locgz = num_local_groups*num_zones; - - // Zero RHS - sdom.rhs->clear(0.0); - - // Get pointers - double const * KRESTRICT phi_out = sdom.phi_out->ptr() + group0; - double const * KRESTRICT ell_plus = sdom.ell_plus->ptr(); - double * KRESTRICT rhs = sdom.rhs->ptr(); - - for (int d = 0; d < num_local_directions; d++) { - double * KRESTRICT rhs_d = rhs + d*num_locgz; - double const * KRESTRICT ell_plus_d = ell_plus + d*num_moments; - - for(int nm = 0;nm < num_moments;++nm){ - double const ell_plus_d_nm = ell_plus_d[nm]; - double const * KRESTRICT phi_out_nm = phi_out + nm*num_gz; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int z = 0;z < num_zones;++ z){ - double const * KRESTRICT phi_out_nm_z = phi_out_nm + z*num_groups; - double * KRESTRICT rhs_d_z = rhs_d + z*num_local_groups; - - for(int g = 0;g < num_local_groups;++ g){ - rhs_d_z[g] += ell_plus_d_nm * phi_out_nm_z[g]; - } - } - } - } - } -} - - -/** - Compute scattering source term phi_out from flux moments in phi. 
- phi_out(gp,z,nm) = sum_g { sigs(g, n, gp) * phi(g,z,nm) } - -*/ -void Kernel_3d_DZG::scattering(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double const * KRESTRICT sigs = grid_data->sigs->ptr(); - - int const * KRESTRICT moment_to_coeff = &grid_data->moment_to_coeff[0]; - double const * KRESTRICT phi = grid_data->phi[zs]->ptr(); - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // Zero out source terms - grid_data->phi_out[zs]->clear(0.0); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - int num_gz = num_groups*num_zones; - - for(int nm = 0;nm < num_moments;++ nm){ - // map nm to n - int n = moment_to_coeff[nm]; - double const * KRESTRICT sigs_n = sigs + n*3*num_groups*num_groups; - double const * KRESTRICT phi_nm = phi + nm*num_gz; - double * KRESTRICT phi_out_nm = phi_out + nm*num_gz; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - double const * KRESTRICT sigs_n_mat = sigs_n + material*num_groups*num_groups; - double const * KRESTRICT phi_nm_z = phi_nm + zone*num_groups; - double * KRESTRICT phi_out_nm_z = phi_out_nm + zone*num_groups; - - for(int g = 0;g < num_groups;++ g){ - double const * KRESTRICT 
sigs_n_mat_g = sigs_n_mat + g*num_groups; - double const phi_nm_z_g = phi_nm_z[g]; - - for(int gp = 0;gp < num_groups;++ gp){ - phi_out_nm_z[gp] += sigs_n_mat_g[gp] * phi_nm_z_g * fraction; - } - } - } - } - } - } -} - -/** - * Add an isotropic source, with flux of 1, to every zone with Region 1 - * (or material 0). - * - * Since it's isotropic, we're just adding this to nm=0. - */ -void Kernel_3d_DZG::source(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get the phi and phi out references - SubTVec &phi_out = *grid_data->phi_out[zs]; - - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double * KRESTRICT phi_out_nm0 = phi_out.ptr(); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = phi_out.groups; - int num_moments = grid_data->total_num_moments; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - double * KRESTRICT phi_out_nm0_z = phi_out_nm0 + zone*num_groups; - - if(material == 0){ - for(int g = 0;g < num_groups;++ g){ - phi_out_nm0_z[g] += 1.0 * fraction; - } - } - } - } - } -} - - - -// Macros for offsets with fluxes on cell faces -#define I_PLANE_INDEX(j, k) ((k)*(local_jmax) + (j)) -#define J_PLANE_INDEX(i, k) ((k)*(local_imax) + (i)) -#define K_PLANE_INDEX(i, j) ((j)*(local_imax) + (i)) -#define Zonal_INDEX(i, j, k) ((i) + (local_imax)*(j) \ - + 
(local_imax)*(local_jmax)*(k)) - -void Kernel_3d_DZG::sweep(Subdomain *sdom) { - int num_directions = sdom->num_directions; - int num_groups = sdom->num_groups; - int num_zones = sdom->num_zones; - - Directions *direction = sdom->directions; - - int local_imax = sdom->nzones[0]; - int local_jmax = sdom->nzones[1]; - int local_kmax = sdom->nzones[2]; - - double const * KRESTRICT dx = &sdom->deltas[0][0]; - double const * KRESTRICT dy = &sdom->deltas[1][0]; - double const * KRESTRICT dz = &sdom->deltas[2][0]; - - double const * KRESTRICT sigt = sdom->sigt->ptr(); - double * KRESTRICT psi = sdom->psi->ptr(); - double const * KRESTRICT rhs = sdom->rhs->ptr(); - - double * KRESTRICT psi_lf = sdom->plane_data[0]->ptr(); - double * KRESTRICT psi_fr = sdom->plane_data[1]->ptr(); - double * KRESTRICT psi_bo = sdom->plane_data[2]->ptr(); - - int num_zg = num_zones * num_groups; - int num_zg_i = local_jmax * local_kmax * num_groups; - int num_zg_j = local_imax * local_kmax * num_groups; - int num_zg_k = local_imax * local_jmax * num_groups; - - // All directions have same id,jd,kd, since these are all one Direction Set - // So pull that information out now - Grid_Sweep_Block const &extent = sdom->sweep_block; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int d = 0; d < num_directions; ++d) { - double xcos = 2.0 * direction[d].xcos; - double ycos = 2.0 * direction[d].ycos; - double zcos = 2.0 * direction[d].zcos; - - double * KRESTRICT psi_d = psi + d*num_zg; - double const * KRESTRICT rhs_d = rhs + d*num_zg; - - double * KRESTRICT psi_lf_d = psi_lf + d*num_zg_i; - double * KRESTRICT psi_fr_d = psi_fr + d*num_zg_j; - double * KRESTRICT psi_bo_d = psi_bo + d*num_zg_k; - - // Perform transport sweep of the grid 1 cell at a time. 
- for (int k = extent.start_k; k != extent.end_k; k += extent.inc_k) { - double zcos_dzk = zcos / dz[k + 1]; - - for (int j = extent.start_j; j != extent.end_j; j += extent.inc_j) { - double ycos_dyj = ycos / dy[j + 1]; - - for (int i = extent.start_i; i != extent.end_i; i += extent.inc_i) { - double xcos_dxi = xcos / dx[i + 1]; - - int z = Zonal_INDEX(i, j, k); - double const * KRESTRICT sigt_z = sigt + z*num_groups; - double * KRESTRICT psi_d_z = psi_d + z*num_groups; - double const * KRESTRICT rhs_d_z = rhs_d + z*num_groups; - - double * KRESTRICT psi_lf_d_z = psi_lf_d + I_PLANE_INDEX(j, k)*num_groups; - double * KRESTRICT psi_fr_d_z = psi_fr_d + J_PLANE_INDEX(i, k)*num_groups; - double * KRESTRICT psi_bo_d_z = psi_bo_d + K_PLANE_INDEX(i, j)*num_groups; - - for (int g = 0; g < num_groups; ++g) { - // Calculate new zonal flux - double psi_d_z_g = (rhs_d_z[g] - + psi_lf_d_z[g] * xcos_dxi - + psi_fr_d_z[g] * ycos_dyj - + psi_bo_d_z[g] * zcos_dzk) - / (xcos_dxi + ycos_dyj + zcos_dzk + sigt_z[g]); - - psi_d_z[g] = psi_d_z_g; - - // Apply diamond-difference relationships - psi_lf_d_z[g] = 2.0 * psi_d_z_g - psi_lf_d_z[g]; - psi_fr_d_z[g] = 2.0 * psi_d_z_g - psi_fr_d_z[g]; - psi_bo_d_z[g] = 2.0 * psi_d_z_g - psi_bo_d_z[g]; - } - } - } - } - } -} - diff --git a/src/Kripke/Kernel/Kernel_3d_GDZ.cpp b/src/Kripke/Kernel/Kernel_3d_GDZ.cpp deleted file mode 100644 index 941960c7..00000000 --- a/src/Kripke/Kernel/Kernel_3d_GDZ.cpp +++ /dev/null @@ -1,373 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. 
Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#include -#include -#include - -Nesting_Order Kernel_3d_GDZ::nestingPsi(void) const { - return NEST_GDZ; -} - -Nesting_Order Kernel_3d_GDZ::nestingPhi(void) const { - return NEST_GDZ; -} - -Nesting_Order Kernel_3d_GDZ::nestingSigt(void) const { - return NEST_DGZ; -} - -Nesting_Order Kernel_3d_GDZ::nestingEll(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_GDZ::nestingEllPlus(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_GDZ::nestingSigs(void) const { - return NEST_GDZ; -} - - -void Kernel_3d_GDZ::LTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Clear phi - for(int ds = 0;ds < grid_data->num_zone_sets;++ ds){ - grid_data->phi[ds]->clear(0.0); - } - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_dz = num_zones*num_local_directions; - int num_nmz = num_zones*num_moments; - - // Get pointers - double const * KRESTRICT ell = sdom.ell->ptr(); - double const * KRESTRICT psi = sdom.psi->ptr(); - double * KRESTRICT phi = sdom.phi->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int g = 0; g < num_local_groups; ++g) { - double const * KRESTRICT psi_g = psi + g*num_dz; - double * KRESTRICT phi_g = phi + (group0+g)*num_nmz; - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT ell_nm = ell + nm*num_local_directions; - double * KRESTRICT phi_g_nm = phi_g + nm*num_zones; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT psi_g_d = psi_g + d*num_zones; - double const ell_nm_d = ell_nm[d]; - - for(int z = 0;z < num_zones; ++ z){ - phi_g_nm[z] += ell_nm_d * psi_g_d[z]; - } - } - } - 
} - } -} - -void Kernel_3d_GDZ::LPlusTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_nmz = num_moments*num_zones; - int num_dz = num_local_directions*num_zones; - - // Zero RHS - sdom.rhs->clear(0.0); - - // Get pointers - double const * KRESTRICT phi_out = sdom.phi_out->ptr(); - double const * KRESTRICT ell_plus = sdom.ell_plus->ptr(); - double * KRESTRICT rhs = sdom.rhs->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int g = 0; g < num_local_groups; ++g) { - double const * KRESTRICT phi_out_g = phi_out + (group0+g)*num_nmz; - double * KRESTRICT rhs_g = rhs + g*num_dz; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT ell_plus_d = ell_plus + d*num_moments; - double * KRESTRICT rhs_g_d = rhs_g + d*num_zones; - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT phi_out_g_nm = phi_out_g + nm*num_zones; - double const ell_plus_d_nm = ell_plus_d[nm]; - - for(int z = 0;z < num_zones; ++ z){ - rhs_g_d[z] += ell_plus_d_nm * phi_out_g_nm[z]; - } - } - } - } - } -} - -/** - Compute scattering source term phi_out from flux moments in phi. 
- phi_out(gp,z,nm) = sum_g { sigs(g, n, gp) * phi(g,z,nm) } -*/ -void Kernel_3d_GDZ::scattering(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double const * KRESTRICT sigs = grid_data->sigs->ptr(); - - int const * KRESTRICT moment_to_coeff = &grid_data->moment_to_coeff[0]; - double const * KRESTRICT phi = grid_data->phi[zs]->ptr(); - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // Zero out source terms - grid_data->phi_out[zs]->clear(0.0); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - int num_coeff = grid_data->legendre_order+1; - int num_nmz = num_moments*num_zones; - - for(int g = 0;g < num_groups;++ g){ - double const * KRESTRICT sigs_g = sigs + g*num_groups*num_coeff*3; - double const * KRESTRICT phi_g = phi + g*num_nmz; - - for(int gp = 0;gp < num_groups;++ gp){ - double const * KRESTRICT sigs_g_gp = sigs_g + gp*num_coeff*3; - double * KRESTRICT phi_out_gp = phi_out + gp*num_nmz; - - for(int nm = 0;nm < num_moments;++ nm){ - // map nm to n - int n = moment_to_coeff[nm]; - - double const * KRESTRICT sigs_g_gp_n = sigs_g_gp + n*3; - double const * KRESTRICT phi_g_nm = phi_g + nm*num_zones; - double * KRESTRICT phi_out_gp_nm = phi_out_gp + nm*num_zones; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < 
mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - phi_out_gp_nm[zone] += sigs_g_gp_n[material] * phi_g_nm[zone] * fraction; - } - } - } - } - } - } -} - - -/** - * Add an isotropic source, with flux of 1, to every zone with Region 1 - * (or material 0). - * - * Since it's isotropic, we're just adding this to nm=0. - */ -void Kernel_3d_GDZ::source(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT mixed_to_zones = &sdom.mixed_to_zones[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // grab dimensions - int num_mixed = sdom.mixed_to_zones.size(); - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int g = 0;g < num_groups;++ g){ - double * KRESTRICT phi_out_g_nm0 = phi_out + g*num_zones*num_moments; - - for(int mix = 0;mix < num_mixed;++ mix){ - int zone = mixed_to_zones[mix]; - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - if(material == 0){ - phi_out_g_nm0[zone] += 1.0 * fraction; - } - } - } - } -} - -// Macros for offsets with fluxes on cell faces -#define I_PLANE_INDEX(j, k) ((k)*(local_jmax) + (j)) -#define J_PLANE_INDEX(i, k) ((k)*(local_imax) + (i)) -#define K_PLANE_INDEX(i, j) ((j)*(local_imax) + (i)) -#define Zonal_INDEX(i, j, k) ((i) + (local_imax)*(j) \ - + (local_imax)*(local_jmax)*(k)) - -void Kernel_3d_GDZ::sweep(Subdomain *sdom) { - int num_directions = sdom->num_directions; - int num_groups = sdom->num_groups; - int num_zones = sdom->num_zones; - - 
Directions *direction = sdom->directions; - - int local_imax = sdom->nzones[0]; - int local_jmax = sdom->nzones[1]; - int local_kmax = sdom->nzones[2]; - - double const * KRESTRICT dx = &sdom->deltas[0][0]; - double const * KRESTRICT dy = &sdom->deltas[1][0]; - double const * KRESTRICT dz = &sdom->deltas[2][0]; - - double const * KRESTRICT sigt = sdom->sigt->ptr(); - double * KRESTRICT psi = sdom->psi->ptr(); - double const * KRESTRICT rhs = sdom->rhs->ptr(); - - double * KRESTRICT psi_lf = sdom->plane_data[0]->ptr(); - double * KRESTRICT psi_fr = sdom->plane_data[1]->ptr(); - double * KRESTRICT psi_bo = sdom->plane_data[2]->ptr(); - - int num_dz = num_zones * num_directions; - int num_dz_i = local_jmax * local_kmax * num_directions; - int num_dz_j = local_imax * local_kmax * num_directions; - int num_dz_k = local_imax * local_jmax * num_directions; - int num_z_i = local_jmax * local_kmax; - int num_z_j = local_imax * local_kmax; - int num_z_k = local_imax * local_jmax; - - // All directions have same id,jd,kd, since these are all one Direction Set - // So pull that information out now - Grid_Sweep_Block const &extent = sdom->sweep_block; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int g = 0; g < num_groups; ++g) { - - double const * KRESTRICT sigt_g = sigt + num_zones*g; - double * KRESTRICT psi_g = psi + g*num_dz; - double const * KRESTRICT rhs_g = rhs + g*num_dz; - - double * KRESTRICT psi_lf_g = psi_lf + g*num_dz_i; - double * KRESTRICT psi_fr_g = psi_fr + g*num_dz_j; - double * KRESTRICT psi_bo_g = psi_bo + g*num_dz_k; - - for (int d = 0; d < num_directions; ++d) { - double * KRESTRICT psi_g_d = psi_g + d*num_zones; - double const * KRESTRICT rhs_g_d = rhs_g + d*num_zones; - double * KRESTRICT psi_lf_g_d = psi_lf_g + d*num_z_i; - double * KRESTRICT psi_fr_g_d = psi_fr_g + d*num_z_j; - double * KRESTRICT psi_bo_g_d = psi_bo_g + d*num_z_k; - - double xcos = 2.0 * direction[d].xcos; - double ycos = 2.0 * direction[d].ycos; - double zcos = 
2.0 * direction[d].zcos; - - // Perform transport sweep of the grid 1 cell at a time. - for (int k = extent.start_k; k != extent.end_k; k += extent.inc_k) { - double zcos_dzk = zcos / dz[k + 1]; - - for (int j = extent.start_j; j != extent.end_j; j += extent.inc_j) { - double ycos_dyj = ycos / dy[j + 1]; - - for (int i = extent.start_i; i != extent.end_i; i += extent.inc_i) { - double xcos_dxi = xcos / dx[i + 1]; - - int z_idx = Zonal_INDEX(i, j, k); - int z_i = I_PLANE_INDEX(j, k); - int z_j = J_PLANE_INDEX(i, k); - int z_k = K_PLANE_INDEX(i, j); - - // Calculate new zonal flux - double psi_g_d_z = (rhs_g_d[z_idx] - + psi_lf_g_d[z_i] * xcos_dxi - + psi_fr_g_d[z_j] * ycos_dyj - + psi_bo_g_d[z_k] * zcos_dzk) - / (xcos_dxi + ycos_dyj + zcos_dzk + sigt_g[z_idx]); - - psi_g_d[z_idx] = psi_g_d_z; - - // Apply diamond-difference relationships - psi_lf_g_d[z_i] = 2.0 * psi_g_d_z - psi_lf_g_d[z_i]; - psi_fr_g_d[z_j] = 2.0 * psi_g_d_z - psi_fr_g_d[z_j]; - psi_bo_g_d[z_k] = 2.0 * psi_g_d_z - psi_bo_g_d[z_k]; - } - } - } - } - } - -} - diff --git a/src/Kripke/Kernel/Kernel_3d_GZD.cpp b/src/Kripke/Kernel/Kernel_3d_GZD.cpp deleted file mode 100644 index 51e328d3..00000000 --- a/src/Kripke/Kernel/Kernel_3d_GZD.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. 
Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#include -#include -#include - -Nesting_Order Kernel_3d_GZD::nestingPsi(void) const { - return NEST_GZD; -} - -Nesting_Order Kernel_3d_GZD::nestingPhi(void) const { - return NEST_GZD; -} - -Nesting_Order Kernel_3d_GZD::nestingSigt(void) const { - return NEST_DGZ; -} - -Nesting_Order Kernel_3d_GZD::nestingEll(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_GZD::nestingEllPlus(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_GZD::nestingSigs(void) const { - return NEST_GZD; -} - - -void Kernel_3d_GZD::LTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Clear phi - for(int ds = 0;ds < grid_data->num_zone_sets;++ ds){ - grid_data->phi[ds]->clear(0.0); - } - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_groups_zones = num_local_groups*num_zones; - int num_dz = num_zones*num_local_directions; - int num_nmz = num_zones*num_moments; - - // Get pointers - double const * KRESTRICT ell = sdom.ell->ptr(); - double const * KRESTRICT psi = sdom.psi->ptr(); - double * KRESTRICT phi = sdom.phi->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int g = 0; g < num_local_groups; ++g) { - double const * KRESTRICT psi_g = psi + g*num_dz; - double * KRESTRICT phi_g = phi + (group0+g)*num_nmz; - - for(int z = 0;z < num_zones; ++ z){ - double const * KRESTRICT psi_g_z = psi_g + z*num_local_directions; - double * KRESTRICT phi_g_z = phi_g + z*num_moments; - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT ell_nm = ell + nm*num_local_directions; - - double phi_g_z_nm = 0.0; - for (int d = 0; d < num_local_directions; d++) { 
- phi_g_z_nm += ell_nm[d] * psi_g_z[d]; - } - phi_g_z[nm] += phi_g_z_nm; - } - } - } - } -} - -void Kernel_3d_GZD::LPlusTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_nmz = num_moments*num_zones; - int num_dz = num_local_directions*num_zones; - - // Zero RHS - sdom.rhs->clear(0.0); - - // Get pointers - double const * KRESTRICT phi_out = sdom.phi_out->ptr(); - double const * KRESTRICT ell_plus = sdom.ell_plus->ptr(); - double * KRESTRICT rhs = sdom.rhs->ptr(); - - for (int g = 0; g < num_local_groups; ++g) { - double const * KRESTRICT phi_out_g = phi_out + (group0+g)*num_nmz; - double * KRESTRICT rhs_g = rhs + g*num_dz; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int z = 0;z < num_zones; ++ z){ - double const * KRESTRICT phi_out_g_z = phi_out_g + z*num_moments; - double * KRESTRICT rhs_g_z = rhs_g + z*num_local_directions; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT ell_plus_d = ell_plus + d*num_moments; - - double rhs_g_z_d = 0.0; - for(int nm = 0;nm < num_moments;++nm){ - rhs_g_z_d += ell_plus_d[nm] * phi_out_g_z[nm]; - } - rhs_g_z[d] += rhs_g_z_d; - } - } - } - } -} - - -/** - Compute scattering source term phi_out from flux moments in phi. 
- phi_out(gp,z,nm) = sum_g { sigs(g, n, gp) * phi(g,z,nm) } -*/ -void Kernel_3d_GZD::scattering(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double const * KRESTRICT sigs = grid_data->sigs->ptr(); - - int const * KRESTRICT moment_to_coeff = &grid_data->moment_to_coeff[0]; - double const * KRESTRICT phi = grid_data->phi[zs]->ptr(); - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // Zero out source terms - grid_data->phi_out[zs]->clear(0.0); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - int num_coeff = grid_data->legendre_order+1; - int num_nmz = num_moments*num_zones; - - for(int g = 0;g < num_groups;++ g){ - double const * KRESTRICT sigs_g = sigs + g*num_groups*num_coeff*3; - double const * KRESTRICT phi_g = phi + g*num_nmz; - - for(int gp = 0;gp < num_groups;++ gp){ - double const * KRESTRICT sigs_g_gp = sigs_g + gp*num_coeff*3; - double * KRESTRICT phi_out_gp = phi_out + gp*num_nmz; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - double const * KRESTRICT sigs_g_gp_mat = sigs_g_gp + material*num_coeff; - double const * KRESTRICT phi_g_z = phi_g + zone*num_moments; - double * KRESTRICT phi_out_gp_z = phi_out_gp 
+ zone*num_moments; - - for(int nm = 0;nm < num_moments;++ nm){ - // map nm to n - int n = moment_to_coeff[nm]; - - phi_out_gp_z[nm] += sigs_g_gp_mat[n] * phi_g_z[nm] * fraction; - } - } - } - } - } - } -} - - -/** - * Add an isotropic source, with flux of 1, to every zone with Region 1 - * (or material 0). - * - * Since it's isotropic, we're just adding this to nm=0. - */ -void Kernel_3d_GZD::source(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT mixed_to_zones = &sdom.mixed_to_zones[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // grab dimensions - int num_mixed = sdom.mixed_to_zones.size(); - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int g = 0;g < num_groups;++ g){ - double * KRESTRICT phi_out_g_nm0 = phi_out + g*num_zones*num_moments; - - for(int mix = 0;mix < num_mixed;++ mix){ - int zone = mixed_to_zones[mix]; - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - if(material == 0){ - phi_out_g_nm0[zone*num_moments] += 1.0 * fraction; - } - } - } - } -} - -// Macros for offsets with fluxes on cell faces -#define I_PLANE_INDEX(j, k) ((k)*(local_jmax) + (j)) -#define J_PLANE_INDEX(i, k) ((k)*(local_imax) + (i)) -#define K_PLANE_INDEX(i, j) ((j)*(local_imax) + (i)) -#define Zonal_INDEX(i, j, k) ((i) + (local_imax)*(j) \ - + (local_imax)*(local_jmax)*(k)) - -void Kernel_3d_GZD::sweep(Subdomain *sdom) { - int num_directions = sdom->num_directions; - int num_groups = sdom->num_groups; - int num_zones = 
sdom->num_zones; - - Directions *direction = sdom->directions; - - int local_imax = sdom->nzones[0]; - int local_jmax = sdom->nzones[1]; - int local_kmax = sdom->nzones[2]; - - double const * KRESTRICT dx = &sdom->deltas[0][0]; - double const * KRESTRICT dy = &sdom->deltas[1][0]; - double const * KRESTRICT dz = &sdom->deltas[2][0]; - - double const * KRESTRICT sigt = sdom->sigt->ptr(); - double * KRESTRICT psi = sdom->psi->ptr(); - double const * KRESTRICT rhs = sdom->rhs->ptr(); - - double * KRESTRICT psi_lf = sdom->plane_data[0]->ptr(); - double * KRESTRICT psi_fr = sdom->plane_data[1]->ptr(); - double * KRESTRICT psi_bo = sdom->plane_data[2]->ptr(); - - int num_zd = num_zones * num_directions; - int num_zd_i = local_jmax * local_kmax * num_directions; - int num_zd_j = local_imax * local_kmax * num_directions; - int num_zd_k = local_imax * local_jmax * num_directions; - - // All directions have same id,jd,kd, since these are all one Direction Set - // So pull that information out now - Grid_Sweep_Block const &extent = sdom->sweep_block; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int g = 0; g < num_groups; ++g) { - double const * KRESTRICT sigt_g = sigt + num_zones*g; - double * KRESTRICT psi_g = psi + g*num_zd; - double const * KRESTRICT rhs_g = rhs + g*num_zd; - - double * KRESTRICT psi_lf_g = psi_lf + g*num_zd_i; - double * KRESTRICT psi_fr_g = psi_fr + g*num_zd_j; - double * KRESTRICT psi_bo_g = psi_bo + g*num_zd_k; - - // Perform transport sweep of the grid 1 cell at a time. 
- for (int k = extent.start_k; k != extent.end_k; k += extent.inc_k) { - double two_dz = 2.0 / dz[k + 1]; - for (int j = extent.start_j; j != extent.end_j; j += extent.inc_j) { - double two_dy = 2.0 / dy[j + 1]; - for (int i = extent.start_i; i != extent.end_i; i += extent.inc_i) { - double two_dx = 2.0 / dx[i + 1]; - - int z = Zonal_INDEX(i, j, k); - - double const sigt_g_z = sigt_g[z]; - double * KRESTRICT psi_g_z = psi_g + z*num_directions; - double const * KRESTRICT rhs_g_z = rhs_g + z*num_directions; - - double * KRESTRICT psi_lf_g_z = psi_lf_g + I_PLANE_INDEX(j, k)*num_directions; - double * KRESTRICT psi_fr_g_z = psi_fr_g + J_PLANE_INDEX(i, k)*num_directions; - double * KRESTRICT psi_bo_g_z = psi_bo_g + K_PLANE_INDEX(i, j)*num_directions; - - for (int d = 0; d < num_directions; ++d) { - double xcos_dxi = direction[d].xcos * two_dx; - double ycos_dyj = direction[d].ycos * two_dy; - double zcos_dzk = direction[d].zcos * two_dz; - - // Calculate new zonal flux - double psi_g_z_d = (rhs_g_z[d] + psi_lf_g_z[d] * xcos_dxi - + psi_fr_g_z[d] * ycos_dyj + psi_bo_g_z[d] * zcos_dzk) - / (xcos_dxi + ycos_dyj + zcos_dzk + sigt_g_z); - - psi_g_z[d] = psi_g_z_d; - - // Apply diamond-difference relationships - psi_lf_g_z[d] = 2.0 * psi_g_z_d - psi_lf_g_z[d]; - psi_fr_g_z[d] = 2.0 * psi_g_z_d - psi_fr_g_z[d]; - psi_bo_g_z[d] = 2.0 * psi_g_z_d - psi_bo_g_z[d]; - } - } - } - } - } -} - - diff --git a/src/Kripke/Kernel/Kernel_3d_GZD.h b/src/Kripke/Kernel/Kernel_3d_GZD.h deleted file mode 100644 index bb7f58cf..00000000 --- a/src/Kripke/Kernel/Kernel_3d_GZD.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. 
- * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#ifndef KRIPKE_KERNEL_3D_GZD_H__ -#define KRIPKE_KERNEL_3D_GZD_H__ - -#include - -class Kernel_3d_GZD : public Kernel { - public: - virtual Nesting_Order nestingPsi(void) const; - virtual Nesting_Order nestingPhi(void) const; - virtual Nesting_Order nestingSigt(void) const; - virtual Nesting_Order nestingEll(void) const; - virtual Nesting_Order nestingEllPlus(void) const; - virtual Nesting_Order nestingSigs(void) const; - - virtual void LTimes(Grid_Data *grid_data); - virtual void LPlusTimes(Grid_Data *grid_data); - virtual void scattering(Grid_Data *grid_data); - virtual void source(Grid_Data *grid_data); - virtual void sweep(Subdomain *ga_set); -}; - -#endif diff --git a/src/Kripke/Kernel/Kernel_3d_ZDG.cpp b/src/Kripke/Kernel/Kernel_3d_ZDG.cpp deleted file mode 100644 index 23747271..00000000 --- a/src/Kripke/Kernel/Kernel_3d_ZDG.cpp +++ /dev/null @@ -1,372 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. 
Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include -#include -#include - -Nesting_Order Kernel_3d_ZDG::nestingPsi(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_ZDG::nestingPhi(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_ZDG::nestingSigt(void) const { - return NEST_DZG; -} - -Nesting_Order Kernel_3d_ZDG::nestingEll(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_ZDG::nestingEllPlus(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_ZDG::nestingSigs(void) const { - return NEST_ZDG; -} - - -void Kernel_3d_ZDG::LTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Clear phi - for(int ds = 0;ds < grid_data->num_zone_sets;++ ds){ - grid_data->phi[ds]->clear(0.0); - } - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_groups = sdom.phi->groups; - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gnm = num_groups * num_moments; - int num_locgd = num_local_groups * 
num_local_directions; - - // Get pointers - double const * KRESTRICT ell = sdom.ell->ptr(); - double const * KRESTRICT psi = sdom.psi->ptr(); - double * KRESTRICT phi = sdom.phi->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int z = 0; z < num_zones; z++) { - double const * KRESTRICT psi_z = psi + z*num_locgd; - double * KRESTRICT phi_z = phi + z*num_gnm; - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT ell_nm = ell + nm*num_local_directions; - double * KRESTRICT phi_z_nm_g0 = phi_z + nm*num_groups + group0; - - for (int d = 0; d < num_local_directions; d++) { - double const ell_nm_d = ell_nm[d]; - double const * KRESTRICT psi_z_d = psi_z + d*num_local_groups; - - for (int g = 0; g < num_local_groups; ++g) { - phi_z_nm_g0[g] += ell_nm_d * psi_z_d[g]; - } - } - } - } - } -} - -void Kernel_3d_ZDG::LPlusTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_groups = sdom.phi->groups; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gnm = num_moments*num_groups; - int num_locgd = num_local_directions*num_local_groups; - - // Zero RHS - sdom.rhs->clear(0.0); - - // Get pointers - double const * KRESTRICT phi_out = sdom.phi_out->ptr(); - double const * KRESTRICT ell_plus = sdom.ell_plus->ptr(); - double * KRESTRICT rhs = sdom.rhs->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int z = 0;z < num_zones; ++ z){ - double const * KRESTRICT phi_out_z = phi_out + z*num_gnm; - double * KRESTRICT rhs_z = rhs + z*num_locgd; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT ell_plus_d = 
ell_plus + d*num_moments; - double * KRESTRICT rhs_z_d = rhs_z + d*num_local_groups; - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT phi_out_z_nm = phi_out_z + nm*num_groups + group0; - double const ell_plus_d_nm = ell_plus_d[nm]; - - for (int g = 0; g < num_local_groups; ++g) { - rhs_z_d[g] += ell_plus_d_nm * phi_out_z_nm[g]; - } - } - } - } - } -} - - -/** - Compute scattering source term phi_out from flux moments in phi. - phi_out(gp,z,nm) = sum_g { sigs(g, n, gp) * phi(g,z,nm) } -*/ -void Kernel_3d_ZDG::scattering(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double const * KRESTRICT sigs = grid_data->sigs->ptr(); - - int const * KRESTRICT moment_to_coeff = &grid_data->moment_to_coeff[0]; - double const * KRESTRICT phi = grid_data->phi[zs]->ptr(); - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // Zero out source terms - grid_data->phi_out[zs]->clear(0.0); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - int num_coeff = grid_data->legendre_order+1; - int num_nmg = num_moments*num_groups; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - double const * KRESTRICT sigs_mat = sigs + 
material*num_coeff*num_groups*num_groups; - double const * KRESTRICT phi_z = phi + zone*num_nmg; - double * KRESTRICT phi_out_z = phi_out + zone*num_nmg; - - for(int nm = 0;nm < num_moments;++ nm){ - // map nm to n - int n = moment_to_coeff[nm]; - - double const * KRESTRICT sigs_mat_n = sigs_mat + n*num_groups*num_groups; - double const * KRESTRICT phi_z_nm = phi_z + nm*num_groups; - double * KRESTRICT phi_out_z_nm = phi_out_z + nm*num_groups; - - for(int g = 0;g < num_groups;++ g){ - double const * KRESTRICT sigs_mat_n_g = sigs_mat_n + g*num_groups; - double const phi_z_nm_g = phi_z_nm[g]; - - for(int gp = 0;gp < num_groups;++ gp){ - phi_out_z_nm[gp] += sigs_mat_n_g[gp] * phi_z_nm_g * fraction; - } - } - } - } - } - } -} - -/** - * Add an isotropic source, with flux of 1, to every zone with Region 1 - * (or material 0). - * - * Since it's isotropic, we're just adding this to nm=0. - */ -void Kernel_3d_ZDG::source(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_to_zones = &sdom.mixed_to_zones[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; 
- double fraction = mixed_fraction[mix]; - double * KRESTRICT phi_out_z_nm0 = phi_out + zone*num_moments*num_groups; - - if(material == 0){ - for(int g = 0;g < num_groups;++ g){ - phi_out_z_nm0[g] += 1.0 * fraction; - } - } - } - } - } -} - -// Macros for offsets with fluxes on cell faces -#define I_PLANE_INDEX(j, k) ((k)*(local_jmax) + (j)) -#define J_PLANE_INDEX(i, k) ((k)*(local_imax) + (i)) -#define K_PLANE_INDEX(i, j) ((j)*(local_imax) + (i)) -#define Zonal_INDEX(i, j, k) ((i) + (local_imax)*(j) \ - + (local_imax)*(local_jmax)*(k)) - -void Kernel_3d_ZDG::sweep(Subdomain *sdom) { - int num_directions = sdom->num_directions; - int num_groups = sdom->num_groups; - int num_zones = sdom->num_zones; - - Directions *direction = sdom->directions; - - int local_imax = sdom->nzones[0]; - int local_jmax = sdom->nzones[1]; - int local_kmax = sdom->nzones[2]; - - double const * KRESTRICT dx = &sdom->deltas[0][0]; - double const * KRESTRICT dy = &sdom->deltas[1][0]; - double const * KRESTRICT dz = &sdom->deltas[2][0]; - - double const * KRESTRICT sigt = sdom->sigt->ptr(); - double * KRESTRICT psi = sdom->psi->ptr(); - double const * KRESTRICT rhs = sdom->rhs->ptr(); - - double * KRESTRICT psi_lf = sdom->plane_data[0]->ptr(); - double * KRESTRICT psi_fr = sdom->plane_data[1]->ptr(); - double * KRESTRICT psi_bo = sdom->plane_data[2]->ptr(); - - int num_gd = num_groups * num_directions; - - // Upwind/Downwind face flux data - SubTVec &i_plane = *sdom->plane_data[0]; - SubTVec &j_plane = *sdom->plane_data[1]; - SubTVec &k_plane = *sdom->plane_data[2]; - - // All directions have same id,jd,kd, since these are all one Direction Set - // So pull that information out now - Grid_Sweep_Block const &extent = sdom->sweep_block; - - for (int k = extent.start_k; k != extent.end_k; k += extent.inc_k) { - double two_dz = 2.0 / dz[k + 1]; - for (int j = extent.start_j; j != extent.end_j; j += extent.inc_j) { - double two_dy = 2.0 / dy[j + 1]; - for (int i = extent.start_i; i != 
extent.end_i; i += extent.inc_i) { - double two_dx = 2.0 / dx[i + 1]; - - int z = Zonal_INDEX(i, j, k); - - double const * KRESTRICT sigt_z = sigt + z*num_groups; - double * KRESTRICT psi_z = psi + z*num_gd; - double const * KRESTRICT rhs_z = rhs + z*num_gd; - - double * KRESTRICT psi_lf_z = psi_lf + I_PLANE_INDEX(j, k) * num_gd; - double * KRESTRICT psi_fr_z = psi_fr + J_PLANE_INDEX(i, k) * num_gd; - double * KRESTRICT psi_bo_z = psi_bo + K_PLANE_INDEX(i, j) * num_gd; -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int d = 0; d < num_directions; ++d) { - - double xcos_dxi = two_dx * direction[d].xcos; - double ycos_dyj = two_dy * direction[d].ycos; - double zcos_dzk = two_dz * direction[d].zcos; - - double * KRESTRICT psi_z_d = psi_z + d*num_groups; - double const * KRESTRICT rhs_z_d = rhs_z + d*num_groups; - - double * KRESTRICT psi_lf_z_d = psi_lf_z + d*num_groups; - double * KRESTRICT psi_fr_z_d = psi_fr_z + d*num_groups; - double * KRESTRICT psi_bo_z_d = psi_bo_z + d*num_groups; - - for (int g = 0; g < num_groups; ++g) { - // Calculate new zonal flux - double psi_z_d_g = (rhs_z_d[g] - + psi_lf_z_d[g] * xcos_dxi - + psi_fr_z_d[g] * ycos_dyj - + psi_bo_z_d[g] * zcos_dzk) - / (xcos_dxi + ycos_dyj + zcos_dzk + sigt_z[g]); - - psi_z_d[g] = psi_z_d_g; - - // Apply diamond-difference relationships - psi_lf_z_d[g] = 2.0 * psi_z_d_g - psi_lf_z_d[g]; - psi_fr_z_d[g] = 2.0 * psi_z_d_g - psi_fr_z_d[g]; - psi_bo_z_d[g] = 2.0 * psi_z_d_g - psi_bo_z_d[g]; - } - } - } - } - } -} - diff --git a/src/Kripke/Kernel/Kernel_3d_ZDG.h b/src/Kripke/Kernel/Kernel_3d_ZDG.h deleted file mode 100644 index d4908a03..00000000 --- a/src/Kripke/Kernel/Kernel_3d_ZDG.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. 
- * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#ifndef KRIPKE_KERNEL_3D_ZDG_H__ -#define KRIPKE_KERNEL_3D_ZDG_H__ - -#include - -class Kernel_3d_ZDG : public Kernel { - public: - virtual Nesting_Order nestingPsi(void) const; - virtual Nesting_Order nestingPhi(void) const; - virtual Nesting_Order nestingSigt(void) const; - virtual Nesting_Order nestingEll(void) const; - virtual Nesting_Order nestingEllPlus(void) const; - virtual Nesting_Order nestingSigs(void) const; - - virtual void LTimes(Grid_Data *grid_data); - virtual void LPlusTimes(Grid_Data *grid_data); - virtual void scattering(Grid_Data *grid_data); - virtual void source(Grid_Data *grid_data); - virtual void sweep(Subdomain *ga_set); -}; - -#endif diff --git a/src/Kripke/Kernel/Kernel_3d_ZGD.cpp b/src/Kripke/Kernel/Kernel_3d_ZGD.cpp deleted file mode 100644 index d0a5c456..00000000 --- a/src/Kripke/Kernel/Kernel_3d_ZGD.cpp +++ /dev/null @@ -1,366 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. 
Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include -#include -#include - -Nesting_Order Kernel_3d_ZGD::nestingPsi(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_ZGD::nestingPhi(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_ZGD::nestingSigt(void) const { - return NEST_DZG; -} - -Nesting_Order Kernel_3d_ZGD::nestingEll(void) const { - return NEST_ZGD; -} - -Nesting_Order Kernel_3d_ZGD::nestingEllPlus(void) const { - return NEST_ZDG; -} - -Nesting_Order Kernel_3d_ZGD::nestingSigs(void) const { - return NEST_ZGD; -} - -void Kernel_3d_ZGD::LTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Clear phi - for(int ds = 0;ds < grid_data->num_zone_sets;++ ds){ - grid_data->phi[ds]->clear(0.0); - } - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_groups = sdom.phi->groups; - int num_zones = sdom.num_zones; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gnm = num_groups * num_moments; - int num_locgd = num_local_groups * 
num_local_directions; - - // Get pointers - double const * KRESTRICT ell = sdom.ell->ptr(); - double const * KRESTRICT psi = sdom.psi->ptr(); - double * KRESTRICT phi = sdom.phi->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int z = 0; z < num_zones; z++) { - double const * KRESTRICT psi_z = psi + z*num_locgd; - double * KRESTRICT phi_z = phi + z*num_gnm; - - for (int g = 0; g < num_local_groups; ++g) { - double const * KRESTRICT psi_z_g = psi_z + g*num_local_directions; - double * KRESTRICT phi_z_g = phi_z + (group0+g)*num_moments; - - for(int nm = 0;nm < num_moments;++nm){ - double const * KRESTRICT ell_nm = ell + nm*num_local_directions; - - double phi_z_g_nm = 0.0; - for (int d = 0; d < num_local_directions; d++) { - phi_z_g_nm += ell_nm[d] * psi_z_g[d]; - } - phi_z_g[nm] += phi_z_g_nm; - } - } - } - } -} - -void Kernel_3d_ZGD::LPlusTimes(Grid_Data *grid_data) { - // Outer parameters - int num_moments = grid_data->total_num_moments; - - // Loop over Subdomains - int num_subdomains = grid_data->subdomains.size(); - for (int sdom_id = 0; sdom_id < num_subdomains; ++ sdom_id){ - Subdomain &sdom = grid_data->subdomains[sdom_id]; - - // Get dimensioning - int num_zones = sdom.num_zones; - int num_groups = sdom.phi->groups; - int num_local_groups = sdom.num_groups; - int group0 = sdom.group0; - int num_local_directions = sdom.num_directions; - int num_gnm = num_moments*num_groups; - int num_locgd = num_local_directions*num_local_groups; - - // Zero RHS - sdom.rhs->clear(0.0); - - // Get pointers - double const * KRESTRICT phi_out = sdom.phi_out->ptr(); - double const * KRESTRICT ell_plus = sdom.ell_plus->ptr(); - double * KRESTRICT rhs = sdom.rhs->ptr(); - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int z = 0;z < num_zones; ++ z){ - double const * KRESTRICT phi_out_z = phi_out + z*num_gnm; - double * KRESTRICT rhs_z = rhs + z*num_locgd; - - for (int g = 0; g < num_local_groups; ++g) { - double const * KRESTRICT 
phi_out_z_g = phi_out_z + (group0+g)*num_moments; - double * KRESTRICT rhs_z_g = rhs_z + g*num_local_directions; - - for (int d = 0; d < num_local_directions; d++) { - double const * KRESTRICT ell_plus_d = ell_plus + d*num_moments; - - double rhs_z_g_d = 0.0; - for(int nm = 0;nm < num_moments; ++nm){ - rhs_z_g_d += ell_plus_d[nm] * phi_out_z_g[nm]; - } - rhs_z_g[d] = rhs_z_g_d; - } - } - } - } -} - - -/** - Compute scattering source term phi_out from flux moments in phi. - phi_out(gp,z,nm) = sum_g { sigs(g, n, gp) * phi(g,z,nm) } -*/ -void Kernel_3d_ZGD::scattering(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double const * KRESTRICT sigs = grid_data->sigs->ptr(); - - int const * KRESTRICT moment_to_coeff = &grid_data->moment_to_coeff[0]; - double const * KRESTRICT phi = grid_data->phi[zs]->ptr(); - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // Zero out source terms - grid_data->phi_out[zs]->clear(0.0); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - int num_coeff = grid_data->legendre_order+1; - int num_nmg = num_moments*num_groups; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - - double const * KRESTRICT 
sigs_mat = sigs + material*num_coeff*num_groups*num_groups; - double const * KRESTRICT phi_z = phi + zone*num_nmg; - double * KRESTRICT phi_out_z = phi_out + zone*num_nmg; - - for(int g = 0;g < num_groups;++ g){ - double const * KRESTRICT sigs_mat_g = sigs_mat + g*num_groups*num_coeff; - double const * KRESTRICT phi_z_g = phi_z + g*num_moments; - - for(int gp = 0;gp < num_groups;++ gp){ - double const * KRESTRICT sigs_mat_g_gp = sigs_mat_g + gp*num_coeff; - double * KRESTRICT phi_out_z_gp = phi_out_z + gp*num_moments; - - for(int nm = 0;nm < num_moments;++ nm){ - // map nm to n - int n = moment_to_coeff[nm]; - - phi_out_z_gp[nm] += sigs_mat_g_gp[n] * phi_z_g[nm] * fraction; - } - } - } - } - } - } -} - -/** - * Add an isotropic source, with flux of 1, to every zone with Region 1 - * (or material 0). - * - * Since it's isotropic, we're just adding this to nm=0. - */ -void Kernel_3d_ZGD::source(Grid_Data *grid_data){ - // Loop over zoneset subdomains - for(int zs = 0;zs < grid_data->num_zone_sets;++ zs){ - - // get material mix information - int sdom_id = grid_data->zs_to_sdomid[zs]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - int const * KRESTRICT zones_to_mixed = &sdom.zones_to_mixed[0]; - int const * KRESTRICT num_mixed = &sdom.num_mixed[0]; - int const * KRESTRICT mixed_material = &sdom.mixed_material[0]; - double const * KRESTRICT mixed_fraction = &sdom.mixed_fraction[0]; - double * KRESTRICT phi_out = grid_data->phi_out[zs]->ptr(); - - // grab dimensions - int num_zones = sdom.num_zones; - int num_groups = grid_data->phi_out[zs]->groups; - int num_moments = grid_data->total_num_moments; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int zone = 0;zone < num_zones;++ zone){ - int mix_start = zones_to_mixed[zone]; - int mix_stop = mix_start + num_mixed[zone]; - - for(int mix = mix_start;mix < mix_stop;++ mix){ - int material = mixed_material[mix]; - double fraction = mixed_fraction[mix]; - double * KRESTRICT phi_out_z = phi_out + 
zone*num_moments*num_groups; - - if(material == 0){ - for(int g = 0;g < num_groups;++ g){ - phi_out_z[g*num_moments] += 1.0 * fraction; - } - } - } - } - } -} - -// Macros for offsets with fluxes on cell faces -#define I_PLANE_INDEX(j, k) ((k)*(local_jmax) + (j)) -#define J_PLANE_INDEX(i, k) ((k)*(local_imax) + (i)) -#define K_PLANE_INDEX(i, j) ((j)*(local_imax) + (i)) -#define Zonal_INDEX(i, j, k) ((i) + (local_imax)*(j) \ - + (local_imax)*(local_jmax)*(k)) - -void Kernel_3d_ZGD::sweep(Subdomain *sdom) { - int num_directions = sdom->num_directions; - int num_groups = sdom->num_groups; - int num_zones = sdom->num_zones; - - Directions *direction = sdom->directions; - - int local_imax = sdom->nzones[0]; - int local_jmax = sdom->nzones[1]; - int local_kmax = sdom->nzones[2]; - - double const * KRESTRICT dx = &sdom->deltas[0][0]; - double const * KRESTRICT dy = &sdom->deltas[1][0]; - double const * KRESTRICT dz = &sdom->deltas[2][0]; - - double const * KRESTRICT sigt = sdom->sigt->ptr(); - double * KRESTRICT psi = sdom->psi->ptr(); - double const * KRESTRICT rhs = sdom->rhs->ptr(); - - double * KRESTRICT psi_lf = sdom->plane_data[0]->ptr(); - double * KRESTRICT psi_fr = sdom->plane_data[1]->ptr(); - double * KRESTRICT psi_bo = sdom->plane_data[2]->ptr(); - - int num_gd = num_groups * num_directions; - - // All directions have same id,jd,kd, since these are all one Direction Set - // So pull that information out now - Grid_Sweep_Block const &extent = sdom->sweep_block; - - // Perform transport sweep of the grid 1 cell at a time. 
- for (int k = extent.start_k; k != extent.end_k; k += extent.inc_k) { - double two_dz = 2.0 / dz[k + 1]; - for (int j = extent.start_j; j != extent.end_j; j += extent.inc_j) { - double two_dy = 2.0 / dy[j + 1]; - for (int i = extent.start_i; i != extent.end_i; i += extent.inc_i) { - double two_dx = 2.0 / dx[i + 1]; - - int z = Zonal_INDEX(i, j, k); - double const * KRESTRICT sigt_z = sigt + z*num_groups; - double * KRESTRICT psi_z = psi + z*num_gd; - double const * KRESTRICT rhs_z = rhs + z*num_gd; - - double * KRESTRICT psi_lf_z = psi_lf + I_PLANE_INDEX(j, k) * num_gd; - double * KRESTRICT psi_fr_z = psi_fr + J_PLANE_INDEX(i, k) * num_gd; - double * KRESTRICT psi_bo_z = psi_bo + K_PLANE_INDEX(i, j) * num_gd; - -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for (int g = 0; g < num_groups; ++g) { - double * KRESTRICT psi_z_g = psi_z + g * num_directions; - double const * KRESTRICT rhs_z_g = rhs_z + g * num_directions; - - double * KRESTRICT psi_lf_z_g = psi_lf_z + g * num_directions; - double * KRESTRICT psi_fr_z_g = psi_fr_z + g * num_directions; - double * KRESTRICT psi_bo_z_g = psi_bo_z + g * num_directions; - - for (int d = 0; d < num_directions; ++d) { - double xcos_dxi = direction[d].xcos * two_dx; - double ycos_dyj = direction[d].ycos * two_dy; - double zcos_dzk = direction[d].zcos * two_dz; - - // Calculate new zonal flux - double psi_z_g_d = (rhs_z_g[d] - + psi_lf_z_g[d] * xcos_dxi - + psi_fr_z_g[d] * ycos_dyj - + psi_bo_z_g[d] * zcos_dzk) - / (xcos_dxi + ycos_dyj + zcos_dzk + sigt_z[g]); - - psi_z_g[d] = psi_z_g_d; - - // Apply diamond-difference relationships - psi_lf_z_g[d] = 2.0 * psi_z_g_d - psi_lf_z_g[d]; - psi_fr_z_g[d] = 2.0 * psi_z_g_d - psi_fr_z_g[d]; - psi_bo_z_g[d] = 2.0 * psi_z_g_d - psi_bo_z_g[d]; - } - } - } - } - } -} - diff --git a/src/Kripke/Kernel/Kernel_3d_ZGD.h b/src/Kripke/Kernel/Kernel_3d_ZGD.h deleted file mode 100644 index b538f356..00000000 --- a/src/Kripke/Kernel/Kernel_3d_ZGD.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - 
* NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. 
- */ - -#ifndef KRIPKE_KERNEL_3D_ZGD_H__ -#define KRIPKE_KERNEL_3D_ZGD_H__ - -#include - -class Kernel_3d_ZGD : public Kernel { - public: - virtual Nesting_Order nestingPsi(void) const; - virtual Nesting_Order nestingPhi(void) const; - virtual Nesting_Order nestingSigt(void) const; - virtual Nesting_Order nestingEll(void) const; - virtual Nesting_Order nestingEllPlus(void) const; - virtual Nesting_Order nestingSigs(void) const; - - virtual void LTimes(Grid_Data *grid_data); - virtual void LPlusTimes(Grid_Data *grid_data); - virtual void scattering(Grid_Data *grid_data); - virtual void source(Grid_Data *grid_data); - virtual void sweep(Subdomain *ga_set); -}; - -#endif diff --git a/src/Kripke/Kernel/LPlusTimes.cpp b/src/Kripke/Kernel/LPlusTimes.cpp new file mode 100644 index 00000000..64568fcd --- /dev/null +++ b/src/Kripke/Kernel/LPlusTimes.cpp @@ -0,0 +1,114 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + +struct LPlusTimesSdom { + + template + void operator()(AL al, + Kripke::SdomId sdom_id, + Set const &set_dir, + Set const &set_group, + Set const &set_zone, + Set const &set_moment, + Field_Moments &field_phi_out, + Field_Flux &field_rhs, + Field_EllPlus &field_ell_plus) const + { + + using ExecPolicy = typename Kripke::Arch::Policy_LPlusTimes::ExecPolicy; + + auto sdom_al = getSdomAL(al, sdom_id); + + // Get dimensioning + int num_directions = set_dir.size(sdom_id); + int num_groups = set_group.size(sdom_id); + int num_moments = set_moment.size(sdom_id); + int num_zones = set_zone.size(sdom_id); + + // Get views + auto phi_out = sdom_al.getView(field_phi_out); + auto rhs = sdom_al.getView(field_rhs); + auto ell_plus = sdom_al.getView(field_ell_plus); + + // Compute: rhs = ell_plus * phi_out + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_directions), + RAJA::TypedRangeSegment(0, num_moments), + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeSegment(0, num_zones) ), + KRIPKE_LAMBDA (Direction d, Moment nm, Group g, Zone z) { + + rhs(d,g,z) += ell_plus(d, nm) * phi_out(nm, g, z); + + 
} + ); + } + +}; + + + +void Kripke::Kernel::LPlusTimes(Kripke::Core::DataStore &data_store) +{ + KRIPKE_TIMER(data_store, LPlusTimes); + + Set const &set_dir = data_store.getVariable("Set/Direction"); + Set const &set_group = data_store.getVariable("Set/Group"); + Set const &set_zone = data_store.getVariable("Set/Zone"); + Set const &set_moment = data_store.getVariable("Set/Moment"); + + auto &field_phi_out = data_store.getVariable("phi_out"); + auto &field_rhs = data_store.getVariable("rhs"); + auto &field_ell_plus = data_store.getVariable("ell_plus"); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + // Loop over Subdomains + for (Kripke::SdomId sdom_id : field_rhs.getWorkList()){ + + Kripke::dispatch(al_v, LPlusTimesSdom{}, sdom_id, + set_dir, set_group, set_zone, set_moment, + field_phi_out, field_rhs, field_ell_plus); + + } + +} diff --git a/src/Kripke/Kernel/LTimes.cpp b/src/Kripke/Kernel/LTimes.cpp new file mode 100644 index 00000000..0371280f --- /dev/null +++ b/src/Kripke/Kernel/LTimes.cpp @@ -0,0 +1,130 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include +#include +#include +#include +#include +#include + +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +struct LTimesSdom { + + template + RAJA_INLINE + void operator()(AL al, + Kripke::SdomId sdom_id, + Set const &set_dir, + Set const &set_group, + Set const &set_zone, + Set const &set_moment, + Field_Flux &field_psi, + Field_Moments &field_phi, + Field_Ell &field_ell) const + { + + using ExecPolicy = typename Kripke::Arch::Policy_LTimes::ExecPolicy; + + auto sdom_al = getSdomAL(al, sdom_id); + + // Get dimensioning (all sizes are taken for this subdomain) + int num_directions = set_dir.size(sdom_id); + int num_groups = set_group.size(sdom_id); + int num_moments = set_moment.size(sdom_id); + int num_zones = set_zone.size(sdom_id); + + // Get views of the fields through sdom_al + auto psi = sdom_al.getView(field_psi); + auto phi = sdom_al.getView(field_phi); + auto ell = sdom_al.getView(field_ell); + + // Compute: phi(nm,g,z) += ell(nm,d) * psi(d,g,z), accumulated over every direction d + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_moments), + RAJA::TypedRangeSegment(0, num_directions), + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeSegment(0, num_zones) ), + KRIPKE_LAMBDA (Moment nm, Direction d, Group g, Zone z) { + + phi(nm,g,z) += ell(nm, d) * psi(d, g, z); + + } + ); + + + } + 
+}; + + + + + + + + +void Kripke::Kernel::LTimes(Kripke::Core::DataStore &data_store) +{ + KRIPKE_TIMER(data_store, LTimes); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + Set const &set_dir = data_store.getVariable("Set/Direction"); + Set const &set_group = data_store.getVariable("Set/Group"); + Set const &set_zone = data_store.getVariable("Set/Zone"); + Set const &set_moment = data_store.getVariable("Set/Moment"); + + auto &field_psi = data_store.getVariable("psi"); + auto &field_phi = data_store.getVariable("phi"); + auto &field_ell = data_store.getVariable("ell"); + + // Loop over the subdomains in psi's work list, dispatching LTimesSdom on the selected arch/layout + for (Kripke::SdomId sdom_id : field_psi.getWorkList()){ + + + Kripke::dispatch(al_v, LTimesSdom{}, sdom_id, + set_dir, set_group, set_zone, set_moment, + field_psi, field_phi, field_ell); + + + } + +} + + diff --git a/src/Kripke/Kernel/Population.cpp b/src/Kripke/Kernel/Population.cpp new file mode 100644 index 00000000..7eb35745 --- /dev/null +++ b/src/Kripke/Kernel/Population.cpp @@ -0,0 +1,123 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + +struct PopulationSdom { + + template + void operator()(AL al, + Kripke::SdomId sdom_id, + Set const &set_dir, + Set const &set_group, + Set const &set_zone, + Field_Flux &field_psi, + Field_Direction2Double &field_w, + Field_Zone2Double &field_volume, + double *part_ptr) const + { + using Policy = Kripke::Arch::Policy_Population; + using ReducePolicy = typename Policy::ReducePolicy; + using ExecPolicy = typename Policy::ExecPolicy; + + auto sdom_al = getSdomAL(al, sdom_id); + + int num_directions = set_dir.size(sdom_id); + int num_groups = set_group.size(sdom_id); + int num_zones = set_zone.size(sdom_id); + + auto psi = sdom_al.getView(field_psi); + auto w = sdom_al.getView(field_w); + auto volume = sdom_al.getView(field_volume); + + RAJA::ReduceSum part_red(0.0); // reduction accumulator for this subdomain, starts at zero + + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_directions), + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeSegment(0, num_zones) ), + KRIPKE_LAMBDA (Direction d, Group g, Zone z) { + + part_red += w(d) * psi(d,g,z) * volume(z); + + } + ); + + *part_ptr += (double)part_red; // add this subdomain's sum to the caller's running total + } + +}; + + +/** + * Returns 
the integral of Psi over all phase-space, to look at convergence + */ +double Kripke::Kernel::population(Kripke::Core::DataStore &data_store) +{ + KRIPKE_TIMER(data_store, Population); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + Set const &set_dir = data_store.getVariable("Set/Direction"); + Set const &set_group = data_store.getVariable("Set/Group"); + Set const &set_zone = data_store.getVariable("Set/Zone"); + + auto &field_psi = data_store.getVariable("psi"); + auto &field_w = data_store.getVariable("quadrature/w"); + auto &field_volume = data_store.getVariable("volume"); + + // sum up particles for psi over the subdomains in psi's work list (rhs is not read here) + double part = 0.0; + for (Kripke::SdomId sdom_id : field_psi.getWorkList()){ + + Kripke::dispatch(al_v, PopulationSdom{}, sdom_id, + set_dir, set_group, set_zone, + field_psi, field_w, field_volume, + &part); + } + + // reduce: hand the local sum to comm.allReduceSumDouble and return its result + auto const &comm = data_store.getVariable("comm"); + return comm.allReduceSumDouble(part); +} + diff --git a/src/Kripke/Kernel/Scattering.cpp b/src/Kripke/Kernel/Scattering.cpp new file mode 100644 index 00000000..4f6045ca --- /dev/null +++ b/src/Kripke/Kernel/Scattering.cpp @@ -0,0 +1,188 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. 
Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + +/** + Compute scattering source term phi_out from flux moments in phi. 
+ phi_out(nm,g,z) += sum_gp { sigs_z * phi(nm,gp,z) }, where g is a dst group, gp is a src group, and sigs_z is the fraction-weighted sum of sigs(mat,n,g,gp) over the zone's mix elements +*/ + +struct ScatteringSdom { + + template + RAJA_INLINE + void operator()(AL al, + Kripke::SdomId sdom_src, + Kripke::SdomId sdom_dst, + Set const &set_group, + Set const &set_zone, + Set const &set_moment, + Field_Moments &field_phi, + Field_Moments &field_phi_out, + Field_SigmaS &field_sigs, + Field_Zone2MixElem &field_zone_to_mixelem, + Field_Zone2Int &field_zone_to_num_mixelem, + Field_MixElem2Material &field_mixelem_to_material, + Field_MixElem2Double &field_mixelem_to_fraction, + Field_Moment2Legendre &field_moment_to_legendre) const + { + + using ExecPolicy = typename Kripke::Arch::Policy_Scattering::ExecPolicy; + + auto sdom_al = getSdomAL(al, sdom_src); + + // Get glower for src and dst ranges (to index into sigma_s) + int glower_src = set_group.lower(sdom_src); + int glower_dst = set_group.lower(sdom_dst); + + + // get views: moment-to-legendre map, phi / phi_out / sigs, and material mix information + auto moment_to_legendre = sdom_al.getView(field_moment_to_legendre); + + auto phi = sdom_al.getView(field_phi); + auto phi_out = sdom_al.getView(field_phi_out, sdom_dst); + auto sigs = sdom_al.getView(field_sigs); + + auto zone_to_mixelem = sdom_al.getView(field_zone_to_mixelem); + auto zone_to_num_mixelem = sdom_al.getView(field_zone_to_num_mixelem); + auto mixelem_to_material = sdom_al.getView(field_mixelem_to_material); + auto mixelem_to_fraction = sdom_al.getView(field_mixelem_to_fraction); + + // grab dimensions (zones and src groups from sdom_src; dst groups and moments from sdom_dst) + int num_zones = set_zone.size(sdom_src); + int num_groups_src = set_group.size(sdom_src); + int num_groups_dst = set_group.size(sdom_dst); + int num_moments = set_moment.size(sdom_dst); + + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_moments), + RAJA::TypedRangeSegment(0, num_groups_dst), + RAJA::TypedRangeSegment(0, num_groups_src), + RAJA::TypedRangeSegment(0, num_zones) ), + KRIPKE_LAMBDA (Moment nm, Group g, Group gp, Zone z) { + + // map moment nm to its Legendre value n + Legendre n = moment_to_legendre(nm); + + GlobalGroup 
global_g{*g+glower_dst}; + GlobalGroup global_gp{*gp+glower_src}; + + MixElem mix_start = zone_to_mixelem(z); + MixElem mix_stop = mix_start + zone_to_num_mixelem(z); + + double sigs_z = 0.0; + for(MixElem mix = mix_start;mix < mix_stop;++ mix){ + Material mat = mixelem_to_material(mix); + double fraction = mixelem_to_fraction(mix); + + sigs_z += sigs(mat, n, global_g, global_gp) * fraction; + } + phi_out(nm, g, z) += sigs_z * phi(nm, gp, z); + } + ); + } + +}; + + + +/** + Compute scattering source term phi_out from flux moments in phi. + phi_out(nm,g,z) += sum_gp { sigs_z * phi(nm,gp,z) }, evaluated for every (src, dst) subdomain pair that shares the same SPACE_R index +*/ + +void Kripke::Kernel::scattering(Kripke::Core::DataStore &data_store) +{ + KRIPKE_TIMER(data_store, Scattering); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + auto &pspace = data_store.getVariable("pspace"); + + auto &set_group = data_store.getVariable("Set/Group"); + auto &set_moment = data_store.getVariable("Set/Moment"); + auto &set_zone = data_store.getVariable("Set/Zone"); + + auto &field_phi = data_store.getVariable("phi"); + auto &field_phi_out = data_store.getVariable("phi_out"); + auto &field_sigs = data_store.getVariable("data/sigs"); + + auto &field_zone_to_mixelem = data_store.getVariable("zone_to_mixelem"); + auto &field_zone_to_num_mixelem = data_store.getVariable("zone_to_num_mixelem"); + auto &field_mixed_to_material = data_store.getVariable("mixelem_to_material"); + auto &field_mixed_to_fraction = data_store.getVariable("mixelem_to_fraction"); + + auto &field_moment_to_legendre = data_store.getVariable("moment_to_legendre"); + + + // Loop over every (src, dst) subdomain pair and compute scattering source + for(auto sdom_src : field_phi.getWorkList()){ + for(auto sdom_dst : field_phi_out.getWorkList()){ + + // Only work on subdomains where src and dst are on the same R subdomain + size_t r_src = pspace.subdomainToSpace(SPACE_R, sdom_src); + size_t r_dst = pspace.subdomainToSpace(SPACE_R, sdom_dst); + if(r_src != r_dst){ + continue; + } + + 
Kripke::dispatch(al_v, ScatteringSdom{}, sdom_src, + sdom_dst, + set_group, set_zone, set_moment, + field_phi, field_phi_out, field_sigs, + field_zone_to_mixelem, + field_zone_to_num_mixelem, + field_mixed_to_material, + field_mixed_to_fraction, + field_moment_to_legendre); + + + + } + + } + + +} + + diff --git a/src/Kripke/Kernel/Source.cpp b/src/Kripke/Kernel/Source.cpp new file mode 100644 index 00000000..75d440aa --- /dev/null +++ b/src/Kripke/Kernel/Source.cpp @@ -0,0 +1,137 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. 
+ * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + +/** + * Add an isotropic source to every mix element whose material equals 2, + * scaled by source_strength and by that mix element's fraction. + * NOTE(review): this comment used to say "Region 1 (or material 0)"; the kernel tests material == 2 -- confirm the numbering. + * Since it's isotropic, we're just adding this to nm=0. + */ +struct SourceSdom { + + template + RAJA_INLINE + void operator()(AL al, + Kripke::SdomId sdom_id, + Set const &set_group, + Set const &set_mixelem, + Field_Moments &field_phi_out, + Field_MixElem2Zone &field_mixed_to_zone, + Field_MixElem2Material &field_mixed_to_material, + Field_MixElem2Double &field_mixed_to_fraction, + double source_strength) const + { + + using ExecPolicy = typename Kripke::Arch::Policy_Source::ExecPolicy; + + auto sdom_al = getSdomAL(al, sdom_id); + + // Source term is isotropic + Moment nm{0}; + + auto phi_out = sdom_al.getView(field_phi_out); + + auto mixelem_to_zone = sdom_al.getView(field_mixed_to_zone); + auto mixelem_to_material = sdom_al.getView(field_mixed_to_material); + auto mixelem_to_fraction = sdom_al.getView(field_mixed_to_fraction); + + int num_mixed = set_mixelem.size(sdom_id); + int num_groups = set_group.size(sdom_id); + + + // Compute: phi_out(nm,g,z) += source_strength * fraction, for mix elements of material 2 only + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeSegment(0, num_mixed) ), + KRIPKE_LAMBDA (Group g, MixElem mix) { + + Material material = mixelem_to_material(mix); + + if(material == 2){ + Zone z = mixelem_to_zone(mix); + double fraction = mixelem_to_fraction(mix); + + phi_out(nm, g, z) += source_strength * fraction; + } + + } + ); + + } +}; + + + +void Kripke::Kernel::source(DataStore &data_store) +{ + KRIPKE_TIMER(data_store, Source); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + auto &set_group = data_store.getVariable("Set/Group"); + auto 
&set_mixelem = data_store.getVariable("Set/MixElem"); + + auto &field_phi_out = data_store.getVariable("phi_out"); + + auto &field_mixed_to_zone = data_store.getVariable("mixelem_to_zone"); + auto &field_mixed_to_material = data_store.getVariable("mixelem_to_material"); + auto &field_mixed_to_fraction = data_store.getVariable("mixelem_to_fraction"); + + double source_strength = 1.0; + + + // Loop over zoneset subdomains (the entries of phi_out's work list) + for(auto sdom_id : field_phi_out.getWorkList()){ + + Kripke::dispatch(al_v, SourceSdom{}, sdom_id, + set_group, set_mixelem, + field_phi_out, + field_mixed_to_zone, + field_mixed_to_material, + field_mixed_to_fraction, + source_strength); + + } + + +} diff --git a/src/Kripke/Kernel/SweepSubdomain.cpp b/src/Kripke/Kernel/SweepSubdomain.cpp new file mode 100644 index 00000000..4d262429 --- /dev/null +++ b/src/Kripke/Kernel/SweepSubdomain.cpp @@ -0,0 +1,144 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include + +using namespace Kripke; +using namespace Kripke::Core; + + + +struct SweepSdom { + + template + RAJA_INLINE + void operator()(AL al, Kripke::Core::DataStore &data_store, + Kripke::SdomId sdom_id) const + { + + using ExecPolicy = typename Kripke::Arch::Policy_SweepSubdomains::ExecPolicy; + + auto sdom_al = getSdomAL(al, sdom_id); + + int num_directions = data_store.getVariable("Set/Direction").size(sdom_id); + int num_groups = data_store.getVariable("Set/Group").size(sdom_id); + int local_imax = data_store.getVariable("Set/ZoneI").size(sdom_id); + int local_jmax = data_store.getVariable("Set/ZoneJ").size(sdom_id); + int local_kmax = data_store.getVariable("Set/ZoneK").size(sdom_id); + + auto xcos = sdom_al.getView(data_store.getVariable("quadrature/xcos")); + auto ycos = sdom_al.getView(data_store.getVariable("quadrature/ycos")); + auto zcos = sdom_al.getView(data_store.getVariable("quadrature/zcos")); + auto view_id = sdom_al.getView(data_store.getVariable("quadrature/id")); + auto view_jd = sdom_al.getView(data_store.getVariable("quadrature/jd")); + auto view_kd = sdom_al.getView(data_store.getVariable("quadrature/kd")); + + auto dx = 
sdom_al.getView(data_store.getVariable("dx")); + auto dy = sdom_al.getView(data_store.getVariable("dy")); + auto dz = sdom_al.getView(data_store.getVariable("dz")); + + auto sigt = sdom_al.getView(data_store.getVariable("sigt_zonal")); + auto psi = sdom_al.getView(data_store.getVariable("psi")); + auto rhs = sdom_al.getView(data_store.getVariable("rhs")); + + auto psi_lf = sdom_al.getView(data_store.getVariable("i_plane")); + auto psi_fr = sdom_al.getView(data_store.getVariable("j_plane")); + auto psi_bo = sdom_al.getView(data_store.getVariable("k_plane")); + + // Assumption: all directions in this sdom have same mesh traversal, so direction 0's id/jd/kd are used for the whole sweep + Direction d0{0}; + int id = view_id(d0); + int jd = view_jd(d0); + int kd = view_kd(d0); + + ZoneI start_i((id>0) ? 0 : local_imax-1); + ZoneJ start_j((jd>0) ? 0 : local_jmax-1); + ZoneK start_k((kd>0) ? 0 : local_kmax-1); + + ZoneI end_i((id>0) ? local_imax : -1); + ZoneJ end_j((jd>0) ? local_jmax : -1); + ZoneK end_k((kd>0) ? local_kmax : -1); + + auto zone_layout = data_store.getVariable>("Set/Zone").getLayout(sdom_id); + + RAJA::kernel( + camp::make_tuple( + RAJA::TypedRangeSegment(0, num_directions), + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeStrideSegment(*start_k, *end_k, kd), + RAJA::TypedRangeStrideSegment(*start_j, *end_j, jd), + RAJA::TypedRangeStrideSegment(*start_i, *end_i, id) + + + ), + KRIPKE_LAMBDA (Direction d, Group g, ZoneK k, ZoneJ j, ZoneI i) { + + double xcos_dxi = 2.0 * xcos(d) / dx(i); + double ycos_dyj = 2.0 * ycos(d) / dy(j); + double zcos_dzk = 2.0 * zcos(d) / dz(k); + + Zone z(zone_layout(*i, *j, *k)); + + /* Calculate new zonal flux */ + double psi_d_g_z = (rhs(d,g,z) + + psi_lf(d, g, j, k) * xcos_dxi + + psi_fr(d, g, i, k) * ycos_dyj + + psi_bo(d, g, i, j) * zcos_dzk) + / (xcos_dxi + ycos_dyj + zcos_dzk + sigt(g, z)); + + psi(d, g, z) = psi_d_g_z; + + /* Apply diamond-difference relationships: overwrite the i/j/k plane values in place */ + psi_lf(d, g, j, k) = 2.0 * psi_d_g_z - psi_lf(d, g, j, k); + psi_fr(d, g, i, k) = 2.0 * 
psi_d_g_z - psi_fr(d, g, i, k); + psi_bo(d, g, i, j) = 2.0 * psi_d_g_z - psi_bo(d, g, i, j); + + } + ); + } +}; + +void Kripke::Kernel::sweepSubdomain(Kripke::Core::DataStore &data_store, + Kripke::SdomId sdom_id) +{ + KRIPKE_TIMER(data_store, SweepSubdomain); + + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + Kripke::dispatch(al_v, SweepSdom{}, data_store, sdom_id); + +} diff --git a/src/Kripke/Layout.cpp b/src/Kripke/Layout.cpp deleted file mode 100644 index 55d19aac..00000000 --- a/src/Kripke/Layout.cpp +++ /dev/null @@ -1,372 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. 
- * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include - -#include -#include - -namespace { - /* - The following 2 routines are used to map: - 1) mpi ranks to/from processors in x,y,z - 2) zoneset ids to/from zoneset in x,y,z - */ - - /** - Helper routine to take an index, and return a 3-dimensional set of indices, - given size of each index dimension. - */ - inline void rankToIndices(int rank, int *indices, int const *sizes){ - indices[0] = rank / (sizes[1]*sizes[2]); - rank = rank % (sizes[1]*sizes[2]); - indices[1] = rank / sizes[2]; - indices[2] = rank % sizes[2]; - } - - /** - Helper routine to take an index, and return a 3-dimensional set of indices, - given size of each index dimension. - */ - inline int indicesToRank(int const *indices, int const *sizes){ - int rank; - - rank = indices[0]*(sizes[1]*sizes[2]); - rank += indices[1]*sizes[2]; - rank += indices[2]; - - return rank; - } -} - -Layout::Layout(Input_Variables *input_vars){ - num_group_sets = input_vars->num_groupsets; - num_direction_sets = input_vars->num_dirsets; - num_zone_sets = 1; - for(int dim = 0;dim < 3;++ dim){ - num_zone_sets_dim[dim] = input_vars->num_zonesets_dim[dim]; - num_zone_sets *= input_vars->num_zonesets_dim[dim]; - } - - // grab total number of zones - total_zones[0] = input_vars->nx; - total_zones[1] = input_vars->ny; - total_zones[2] = input_vars->nz; - - // Grab size of processor grid - num_procs[0] = input_vars->npx; - num_procs[1] = input_vars->npy; - num_procs[2] = input_vars->npz; - - /* Set the requested processor grid size */ - int R = num_procs[0] * num_procs[1] * num_procs[2]; - - /* Check requested size is the same as MPI_COMM_WORLD */ - int size; - MPI_Comm_size(MPI_COMM_WORLD, &size); - if(R != size){ - int myid; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - if(myid == 0){ - printf("ERROR: Incorrect number of MPI tasks. 
Need %d MPI tasks.", R); - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - - /* Compute the local coordinates in the processor decomposition */ - int mpi_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - rankToIndices(mpi_rank, our_rank, num_procs); -} -Layout::~Layout(){ - -} - -/** - Computes the subdomain ID based on a given groupset, directionset, and zoneset. -*/ -int Layout::setIdToSubdomainId(int gs, int ds, int zs) const{ - int indices[3] = {gs, ds, zs}; - int sizes[3] = {num_group_sets, num_direction_sets, num_zone_sets}; - - return indicesToRank(indices, sizes); -} - -/** - Computes groupset, directionset, and zoneset from a subdomain ID. -*/ -void Layout::subdomainIdToSetId(int sdom_id, int &gs, int &ds, int &zs) const { - int indices[3]; - int sizes[3] = {num_group_sets, num_direction_sets, num_zone_sets}; - - rankToIndices(sdom_id, indices, sizes); - - gs = indices[0]; - ds = indices[1]; - zs = indices[2]; -} - -/** - Computes the zoneset id along a particular dimension. -*/ -int Layout::subdomainIdToZoneSetDim(int sdom_id, int dim) const{ - // Compute zoneset - int gs, ds, zs; - subdomainIdToSetId(sdom_id, gs, ds, zs); - - // Compute zone set - int zs_dim[3]; - rankToIndices(zs, zs_dim, num_zone_sets_dim); - - return zs_dim[dim]; -} - -/** - Computes the number of zones in this subdomain, along specified dimension. 
-*/ -int Layout::getNumZones(int sdom_id, int dim) const{ - - // get the zoneset index along the specified dimension - int zs_dim = subdomainIdToZoneSetDim(sdom_id, dim); - - int total_subdomains = num_procs[dim] * num_zone_sets_dim[dim]; - int global_subdomain = num_zone_sets_dim[dim] * our_rank[dim] + zs_dim; - - // Compute subset of global zone indices - int num_zones = total_zones[dim] / total_subdomains; - int rem = total_zones[dim] % total_subdomains; - if(rem != 0 && global_subdomain < rem){ - num_zones ++; - } - - return num_zones; -} - - - - - - -BlockLayout::BlockLayout(Input_Variables *input_vars) : - Layout(input_vars) -{ - -} -BlockLayout::~BlockLayout(){ - -} - -Neighbor BlockLayout::getNeighbor(int our_sdom_id, int dim, int dir) const{ - Neighbor n; - - // get our processor indices, so we can find neighbors - int proc[3] = {our_rank[0], our_rank[1], our_rank[2]}; - - int gs, ds, zs; - subdomainIdToSetId(our_sdom_id, gs, ds, zs); - - // Compute out spatial subdomain indices - int zs_dim[3]; - for(int d = 0;d < 3;++ d){ - zs_dim[d] = subdomainIdToZoneSetDim(our_sdom_id, d); - } - - // Offest along dir,dim to get neighboring indices - zs_dim[dim] += dir; - - // Check if the neighbor is remote, and wrap zoneset indices - if(zs_dim[dim] >= num_zone_sets_dim[dim]){ - zs_dim[dim] = 0; - proc[dim] += dir; - } - else if(zs_dim[dim] < 0){ - zs_dim[dim] = num_zone_sets_dim[dim]-1; - proc[dim] += dir; - } - - // Compute the mpi rank of the neighbor - if(proc[dim] < 0 || proc[dim] >= num_procs[dim]){ - // we hit a boundary condition - n.mpi_rank = -1; - n.subdomain_id = -1; - } - else{ - // There is a neighbor, so compute its rank - n.mpi_rank = indicesToRank(proc, num_procs); - - // Compute neighboring subdomain id - zs = indicesToRank(zs_dim, num_zone_sets_dim); - n.subdomain_id = setIdToSubdomainId(gs, ds, zs); - } - - return n; -} - -/** - Compute the spatial extents of a subdomain along a given dimension. 
-*/ -std::pair BlockLayout::getSpatialExtents(int sdom_id, int dim) const{ - - // Start with global problem dimensions - std::pair ext_global(-60.0, 60.0); - if(dim == 1){ - ext_global.first = -100.0; - ext_global.second = 100.0; - } - - // Subdivide by number of processors in specified dimension - double dx = (ext_global.second - ext_global.first) / (double)num_procs[dim]; - std::pair ext_proc( - ext_global.first + dx*(double)our_rank[dim], - ext_global.first + dx*(double)(our_rank[dim] + 1) - ); - - // get the zoneset index along the specified dimension - int zs_dim = subdomainIdToZoneSetDim(sdom_id, dim); - - // Subdivide by number of subdomains in specified dimension - double sdx = (ext_proc.second - ext_proc.first) / (double)num_zone_sets_dim[dim]; - std::pair ext_sdom( - ext_proc.first + sdx*(double)zs_dim, - ext_proc.first + sdx*(double)(zs_dim + 1) - ); - - return ext_sdom; -} - - - -ScatterLayout::ScatterLayout(Input_Variables *input_vars) : - Layout(input_vars) -{ - -} -ScatterLayout::~ScatterLayout(){ - -} - -Neighbor ScatterLayout::getNeighbor(int our_sdom_id, int dim, int dir) const{ - Neighbor n; - - // get our processor indices, so we can find neighbors - int proc[3] = {our_rank[0], our_rank[1], our_rank[2]}; - - int gs, ds, zs; - subdomainIdToSetId(our_sdom_id, gs, ds, zs); - - // Compute our spatial subdomain indices - int zs_dim[3]; - for(int d = 0;d < 3;++ d){ - zs_dim[d] = subdomainIdToZoneSetDim(our_sdom_id, d); - } - - // Offest along dir,dim to get neighboring subdomain indices - proc[dim] += dir; - - // Check if we wrapped mpi ranks, and should bump zoneset indices - if(proc[dim] >= num_procs[dim]){ - proc[dim] = 0; - zs_dim[dim] += dir; - } - else if(proc[dim] < 0){ - proc[dim] = num_procs[dim]-1; - zs_dim[dim] += dir; - } - - // Compute zone set indices, and detect boundary condition - if(zs_dim[dim] < 0 || zs_dim[dim] >= num_zone_sets_dim[dim]){ - // we hit a boundary condition - n.mpi_rank = -1; - n.subdomain_id = -1; - - } - else{ - // 
There is a neighbor, so compute its rank - n.mpi_rank = indicesToRank(proc, num_procs); - - // Compute neighboring subdomain id - zs = indicesToRank(zs_dim, num_zone_sets_dim); - n.subdomain_id = setIdToSubdomainId(gs, ds, zs); - } - - - return n; -} - -/** - Compute the spatial extents of a subdomain along a given dimension. -*/ -std::pair ScatterLayout::getSpatialExtents(int sdom_id, int dim) const{ - - // Start with global problem dimensions - std::pair ext_global(-60.0, 60.0); - if(dim == 1){ - ext_global.first = -100.0; - ext_global.second = 100.0; - } - - // get the zoneset index along the specified dimension - int zs_dim = subdomainIdToZoneSetDim(sdom_id, dim); - - // Subdivide by number of subdomains in specified dimension - double sdx = (ext_global.second - ext_global.first) / (double)num_zone_sets_dim[dim]; - std::pair ext_sdom( - ext_global.first + sdx*(double)zs_dim, - ext_global.first + sdx*(double)(zs_dim + 1) - ); - - // Subdivide by number of processors in specified dimension - double dx = (ext_sdom.second - ext_sdom.first) / (double)num_procs[dim]; - std::pair ext_proc( - ext_sdom.first + dx*(double)our_rank[dim], - ext_sdom.first + dx*(double)(our_rank[dim] + 1) - ); - - - return ext_proc; -} - - -/** - Factory to create Layout object based on user defined inputs -*/ -Layout *createLayout(Input_Variables *input_vars){ - switch(input_vars->layout_pattern){ - case 0: - return new BlockLayout(input_vars); - case 1: - return new ScatterLayout(input_vars); - } - printf("Unknown Layout patter\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - return NULL; -} diff --git a/src/Kripke/Layout.h b/src/Kripke/Layout.h deleted file mode 100644 index 1794c697..00000000 --- a/src/Kripke/Layout.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. 
- * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#ifndef KRIPKE_LAYOUT_H__ -#define KRIPKE_LAYOUT_H__ - -#include - -// foreward decl -struct Input_Variables; - -/** - Describes a neighboring Subdomain using both mpi-rank and subdomin id -*/ -struct Neighbor{ - int mpi_rank; // Neighbors MPI rank, or -1 for boundary condition - int subdomain_id; // Subdomain ID of neighbor -}; - - - -/** - Describes relationships between MPI-ranks and subdomains. - This is an interface, allowing different layout schemes to be implemented as derived types. 
- */ -class Layout { - public: - explicit Layout(Input_Variables *input_vars); - virtual ~Layout(); - - virtual int setIdToSubdomainId(int gs, int ds, int zs) const; - virtual int subdomainIdToZoneSetDim(int sdom_id, int dim) const; - virtual void subdomainIdToSetId(int sdom_id, int &gs, int &ds, int &zs) const; - virtual Neighbor getNeighbor(int our_sdom_id, int dim, int dir) const = 0; - virtual std::pair getSpatialExtents(int sdom_id, int dim) const = 0; - virtual int getNumZones(int sdom_id, int dim) const; - - protected: - int num_group_sets; // Number of group sets - int num_direction_sets; // Number of direction sets - int num_zone_sets; // Number of zone sets - int num_zone_sets_dim[3];// Number of zone sets in each dimension - - int total_zones[3]; // Total number of zones in each dimension - - int num_procs[3]; // Number of MPI ranks in each dimensions - int our_rank[3]; // Our mpi indices in xyz -}; - -class BlockLayout : public Layout { - public: - explicit BlockLayout(Input_Variables *input_vars); - virtual ~BlockLayout(); - - virtual Neighbor getNeighbor(int our_sdom_id, int dim, int dir) const; - virtual std::pair getSpatialExtents(int sdom_id, int dim) const; -}; - -class ScatterLayout : public Layout { - public: - explicit ScatterLayout(Input_Variables *input_vars); - virtual ~ScatterLayout(); - - virtual Neighbor getNeighbor(int our_sdom_id, int dim, int dir) const; - virtual std::pair getSpatialExtents(int sdom_id, int dim) const; -}; - - -// Factory to create layout object -Layout *createLayout(Input_Variables *input_vars); - -#endif diff --git a/src/Kripke/ParallelComm.cpp b/src/Kripke/ParallelComm.cpp index 71bc1556..91b3ce59 100644 --- a/src/Kripke/ParallelComm.cpp +++ b/src/Kripke/ParallelComm.cpp @@ -32,189 +32,197 @@ #include -#include -#include -#include +#include +#include +#include -ParallelComm::ParallelComm(Grid_Data *grid_data_ptr) : - grid_data(grid_data_ptr) -{ - -} +using namespace Kripke; -ParallelComm::~ParallelComm(){ - -} - 
-int ParallelComm::computeTag(int mpi_rank, int sdom_id){ - int mpi_size; - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - - int tag = mpi_rank + mpi_size*sdom_id; +ParallelComm::ParallelComm(Kripke::Core::DataStore &data_store) : + m_data_store(&data_store) +{ + m_plane_data[0] = &m_data_store->getVariable("i_plane"); + m_plane_data[1] = &m_data_store->getVariable("j_plane"); + m_plane_data[2] = &m_data_store->getVariable("k_plane"); - return tag; } -void ParallelComm::computeRankSdom(int tag, int &mpi_rank, int &sdom_id){ - int mpi_size; - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - mpi_rank = tag % mpi_size; - sdom_id = tag / mpi_size; -} /** Finds subdomain in the queue by its subdomain id. */ -int ParallelComm::findSubdomain(int sdom_id){ +int ParallelComm::findSubdomain(SdomId sdom_id){ // find subdomain in queue - int index; + size_t index; for(index = 0;index < queue_sdom_ids.size();++ index){ - if(queue_sdom_ids[index] == sdom_id){ + if(queue_sdom_ids[index] == *sdom_id){ break; } } if(index == queue_sdom_ids.size()){ - printf("Cannot find subdomain id %d in work queue\n", sdom_id); - MPI_Abort(MPI_COMM_WORLD, 1); + KRIPKE_ABORT("Cannot find subdomain id %ld in work queue\n", (long)*sdom_id); } return index; } -Subdomain *ParallelComm::dequeueSubdomain(int sdom_id){ +void ParallelComm::dequeueSubdomain(SdomId sdom_id){ int index = findSubdomain(sdom_id); - // Get subdomain pointer before removing it from queue - Subdomain *sdom = queue_subdomains[index]; - // remove subdomain from queue queue_sdom_ids.erase(queue_sdom_ids.begin()+index); - queue_subdomains.erase(queue_subdomains.begin()+index); queue_depends.erase(queue_depends.begin()+index); - return sdom; } /** Adds a subdomain to the work queue. - Determines if upwind dependencies require communication, and posts appropirate Irecv's. - All recieves use the plane_data[] arrays as recieve buffers. + Determines if upwind dependencies require communication, and posts appropriate Irecv's. 
+ All receives use the plane_data[] arrays as receive buffers. */ -void ParallelComm::postRecvs(int sdom_id, Subdomain &sdom){ - int mpi_rank, mpi_size; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); +void ParallelComm::postRecvs(Kripke::Core::DataStore &data_store, SdomId sdom_id){ + using namespace Kripke::Core; + Comm comm; + int mpi_rank = comm.rank(); + + auto upwind = data_store.getVariable("upwind").getView(sdom_id); + + auto global_to_rank = data_store.getVariable("GlobalSdomId2Rank").getView(SdomId{0}); + +#ifdef KRIPKE_USE_MPI + auto local_to_global = data_store.getVariable("SdomId2GlobalSdomId").getView(SdomId{0}); +#endif // go thru each dimensions upwind neighbors, and add the dependencies int num_depends = 0; - for(int dim = 0;dim < 3;++ dim){ + for(Dimension dim{0};dim < 3;++ dim){ + // If it's a boundary condition, skip it - if(sdom.upwind[dim].mpi_rank < 0){ + if(upwind(dim) < 0){ continue; } // If it's an on-rank communication (from another subdomain) - if(sdom.upwind[dim].mpi_rank == mpi_rank){ + GlobalSdomId upwind_sdom = upwind(dim); + int upwind_rank = global_to_rank(upwind_sdom); + + if(upwind_rank == mpi_rank){ // skip it, but track the dependency num_depends ++; continue; } +#ifdef KRIPKE_USE_MPI + // Add request to pending list recv_requests.push_back(MPI_Request()); - recv_subdomains.push_back(sdom_id); + recv_subdomains.push_back(*sdom_id); + + auto &plane_data = *m_plane_data[*dim]; + double *plane_data_ptr = plane_data.getData(sdom_id); + size_t plane_data_size = plane_data.size(sdom_id); - // compute the tag id of THIS subdomain (tags are always based on destination) - int tag = computeTag(sdom.upwind[dim].mpi_rank, sdom.upwind[dim].subdomain_id); + GlobalSdomId global_sdom_id = local_to_global(sdom_id); // Post the recieve - MPI_Irecv(sdom.plane_data[dim]->ptr(), sdom.plane_data[dim]->elements, MPI_DOUBLE, sdom.upwind[dim].mpi_rank, - tag, MPI_COMM_WORLD, 
&recv_requests[recv_requests.size()-1]); + MPI_Irecv(plane_data_ptr, plane_data_size, MPI_DOUBLE, upwind_rank, + *global_sdom_id, MPI_COMM_WORLD, &recv_requests[recv_requests.size()-1]); // increment number of dependencies num_depends ++; +#else + // No MPI, so this doesn't make sense + KRIPKE_ASSERT("All comms should be on-node without MPI"); +#endif } // add subdomain to queue - queue_sdom_ids.push_back(sdom_id); - queue_subdomains.push_back(&sdom); + queue_sdom_ids.push_back(*sdom_id); queue_depends.push_back(num_depends); } -void ParallelComm::postSends(Subdomain *sdom, double *src_buffers[3]){ +void ParallelComm::postSends(Kripke::Core::DataStore &data_store, Kripke::SdomId sdom_id, + double *src_buffers[3]) +{ // post sends for downwind dependencies - int mpi_rank, mpi_size; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - for(int dim = 0;dim < 3;++ dim){ + Kripke::Core::Comm comm; + int mpi_rank = comm.rank(); + + auto downwind = data_store.getVariable("downwind").getView(sdom_id); + auto global_to_rank = data_store.getVariable("GlobalSdomId2Rank").getView(SdomId{0}); + auto global_to_sdom_id = data_store.getVariable("GlobalSdomId2SdomId").getView(SdomId{0}); + + for(Dimension dim{0};dim < 3;++ dim){ // If it's a boundary condition, skip it - if(sdom->downwind[dim].mpi_rank < 0){ + if(downwind(dim) < 0){ continue; } + + // If it's an on-rank communication (to another subdomain) - if(sdom->downwind[dim].mpi_rank == mpi_rank){ + GlobalSdomId downwind_sdom = downwind(dim); + int downwind_rank = global_to_rank(downwind_sdom); + if(downwind_rank == mpi_rank){ + + SdomId sdom_id_downwind = global_to_sdom_id(downwind(dim)); + // find the local subdomain in the queue, and decrement the counter - for(int i = 0;i < queue_sdom_ids.size();++ i){ - if(queue_sdom_ids[i] == sdom->downwind[dim].subdomain_id){ + for(size_t i = 0;i < queue_sdom_ids.size();++ i){ + if(queue_sdom_ids[i] == *sdom_id_downwind){ queue_depends[i] --; break; 
} } // copy the boundary condition data into the downwinds plane data - Subdomain &sdom_downwind = grid_data->subdomains[sdom->downwind[dim].subdomain_id]; - sdom_downwind.plane_data[dim]->copy(*sdom->plane_data[dim]); - int num_elem = sdom_downwind.plane_data[dim]->elements; - //double const * KRESTRICT src_ptr = sdom->plane_data[dim]->ptr(); - double * KRESTRICT dst_ptr = sdom_downwind.plane_data[dim]->ptr(); + auto dst_plane = m_plane_data[*dim]->getView1d(sdom_id_downwind); + int num_elem = m_plane_data[*dim]->size(sdom_id_downwind); for(int i = 0;i < num_elem;++ i){ - dst_ptr[i] = src_buffers[dim][i]; + dst_plane(i) = src_buffers[*dim][i]; } continue; } +#ifdef KRIPKE_USE_MPI + // At this point, we know that we have to send an MPI message // Add request to send queue -#ifdef KRIPKE_SWEEP_ISEND send_requests.push_back(MPI_Request()); -#endif - // compute the tag id of TARGET subdomain (tags are always based on destination) - int tag = computeTag(mpi_rank, sdom->downwind[dim].subdomain_id); + // Get size of outgoing boudnary data + auto &plane_data = *m_plane_data[*dim]; + size_t plane_data_size = plane_data.size(sdom_id); // Post the send -#ifdef KRIPKE_SWEEP_ISEND - MPI_Isend(src_buffers[dim], sdom->plane_data[dim]->elements, MPI_DOUBLE, sdom->downwind[dim].mpi_rank, - tag, MPI_COMM_WORLD, &send_requests[send_requests.size()-1]); -#endif + MPI_Isend(src_buffers[*dim], plane_data_size, MPI_DOUBLE, downwind_rank, + *downwind_sdom, MPI_COMM_WORLD, &send_requests[send_requests.size()-1]); -#ifdef KRIPKE_SWEEP_SEND - MPI_Send(src_buffers[dim], sdom->plane_data[dim]->elements, MPI_DOUBLE, sdom->downwind[dim].mpi_rank, - tag, MPI_COMM_WORLD); +#else + // We cannot SEND anything without MPI, so fail + KRIPKE_ASSERT("Cannot send messages without MPI"); #endif -#ifdef KRIPKE_SWEEP_SSEND - MPI_Ssend(src_buffers[dim], sdom->plane_data[dim]->elements, MPI_DOUBLE, sdom->downwind[dim].mpi_rank, - tag, MPI_COMM_WORLD); -#endif - - } } // Checks if there are any outstanding 
subdomains to complete bool ParallelComm::workRemaining(void){ - return (recv_requests.size() > 0 || queue_subdomains.size() > 0); +#ifdef KRIPKE_USE_MPI + return (recv_requests.size() > 0 || queue_sdom_ids.size() > 0); +#else + return (queue_sdom_ids.size() > 0); +#endif } // Blocks until all sends have completed, and flushes the send queues void ParallelComm::waitAllSends(void){ +#ifdef KRIPKE_USE_MPI // Wait for all remaining sends to complete, then return false int num_sends = send_requests.size(); if(num_sends > 0){ @@ -222,13 +230,14 @@ void ParallelComm::waitAllSends(void){ MPI_Waitall(num_sends, &send_requests[0], &status[0]); send_requests.clear(); } +#endif } /** Checks for incomming messages, and does relevant bookkeeping. */ void ParallelComm::testRecieves(void){ - +#ifdef KRIPKE_USE_MPI // Check for any recv requests that have completed int num_requests = recv_requests.size(); bool done = false; @@ -252,7 +261,7 @@ void ParallelComm::testRecieves(void){ num_requests --; // decrement the dependency count for that subdomain - for(int i = 0;i < queue_sdom_ids.size();++ i){ + for(size_t i = 0;i < queue_sdom_ids.size();++ i){ if(queue_sdom_ids[i] == sdom_id){ queue_depends[i] --; break; @@ -263,15 +272,16 @@ void ParallelComm::testRecieves(void){ done = true; } } +#endif } -std::vector ParallelComm::getReadyList(void){ +std::vector ParallelComm::getReadyList(void){ // build up a list of ready subdomains - std::vector ready; - for(int i = 0;i < queue_depends.size();++ i){ + std::vector ready; + for(size_t i = 0;i < queue_depends.size();++ i){ if(queue_depends[i] == 0){ - ready.push_back(queue_sdom_ids[i]); + ready.push_back(SdomId(queue_sdom_ids[i])); } } return ready; diff --git a/src/Kripke/ParallelComm.h b/src/Kripke/ParallelComm.h index 95730d7e..fd11ea67 100644 --- a/src/Kripke/ParallelComm.h +++ b/src/Kripke/ParallelComm.h @@ -30,88 +30,99 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. 
*/ -#ifndef KRIPKE_COMM_H__ -#define KRIPKE_COMM_H__ +#ifndef KRIPKE_PARALLELCOMM_H__ +#define KRIPKE_PARALLELCOMM_H__ -#include -#include +#include +#include -class Grid_Data; -class Subdomain; +struct Grid_Data; + +namespace Kripke { + +namespace Core { + class DataStore; + + template + class FieldStorage; +} class ParallelComm { public: - explicit ParallelComm(Grid_Data *grid_data_ptr); - virtual ~ParallelComm(); + explicit ParallelComm(Kripke::Core::DataStore &data_store); + virtual ~ParallelComm() = default; // Adds a subdomain to the work queue - virtual void addSubdomain(int sdom_id, Subdomain &sdom) = 0; + virtual void addSubdomain(Kripke::Core::DataStore &data_store, SdomId sdom_id) = 0; // Checks if there are any outstanding subdomains to complete // false indicates all work is done, and all sends have completed virtual bool workRemaining(void); // Returns a vector of ready subdomains, and clears them from the ready queue - virtual std::vector readySubdomains(void) = 0; + virtual std::vector readySubdomains(void) = 0; // Marks subdomains as complete, and performs downwind communication - virtual void markComplete(int sdom_id) = 0; + virtual void markComplete(SdomId sdom_id) = 0; protected: - static int computeTag(int mpi_rank, int sdom_id); - static void computeRankSdom(int tag, int &mpi_rank, int &sdom_id); - - int findSubdomain(int sdom_id); - Subdomain *dequeueSubdomain(int sdom_id); - void postRecvs(int sdom_id, Subdomain &sdom); - void postSends(Subdomain *sdom, double *buffers[3]); + int findSubdomain(SdomId sdom_id); + void dequeueSubdomain(SdomId sdom_id); + void postRecvs(Kripke::Core::DataStore &data_store, SdomId sdom_id); + void postSends(Kripke::Core::DataStore &data_store, SdomId sdom_id_upwind, double *buffers[3]); void testRecieves(void); void waitAllSends(void); - std::vector getReadyList(void); + std::vector getReadyList(void); + Kripke::Core::DataStore *m_data_store; - Grid_Data *grid_data; + Kripke::Core::FieldStorage *m_plane_data[3]; 
// These vectors contian the recieve requests +#ifdef KRIPKE_USE_MPI std::vector recv_requests; +#endif std::vector recv_subdomains; // These vectors have the subdomains, and the remaining dependencies std::vector queue_sdom_ids; - std::vector queue_subdomains; std::vector queue_depends; // These vectors have the remaining send requests that are incomplete +#ifdef KRIPKE_USE_MPI std::vector send_requests; +#endif }; class SweepComm : public ParallelComm { public: - explicit SweepComm(Grid_Data *data); + explicit SweepComm(Kripke::Core::DataStore &data_store); virtual ~SweepComm(); - virtual void addSubdomain(int sdom_id, Subdomain &sdom); + virtual void addSubdomain(Kripke::Core::DataStore &data_store, SdomId sdom_id); virtual bool workRemaining(void); - virtual std::vector readySubdomains(void); - virtual void markComplete(int sdom_id); + virtual std::vector readySubdomains(void); + virtual void markComplete(SdomId sdom_id); }; class BlockJacobiComm : public ParallelComm { public: - explicit BlockJacobiComm(Grid_Data *data); + explicit BlockJacobiComm(Kripke::Core::DataStore &data_store); virtual ~BlockJacobiComm(); - void addSubdomain(int sdom_id, Subdomain &sdom); - bool workRemaining(void); - std::vector readySubdomains(void); - void markComplete(int sdom_id); + virtual void addSubdomain(Kripke::Core::DataStore &data_store, SdomId sdom_id); + virtual bool workRemaining(void); + virtual std::vector readySubdomains(void); + virtual void markComplete(SdomId sdom_id); private: bool posted_sends; }; +} + #endif diff --git a/src/Kripke/ParallelComm/BlockJacobiComm.cpp b/src/Kripke/ParallelComm/BlockJacobiComm.cpp index 36e3a1cb..323a7176 100644 --- a/src/Kripke/ParallelComm/BlockJacobiComm.cpp +++ b/src/Kripke/ParallelComm/BlockJacobiComm.cpp @@ -31,41 +31,60 @@ */ #include -#include -#include + +#include +#include +#include +#include #include #include -#include #include #include +using namespace Kripke; +using namespace Kripke::Core; 
-BlockJacobiComm::BlockJacobiComm(Grid_Data *data) : ParallelComm(data), posted_sends(false) +BlockJacobiComm::BlockJacobiComm(Kripke::Core::DataStore &data_store) : +ParallelComm(data_store), posted_sends(false) { - + ArchLayoutV al_v = data_store.getVariable("al").al_v; + + Set const &set_iplane = data_store.getVariable("Set/IPlane"); + Set const &set_jplane = data_store.getVariable("Set/JPlane"); + Set const &set_kplane = data_store.getVariable("Set/KPlane"); + createField(data_store, "old_i_plane", al_v, set_iplane); + createField(data_store, "old_j_plane", al_v, set_jplane); + createField(data_store, "old_k_plane", al_v, set_kplane); } BlockJacobiComm::~BlockJacobiComm(){ + m_data_store->deleteVariable("old_i_plane"); + m_data_store->deleteVariable("old_j_plane"); + m_data_store->deleteVariable("old_k_plane"); } /** Adds a subdomain to the work queue. Determines if upwind dependencies require communication, and posts appropirate Irecv's. */ -void BlockJacobiComm::addSubdomain(int sdom_id, Subdomain &sdom){ +void BlockJacobiComm::addSubdomain(Kripke::Core::DataStore &data_store, SdomId sdom_id){ + // Copy old flux data to send buffers - for(int dim = 0;dim < 3;++ dim){ - int nelem = sdom.plane_data[dim]->elements; - double const * KRESTRICT src = sdom.plane_data[dim]->ptr(); - double * KRESTRICT dst = sdom.old_plane_data[dim]->ptr(); - for(int i = 0;i < nelem;++ i){ - dst[i] = src[i]; - } - } + auto &i_plane = m_data_store->getVariable("i_plane"); + auto &old_i_plane = m_data_store->getVariable("old_i_plane"); + Kernel::kCopy(old_i_plane, i_plane); + + auto &j_plane = m_data_store->getVariable("j_plane"); + auto &old_j_plane = m_data_store->getVariable("old_j_plane"); + Kernel::kCopy(old_j_plane, j_plane); + + auto &k_plane = m_data_store->getVariable("k_plane"); + auto &old_k_plane = m_data_store->getVariable("old_k_plane"); + Kernel::kCopy(old_k_plane, k_plane); // post recieves - postRecvs(sdom_id, sdom); + postRecvs(data_store, sdom_id); } @@ -73,18 +92,23 
@@ void BlockJacobiComm::addSubdomain(int sdom_id, Subdomain &sdom){ // false indicates all work is done, and all sends have completed bool BlockJacobiComm::workRemaining(void){ if(!posted_sends){ + + auto &old_i_plane = m_data_store->getVariable("old_i_plane"); + auto &old_j_plane = m_data_store->getVariable("old_j_plane"); + auto &old_k_plane = m_data_store->getVariable("old_k_plane"); + // post sends for all queued subdomains - for(int i = 0;i < queue_subdomains.size();++ i){ - Subdomain *sdom = queue_subdomains[i]; + for(size_t i = 0;i < queue_sdom_ids.size();++ i){ + SdomId sdom_id(queue_sdom_ids[i]); // Send new downwind info for sweep double *buf[3] = { - sdom->old_plane_data[0]->ptr(), - sdom->old_plane_data[1]->ptr(), - sdom->old_plane_data[2]->ptr() + old_i_plane.getData(sdom_id), + old_j_plane.getData(sdom_id), + old_k_plane.getData(sdom_id) }; - postSends(sdom, buf); + postSends(*m_data_store, sdom_id, buf); } posted_sends = true; } @@ -101,7 +125,7 @@ bool BlockJacobiComm::workRemaining(void){ /** Checks for incomming messages, and returns a list of ready subdomain id's */ -std::vector BlockJacobiComm::readySubdomains(void){ +std::vector BlockJacobiComm::readySubdomains(void){ testRecieves(); // return list of any ready subdomains @@ -110,7 +134,7 @@ std::vector BlockJacobiComm::readySubdomains(void){ -void BlockJacobiComm::markComplete(int sdom_id){ +void BlockJacobiComm::markComplete(SdomId sdom_id){ // remove subdomain from work queue dequeueSubdomain(sdom_id); } diff --git a/src/Kripke/ParallelComm/SweepComm.cpp b/src/Kripke/ParallelComm/SweepComm.cpp index 13a61997..81e377d6 100644 --- a/src/Kripke/ParallelComm/SweepComm.cpp +++ b/src/Kripke/ParallelComm/SweepComm.cpp @@ -31,17 +31,21 @@ */ #include -#include -#include + +#include +#include +#include +#include #include #include -#include #include #include +using namespace Kripke; + -SweepComm::SweepComm(Grid_Data *data) : ParallelComm(data) +SweepComm::SweepComm(Kripke::Core::DataStore 
&data_store) : ParallelComm(data_store) { } @@ -53,9 +57,9 @@ SweepComm::~SweepComm(){ Adds a subdomain to the work queue. Determines if upwind dependencies require communication, and posts appropirate Irecv's. */ -void SweepComm::addSubdomain(int sdom_id, Subdomain &sdom){ +void SweepComm::addSubdomain(Kripke::Core::DataStore &data_store, SdomId sdom_id){ // Post recieves for upwind dependencies, and add to the queue - postRecvs(sdom_id, sdom); + postRecvs(data_store, sdom_id); } @@ -78,7 +82,7 @@ bool SweepComm::workRemaining(void){ /** Checks for incomming messages, and returns a list of ready subdomain id's */ -std::vector SweepComm::readySubdomains(void){ +std::vector SweepComm::readySubdomains(void){ // check for incomming messages testRecieves(); @@ -87,16 +91,20 @@ std::vector SweepComm::readySubdomains(void){ } -void SweepComm::markComplete(int sdom_id){ +void SweepComm::markComplete(SdomId sdom_id){ // Get subdomain pointer and remove from work queue - Subdomain *sdom = dequeueSubdomain(sdom_id); + dequeueSubdomain(sdom_id); + + auto &i_plane = m_data_store->getVariable("i_plane"); + auto &j_plane = m_data_store->getVariable("j_plane"); + auto &k_plane = m_data_store->getVariable("k_plane"); // Send new downwind info for sweep double *buf[3] = { - sdom->plane_data[0]->ptr(), - sdom->plane_data[1]->ptr(), - sdom->plane_data[2]->ptr() + i_plane.getData(sdom_id), + j_plane.getData(sdom_id), + k_plane.getData(sdom_id) }; - postSends(sdom, buf); + postSends(*m_data_store, sdom_id, buf); } diff --git a/src/Kripke/SteadyStateSolver.cpp b/src/Kripke/SteadyStateSolver.cpp new file mode 100644 index 00000000..65d593fb --- /dev/null +++ b/src/Kripke/SteadyStateSolver.cpp @@ -0,0 +1,144 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. 
+ * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke::Core; + +/** + Run solver iterations. 
+*/ +int Kripke::SteadyStateSolver (Kripke::Core::DataStore &data_store, size_t max_iter, bool block_jacobi) +{ + KRIPKE_TIMER(data_store, Solve); + + PartitionSpace &pspace = data_store.getVariable("pspace"); + + Kripke::Core::Comm const &comm = data_store.getVariable("comm"); + if(comm.rank() == 0){ + printf("\n"); + printf("Steady State Solve\n"); + printf("==================\n\n"); + } + + + // Intialize unknowns + Kripke::Kernel::kConst(data_store.getVariable("psi"), 0.0); + + + // Loop over iterations + double part_last = 0.0; + for(size_t iter = 0;iter < max_iter;++ iter){ + + + /* + * Compute the RHS: rhs = LPlus*S*L*psi + Q + */ + + + // Discrete to Moments transformation (phi = L*psi) + Kripke::Kernel::kConst(data_store.getVariable("phi"), 0.0); + Kripke::Kernel::LTimes(data_store); + + + + // Compute Scattering Source Term (psi_out = S*phi) + Kripke::Kernel::kConst(data_store.getVariable("phi_out"), 0.0); + Kripke::Kernel::scattering(data_store); + + + + // Compute External Source Term (psi_out = psi_out + Q) + Kripke::Kernel::source(data_store); + + + + // Moments to Discrete transformation (rhs = LPlus*psi_out) + Kripke::Kernel::kConst(data_store.getVariable("rhs"), 0.0); + Kripke::Kernel::LPlusTimes(data_store); + + + + + + + + /* + * Sweep (psi = Hinv*rhs) + */ + { + // Create a list of all groups + int num_subdomains = pspace.getNumSubdomains(SPACE_PQR); + std::vector sdom_list(num_subdomains); + for(SdomId i{0};i < num_subdomains;++ i){ + sdom_list[*i] = i; + } + + // Sweep everything + Kripke::SweepSolver(data_store, sdom_list, block_jacobi); + } + + + + /* + * Population edit and convergence test + */ + double part = Kripke::Kernel::population(data_store); + if(comm.rank() == 0){ + printf(" iter %d: particle count=%e, change=%e\n", (int)iter, part, (part-part_last)/part); + fflush(stdout); + } + part_last = part; + + + + } + + if(comm.rank() == 0){ + printf(" Solver terminated\n"); + } + + return(0); +} + + + + diff --git 
a/src/Kripke/Test/TestKernels.h b/src/Kripke/SteadyStateSolver.h similarity index 85% rename from src/Kripke/Test/TestKernels.h rename to src/Kripke/SteadyStateSolver.h index 2330e657..210447f6 100644 --- a/src/Kripke/Test/TestKernels.h +++ b/src/Kripke/SteadyStateSolver.h @@ -30,11 +30,20 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. */ -#ifndef KRIPKE_TOOLS_TEST_KERNELS_H__ -#define KRIPKE_TOOLS_TEST_KERNELS_H__ +#ifndef KRIPKE_STEADYSTATESOLVER_H__ +#define KRIPKE_STEADYSTATESOLVER_H__ -struct Input_Variables; -void testKernels(Input_Variables &input_variables); +#include + +namespace Kripke { + + class DataStore; + + int SteadyStateSolver(Kripke::Core::DataStore &data_store, size_t max_iter, bool block_jacobi); + + +} // namespace #endif + diff --git a/src/Kripke/SubTVec.h b/src/Kripke/SubTVec.h deleted file mode 100644 index 08bc20d9..00000000 --- a/src/Kripke/SubTVec.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. 
Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#ifndef KRIPKE_SUBTVEC_H__ -#define KRIPKE_SUBTVEC_H__ - -#include -#include -#include -#include - -/** - * A transport vector (used for Psi and Phi, RHS, etc.) - * - * This provides the inner most three strides of - * Psi[GS][DS][G][D][Z] - * but in whatever nesting order is specified. - */ -struct SubTVec { - SubTVec(Nesting_Order nesting, int ngrps, int ndir_mom, int nzones): - groups(ngrps), - directions(ndir_mom), - zones(nzones), - elements(groups*directions*zones), - data_linear(elements) - { - setupIndices(nesting, &data_linear[0]); - } - - - /** - * ALIASING version of constructor. - * Use this when you have a data buffer already, and don't want this class - * to do any memory management. 
- */ - SubTVec(Nesting_Order nesting, int ngrps, int ndir_mom, int nzones, double *ptr): - groups(ngrps), - directions(ndir_mom), - zones(nzones), - elements(groups*directions*zones), - data_linear(0) - { - setupIndices(nesting, ptr); - } - - ~SubTVec(){ - } - - void setupIndices(Nesting_Order nesting, double *ptr){ - // setup nesting order - switch(nesting){ - case NEST_GDZ: - ext_to_int[0] = 0; - ext_to_int[1] = 1; - ext_to_int[2] = 2; - break; - case NEST_GZD: - ext_to_int[0] = 0; - ext_to_int[2] = 1; - ext_to_int[1] = 2; - break; - case NEST_DZG: - ext_to_int[1] = 0; - ext_to_int[2] = 1; - ext_to_int[0] = 2; - break; - case NEST_DGZ: - ext_to_int[1] = 0; - ext_to_int[0] = 1; - ext_to_int[2] = 2; - break; - case NEST_ZDG: - ext_to_int[2] = 0; - ext_to_int[1] = 1; - ext_to_int[0] = 2; - break; - case NEST_ZGD: - ext_to_int[2] = 0; - ext_to_int[0] = 1; - ext_to_int[1] = 2; - break; - } - - // setup dimensionality - int size_ext[3]; - size_ext[0] = groups; - size_ext[1] = directions; - size_ext[2] = zones; - - // map to internal indices - for(int i = 0; i < 3; ++i){ - size_int[ext_to_int[i]] = size_ext[i]; - } - - data_pointer = ptr; - } - - inline double* ptr(void){ - return data_pointer; - } - - inline double* ptr(int g, int d, int z){ - return &(*this)(g,d,z); - } - - // These are NOT efficient.. 
just used to re-stride data for comparisons - inline double &operator()(int g, int d, int z) { - int idx[3]; - idx[ext_to_int[0]] = g; - idx[ext_to_int[1]] = d; - idx[ext_to_int[2]] = z; - int offset = idx[0] * size_int[1]*size_int[2] + - idx[1] * size_int[2] + - idx[2]; - return data_pointer[offset]; - } - inline double operator()(int g, int d, int z) const { - return (*const_cast(this))(g,d,z); - } - - inline double sum(void) const { - double s = 0.0; - for(size_t i = 0;i < elements;++ i){ - s+= data_linear[i]; - } - return s; - } - - inline void clear(double v){ -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel for -#endif - for(int i = 0;i < elements;++ i){ - data_linear[i] = v; - } - } - - inline void randomizeData(void){ - for(int i = 0;i < elements;++ i){ - data_linear[i] = drand48(); - } - } - - inline void copy(SubTVec const &b){ - for(int g = 0;g < groups;++ g){ - for(int d = 0;d < directions; ++ d){ - for(int z = 0;z < zones;++ z){ - // Copy using abstract indexing - (*this)(g,d,z) = b(g,d,z); - } - } - } - } - - inline bool compare(std::string const &name, SubTVec const &b, - double tol, bool verbose){ - - bool is_diff = false; - int num_wrong = 0; - for(int g = 0;g < groups;++ g){ - for(int d = 0;d < directions; ++ d){ - for(int z = 0;z < zones;++ z){ - // Copy using abstract indexing - double err = std::abs((*this)(g,d,z) - b(g,d,z)); - if(err > tol){ - is_diff = true; - if(verbose){ - printf("%s[g=%d, d=%d, z=%d]: |%e - %e| = %e\n", - name.c_str(), g,d,z, (*this)(g,d,z), b(g,d,z), err); - num_wrong ++; - if(num_wrong > 100){ - return true; - } - } - } - } - } - } - return is_diff; - } - - int ext_to_int[3]; // external index to internal index mapping - int size_int[3]; // size of each dimension in internal indices - - int groups, directions, zones, elements; - double *data_pointer; - std::vector data_linear; -}; - - -#endif diff --git a/src/Kripke/Subdomain.cpp b/src/Kripke/Subdomain.cpp deleted file mode 100644 index b12ff2c3..00000000 --- 
a/src/Kripke/Subdomain.cpp +++ /dev/null @@ -1,482 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include -#include -#include - -#include -#include - - -namespace { - /** - This function defined the material distribution in space. - This defines Problem 3 from Kobayashi - Where Region 1 is material 0, 2 is 1 and 3 is 2. 
- */ - inline int queryMaterial(double x, double y, double z){ - // Problem is defined for one octant, with reflecting boundaries - // We "unreflect" it here by taking abs values - x = std::abs(x); - y = std::abs(y); - z = std::abs(z); - - // Central 20x20x20 box is Region 1 - if(x <= 10.0 && y <= 10.0 && z <= 10.0){ - return 0; - } - - // Leg 1 of Region 2 - if(x <= 10.0 && y <= 60.0 && z <= 10.0){ - return 1; - } - - // Leg 2 of Region 2 - if(x <= 40.0 && y >= 50.0 && y <= 60.0 && z <= 10.0){ - return 1; - } - - // Leg 3 of Region 2 - if(x >= 30.0 && x <= 40.0 && y >= 50.0 && y <= 60.0 && z <= 40.0){ - return 1; - } - - // Leg 4 of Region 2 - if(x >= 30.0 && x <= 40.0 && y >= 50.0 && z >= 30.0 && z <= 40.0){ - return 1; - } - - // Rest is filled with region 3 - return 2; - } -} - - - -Subdomain::Subdomain() : - idx_dir_set(0), - idx_group_set(0), - idx_zone_set(0), - num_groups(0), - num_directions(0), - num_zones(0), - group0(0), - direction0(0), - psi(NULL), - rhs(NULL), - sigt(NULL), - directions(NULL), - ell(NULL), - ell_plus(NULL), - phi(NULL), - phi_out(NULL) -{ - for(int dim = 0;dim < 3;++ dim){ - plane_data[dim] = NULL; - old_plane_data[dim] = NULL; - } -} -Subdomain::~Subdomain(){ - delete psi; - delete rhs; - delete sigt; - for(int dim = 0;dim < 3;++ dim){ - delete plane_data[dim]; - delete old_plane_data[dim]; - } -} - - -/** - Setup subdomain and allocate data -*/ -void Subdomain::setup(int sdom_id, Input_Variables *input_vars, int gs, int ds, int zs, - std::vector &direction_list, Kernel *kernel, Layout *layout) -{ - // set the set indices - idx_group_set = gs; - idx_dir_set = ds; - idx_zone_set = zs; - - num_groups = input_vars->num_groups / input_vars->num_groupsets; - group0 = gs * num_groups; - - num_directions = input_vars->num_directions / input_vars->num_dirsets; - direction0 = ds * num_directions; - directions = &direction_list[direction0]; - - num_zones = 1; - for(int dim = 0;dim < 3;++ dim){ - // Compute number of zones in this dimension - 
nzones[dim] = layout->getNumZones(sdom_id, dim); - num_zones *= nzones[dim]; - - // Compute grid deltas in this dimension (including ghost zone deltas) - std::pair dim_extent = layout->getSpatialExtents(sdom_id, dim); - zeros[dim] = dim_extent.first; - double dx = (dim_extent.second-dim_extent.first)/(double)nzones[dim]; - deltas[dim].resize(nzones[dim]+2); - for(int z = 0;z < nzones[dim]+2;++ z){ - deltas[dim][z] = dx; - } - } - - // allocate storage for the sweep boundary data (upwind and downwind share same buffer) - plane_data[0] = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, nzones[1] * nzones[2]); - plane_data[1] = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, nzones[0] * nzones[2]); - plane_data[2] = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, nzones[0] * nzones[1]); - - // For block-jacobi parallel method - old_plane_data[0] = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, nzones[1] * nzones[2]); - old_plane_data[1] = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, nzones[0] * nzones[2]); - old_plane_data[2] = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, nzones[0] * nzones[1]); - - // allocate the storage for solution and source terms - psi = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, num_zones); - psi->clear(0.0); - rhs = new SubTVec(kernel->nestingPsi(), num_groups, num_directions, num_zones); - sigt = new SubTVec(kernel->nestingSigt(), num_groups, 1, num_zones); - sigt->clear(0.0); - - computeSweepIndexSet(); - - // Setup neighbor data - int dirs[3] = { directions[0].id, directions[0].jd, directions[0].kd}; - for(int dim = 0;dim < 3;++ dim){ - downwind[dim] = layout->getNeighbor(sdom_id, dim, dirs[dim]); - upwind[dim] = layout->getNeighbor(sdom_id, dim, -1 * dirs[dim]); - } - - // paint the mesh - reg_volume[0] = 0.0; - reg_volume[1] = 0.0; - reg_volume[2] = 0.0; - int num_subsamples = 4; // number of subsamples per spatial dimension - 
double sample_vol_frac = 1.0 / (double)(num_subsamples*num_subsamples*num_subsamples); - int zone_id = 0; - double pz = zeros[2]; - - for (int k = 0; k < nzones[2]; k++) { - double sdz = deltas[2][k+1] / (double)(num_subsamples+1); - double py = zeros[1]; - - for (int j = 0; j != nzones[1]; j ++) { - double sdy = deltas[1][j+1] / (double)(num_subsamples+1); - double px = zeros[0]; - - for (int i = 0; i != nzones[0]; i ++) { - double sdx = deltas[0][i+1] / (double)(num_subsamples+1); - - double zone_volume = deltas[0][i+1] * deltas[1][j+1] * deltas[2][k+1]; - volume.push_back(zone_volume); - - // subsample probe the geometry to get our materials - double frac[3] = {0.0, 0.0, 0.0}; // fraction of both materials - double spz = pz + sdz; - - for(int sk = 0;sk < num_subsamples;++ sk){ - double spy = py + sdy; - for(int sj = 0;sj < num_subsamples;++ sj){ - double spx = px + sdx; - for(int si = 0;si < num_subsamples;++ si){ - - int mat = queryMaterial(spx, spy, spz); - frac[mat] += sample_vol_frac; - - spx += sdx; - } - spy += sdy; - } - spz += sdz; - } - - // Add material to zone - int nmixed = 0; - for(int mat = 0;mat < 3;++ mat){ - if(frac[mat] > 0.0){ - nmixed ++; - if(nmixed == 1){ - zones_to_mixed.push_back(mixed_to_zones.size()); - } - mixed_to_zones.push_back(zone_id); - mixed_material.push_back(mat); - mixed_fraction.push_back(frac[mat]); - reg_volume[mat] += frac[mat] * zone_volume; - - // initialize background sigt - for(int g = 0;g < num_groups;++ g){ - (*sigt)(g,0,zone_id) += frac[mat] * input_vars->sigt[mat]; - } - } - } - num_mixed.push_back(nmixed); - - // increment zone - px += deltas[0][i+1]; - zone_id ++; - } - py += deltas[1][j+1]; - } - pz += deltas[2][k+1]; - } -} - -void Subdomain::setVars(SubTVec *ell_ptr, SubTVec *ell_plus_ptr, - SubTVec *phi_ptr, SubTVec *phi_out_ptr){ - - ell = ell_ptr; - ell_plus = ell_plus_ptr; - phi = phi_ptr; - phi_out = phi_out_ptr; -} - - -/** - * Randomizes data for a set. 
- */ -void Subdomain::randomizeData(void){ - psi->randomizeData(); - rhs->randomizeData(); - sigt->randomizeData(); - - for(int d = 0;d < 3;++ d){ - for(int i = 0;i < deltas[d].size();++ i){ - deltas[d][i] = drand48(); - } - } -} - -/** - * Copies two sets, allowing for different nestings. - */ -void Subdomain::copy(Subdomain const &b){ - psi->copy(*b.psi); - rhs->copy(*b.rhs); - sigt->copy(*b.sigt); - - for(int d = 0;d < 3;++ d){ - deltas[d] = b.deltas[d]; - } -} - -/** - * Compares two sets, allowing for different nestings. - */ -bool Subdomain::compare(Subdomain const &b, double tol, bool verbose){ - std::stringstream namess; - namess << "gdset[gs=" << idx_group_set << ", ds=" << idx_dir_set << ", zs=" << idx_zone_set << "]"; - std::string name = namess.str(); - - bool is_diff = false; - is_diff |= psi->compare(name+".psi", *b.psi, tol, verbose); - is_diff |= rhs->compare(name+".rhs", *b.rhs, tol, verbose); - is_diff |= sigt->compare(name+".sigt", *b.sigt, tol, verbose); - - is_diff |= compareVector(name+".deltas[0]", deltas[0], b.deltas[0], tol, verbose); - is_diff |= compareVector(name+".deltas[1]", deltas[1], b.deltas[1], tol, verbose); - is_diff |= compareVector(name+".deltas[2]", deltas[2], b.deltas[2], tol, verbose); - - return is_diff; -} - -/** - * Compute sweep index sets. - * Determines logical indices, and increments for i,j,k based on grid - * information and quadrature set sweeping direction. 
- */ -void Subdomain::computeSweepIndexSet(void){ - if(directions[0].id > 0){ - sweep_block.start_i = 0; - sweep_block.end_i = nzones[0]; - sweep_block.inc_i = 1; - } - else { - sweep_block.start_i = nzones[0]-1; - sweep_block.end_i = -1; - sweep_block.inc_i = -1; - } - - if(directions[0].jd > 0){ - sweep_block.start_j = 0; - sweep_block.end_j = nzones[1]; - sweep_block.inc_j = 1; - } - else { - sweep_block.start_j = nzones[1]-1; - sweep_block.end_j = -1; - sweep_block.inc_j = -1; - } - - if(directions[0].kd > 0){ - sweep_block.start_k = 0; - sweep_block.end_k = nzones[2]; - sweep_block.inc_k = 1; - } - else { - sweep_block.start_k = nzones[2]-1; - sweep_block.end_k = -1; - sweep_block.inc_k = -1; - } -} - -namespace { - double FactFcn(int n) - { - double fact = 1.0; - for(int i = n;i > 0 ;--i){ - fact *= (double)i; - } - return(fact); - } - - inline double PnmFcn(int n, int m, double x) - { - /*----------------------------------------------------------------- - * It is assumed that 0 <= m <= n and that abs(x) <= 1.0. - * No error checking is done, however. 
- *---------------------------------------------------------------*/ - double fact, pnn, pmm, pmmp1, somx2; - - int i, nn; - - if(std::abs(x) > 1.0){ - printf("Bad input to ardra_PnmFcn: abs(x) > 1.0, x = %e\n", x); - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if((x > 1.0) && (x <= 1.0)){ - x = 1.0; - } - else if((-1.0 <= x ) && (x < -1.0)){ - x = -1.0; - } - - pmm=1.0; - if(m > 0){ - somx2=sqrt((1.0-x)*(1.0+x)); - fact=1.0; - for(i=1; i<=m; i++){ - pmm *= -fact*somx2; - fact += 2.0; - } - } - if(n == m){ - return(pmm); - } - else { - pmmp1=x*(2*m+1)*pmm; - if(n == (m+1)){ - return(pmmp1); - } - else { - for(nn=m+2; nn<=n; nn++){ - pnn=(x*(2*nn-1)*pmmp1-(nn+m-1)*pmm)/(nn-m); - pmm=pmmp1; - pmmp1=pnn; - } - return(pnn); - } - } - } - - inline double YnmFcn(int n, int m, double mu, double eta, double xi) - { - double fac1, fac2, anm, ynm, pnm, dm0, taum, tmp, phi, phi_tmp; - double floor=1.e-20; - int nn, mm; - - /* Calculate the correct phi for omega=(mu,eta,xi) */ - tmp = fabs(eta/(mu+floor)); - phi_tmp = atan(tmp); - if( (mu>0) && (eta>0) ){ - phi = phi_tmp; - } - else if( (mu<0) && (eta>0) ){ - phi = M_PI - fabs(phi_tmp); - } - else if( (mu<0) && (eta<0) ){ - phi = M_PI + fabs(phi_tmp); - } - else { - phi = 2.0*M_PI - fabs(phi_tmp); - } - - /* Begin evaluation of Ynm(omega) */ - nn = n - std::abs(m); - fac1 = (double) FactFcn(nn); - nn = n + std::abs(m); - fac2 = (double) FactFcn(nn); - mm = std::abs(m); - pnm = PnmFcn(n, mm, xi); - tmp = ((double) m)*phi; - if(m >= 0){ - taum = cos(tmp); - } - else {taum = sin(-tmp); } - if(m == 0){ - dm0 = 1.0; - } - else {dm0 = 0.0; } - tmp = ((2*n+1)*fac1)/(2.0*(1.0+dm0)*M_PI*fac2); - anm = sqrt( tmp ); - ynm = anm*pnm*taum; - return(ynm); - } -} - -/** - * Compute L and L+ - * This assumes that the quadrature set is defined. 
- */ -void Subdomain::computeLLPlus(int legendre_order){ - int dir0 = direction0; - double SQRT4PI = std::sqrt(4*M_PI); - for(int n=0, nm=0; n < legendre_order+1; n++){ - for(int m=-n; m<=n; m++){ - for(int d=0; d -#include - -// Foreward Decl -struct Directions; -struct SubTVec; -struct Input_Variables; -class Kernel; - -/** - * Provides sweep index sets for a given octant. - * This generalizes the sweep pattern, and allows for experimenting with - * a tiled approach to on-node sweeps. - */ -struct Grid_Sweep_Block { - int start_i, start_j, start_k; // starting index - int end_i, end_j, end_k; // termination conditon (one past) - int inc_i, inc_j, inc_k; // increment -}; - - - -/** - * Contains parameters and variables that describe a single Group Set and - * Direction Set. - */ -struct Subdomain { - Subdomain(); - ~Subdomain(); - - void setup(int sdom_id, Input_Variables *input_vars, int gs, int ds, int zs, - std::vector &direction_list, Kernel *kernel, Layout *layout); - - void setVars(SubTVec *ell_ptr, SubTVec *ell_plus_ptr, - SubTVec *phi_ptr, SubTVec *phi_out_ptr); - - void randomizeData(void); - void copy(Subdomain const &b); - bool compare(Subdomain const &b, double tol, bool verbose); - void computeSweepIndexSet(void); - void computeLLPlus(int legendre_order); - - int idx_group_set; - int idx_dir_set; - int idx_zone_set; - - int num_groups; // Number of groups in this set - int num_directions; // Number of directions in this set - int num_zones; // Number of zones in this set - - double zeros[3]; // origin of local mesh - int nzones[3]; // Number of zones in each dimension - std::vector deltas[3]; // Spatial grid deltas in each dimension (including ghost zones) - - int group0; // Starting global group id - int direction0; // Starting global direction id - - Grid_Sweep_Block sweep_block; - - // Neighbors - Neighbor upwind[3]; // Upwind dependencies in x,y,z - Neighbor downwind[3]; // Downwind neighbors in x,y,z - - // Sweep boundary data - SubTVec 
*plane_data[3]; - SubTVec *old_plane_data[3]; - - // Variables - SubTVec *psi; // Solution - SubTVec *rhs; // RHS, source term - SubTVec *sigt; // Zonal per-group cross-section - - // Pointers into directions and directionset data from Grid_Data - Directions *directions; - SubTVec *ell; - SubTVec *ell_plus; - SubTVec *phi; - SubTVec *phi_out; - - // Materials on the mesh, used for scattering lookup - double reg_volume[3]; // volume of each material region - std::vector volume; // volume of each zone - std::vector mixed_to_zones; // mapping from mixed slot to zones - std::vector num_mixed; // mapping from mixed slot to zones - std::vector zones_to_mixed; // mapping from zones to first mixed slot - std::vector mixed_material; // material number for each mixed slot - std::vector mixed_fraction; // volume fraction each mixed slot -}; - -#endif diff --git a/src/Kripke/SweepSolver.cpp b/src/Kripke/SweepSolver.cpp new file mode 100644 index 00000000..67016369 --- /dev/null +++ b/src/Kripke/SweepSolver.cpp @@ -0,0 +1,114 @@ +/* + * NOTICE + * + * This work was produced at the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. 
Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace Kripke; + +/** + Perform full parallel sweep algorithm on subset of subdomains. +*/ +void Kripke::SweepSolver (Kripke::Core::DataStore &data_store, std::vector subdomain_list, bool block_jacobi) +{ + KRIPKE_TIMER(data_store, SweepSolver); + + // Initialize plane data + Kripke::Kernel::kConst(data_store.getVariable("i_plane"), 0.0); + Kripke::Kernel::kConst(data_store.getVariable("j_plane"), 0.0); + Kripke::Kernel::kConst(data_store.getVariable("k_plane"), 0.0); + + // Create a new sweep communicator object + ParallelComm *comm = NULL; + if(block_jacobi){ + comm = new BlockJacobiComm(data_store); + } + else { + comm = new SweepComm(data_store); + } + + // Add all subdomains in our list + for(size_t i = 0;i < subdomain_list.size();++ i){ +// Kripke::Core::Comm default_comm; +// printf("SweepSolver: rank=%d, sdom=%d\n", (int)default_comm.rank(), (int)*subdomain_list[i]); + SdomId sdom_id = subdomain_list[i]; + comm->addSubdomain(data_store, sdom_id); + } + + auto &field_upwind = data_store.getVariable("upwind"); + + /* Loop until we have finished all of our work */ + while(comm->workRemaining()){ + + std::vector sdom_ready = 
comm->readySubdomains(); + int backlog = sdom_ready.size(); + + // Run top of list + if(backlog > 0){ + SdomId sdom_id = sdom_ready[0]; + + auto upwind = field_upwind.getView(sdom_id); + + // Clear boundary conditions + if(upwind(Direction{0}) == -1){ + Kripke::Kernel::kConst(data_store.getVariable("i_plane"), sdom_id, 0.0); + } + if(upwind(Direction{1}) == -1){ + Kripke::Kernel::kConst(data_store.getVariable("j_plane"), sdom_id, 0.0); + } + if(upwind(Direction{2}) == -1){ + Kripke::Kernel::kConst(data_store.getVariable("k_plane"), sdom_id, 0.0); + } + + // Perform subdomain sweep + Kripke::Kernel::sweepSubdomain(data_store, Kripke::SdomId{sdom_id}); + + // Mark as complete (and do any communication) + comm->markComplete(sdom_id); + } + } + + delete comm; + +// printf("\nAfter sweep psi:\n"); +// data_store.getVariable("psi").dump(); + +} + + diff --git a/src/Kripke/Kernel/Kernel_3d_GDZ.h b/src/Kripke/SweepSolver.h similarity index 70% rename from src/Kripke/Kernel/Kernel_3d_GDZ.h rename to src/Kripke/SweepSolver.h index 8de463e1..91281578 100644 --- a/src/Kripke/Kernel/Kernel_3d_GDZ.h +++ b/src/Kripke/SweepSolver.h @@ -30,25 +30,25 @@ * Department of Energy (DOE) or Lawrence Livermore National Security. 
*/ -#ifndef KRIPKE_KERNEL_3D_GDZ_H__ -#define KRIPKE_KERNEL_3D_GDZ_H__ - -#include - -class Kernel_3d_GDZ : public Kernel { - public: - virtual Nesting_Order nestingPsi(void) const; - virtual Nesting_Order nestingPhi(void) const; - virtual Nesting_Order nestingSigt(void) const; - virtual Nesting_Order nestingEll(void) const; - virtual Nesting_Order nestingEllPlus(void) const; - virtual Nesting_Order nestingSigs(void) const; - - virtual void LTimes(Grid_Data *grid_data); - virtual void LPlusTimes(Grid_Data *grid_data); - virtual void scattering(Grid_Data *grid_data); - virtual void source(Grid_Data *grid_data); - virtual void sweep(Subdomain *ga_set); -}; +#ifndef KRIPKE_SWEEPSOLVER_H__ +#define KRIPKE_SWEEPSOLVER_H__ + +#include +#include +#include +#include + +namespace Kripke { + + class DataStore; + + void SweepSolver (Kripke::Core::DataStore &data_store, + std::vector subdomain_list, + bool block_jacobi); + + + +} // namespace #endif + diff --git a/src/Kripke/Sweep_Solver.cpp b/src/Kripke/Sweep_Solver.cpp deleted file mode 100644 index 0fb616eb..00000000 --- a/src/Kripke/Sweep_Solver.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. 
Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include -#include -#include -#include -#include -#include -#include - - -/** - Run solver iterations. 
-*/ -int SweepSolver (Grid_Data *grid_data, bool block_jacobi) -{ - Kernel *kernel = grid_data->kernel; - - int mpi_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - - grid_data->trace_offset = MPI_Wtime(); - if(grid_data->sweep_trace){ - // Get a "synchronized" time in case there is clock skew - // this won't be perfect, but will eliminate large differences - MPI_Bcast(&grid_data->trace_offset, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); - grid_data->trace_offset = grid_data->trace_offset - MPI_Wtime(); - - // Open a trace file based on our rank - char fname[1024]; - snprintf(fname, 1024, "trace.%05d", mpi_rank); - grid_data->trace_file = fopen(fname, "wb"); - } - - - - BLOCK_TIMER(grid_data->timing, Solve); - - - // Loop over iterations - double part_last = 0.0; - for(int iter = 0;iter < grid_data->niter;++ iter){ - - /* - * Compute the RHS: rhs = LPlus*S*L*psi + Q - */ - - // Discrete to Moments transformation (phi = L*psi) - { - BLOCK_TIMER(grid_data->timing, LTimes); - kernel->LTimes(grid_data); - } - - // Compute Scattering Source Term (psi_out = S*phi) - { - BLOCK_TIMER(grid_data->timing, Scattering); - kernel->scattering(grid_data); - } - - // Compute External Source Term (psi_out = psi_out + Q) - { - BLOCK_TIMER(grid_data->timing, Source); - kernel->source(grid_data); - } - - // Moments to Discrete transformation (rhs = LPlus*psi_out) - { - BLOCK_TIMER(grid_data->timing, LPlusTimes); - kernel->LPlusTimes(grid_data); - } - - /* - * Sweep (psi = Hinv*rhs) - */ - { - BLOCK_TIMER(grid_data->timing, Sweep); - - if(true){ - // Create a list of all groups - std::vector sdom_list(grid_data->subdomains.size()); - for(int i = 0;i < grid_data->subdomains.size();++ i){ - sdom_list[i] = i; - } - - // Sweep everything - SweepSubdomains(sdom_list, grid_data, block_jacobi); - } - // This is the ARDRA version, doing each groupset sweep independently - else{ - for(int group_set = 0;group_set < grid_data->num_group_sets;++ group_set){ - std::vector sdom_list; - // Add all 
subdomains for this groupset - for(int s = 0;s < grid_data->subdomains.size();++ s){ - if(grid_data->subdomains[s].idx_group_set == group_set){ - sdom_list.push_back(s); - } - } - - // Sweep the groupset - SweepSubdomains(sdom_list, grid_data, block_jacobi); - } - } - } - - double part = grid_data->particleEdit(); - if(mpi_rank==0){ - printf("iter %d: particle count=%e, change=%e\n", iter, part, (part-part_last)/part); - } - part_last = part; - } - - if(grid_data->trace_file){ - fclose(grid_data->trace_file); - } - - return(0); -} - - - -/** - Perform full parallel sweep algorithm on subset of subdomains. -*/ -void SweepSubdomains (std::vector subdomain_list, Grid_Data *grid_data, bool block_jacobi) -{ - // Create a new sweep communicator object - ParallelComm *comm = NULL; - if(block_jacobi){ - comm = new BlockJacobiComm(grid_data); - } - else { - comm = new SweepComm(grid_data); - } - - // Add all subdomains in our list - for(int i = 0;i < subdomain_list.size();++ i){ - int sdom_id = subdomain_list[i]; - comm->addSubdomain(sdom_id, grid_data->subdomains[sdom_id]); - } - - // try and synch up tasks for better sweep performance? 
- // - // MPI_Barrier(MPI_COMM_WORLD); - - /* Loop until we have finished all of our work */ - while(comm->workRemaining()){ - - // Get a list of subdomains that have met dependencies - // DEBUG: Query MPI a few times between doing actual work - // the idea is to trick MPI into actually sending messages - for(int i = 0;i < KRIPKE_SWEEP_EXTRA_RECV;++ i){ - comm->readySubdomains(); - } - // now do it for real - std::vector sdom_ready = comm->readySubdomains(); - int backlog = sdom_ready.size(); - - // Run top of list - if(backlog > 0){ - int sdom_id = sdom_ready[0]; - Subdomain &sdom = grid_data->subdomains[sdom_id]; - // Clear boundary conditions - for(int dim = 0;dim < 3;++ dim){ - if(sdom.upwind[dim].subdomain_id == -1){ - sdom.plane_data[dim]->clear(0.0); - } - } - - double start_time, end_time; - if(grid_data->trace_file){ - start_time = MPI_Wtime() + grid_data->trace_offset; - } - { - BLOCK_TIMER(grid_data->timing, Sweep_Kernel); - // Perform subdomain sweep - grid_data->kernel->sweep(&sdom); - } - if(grid_data->trace_file){ - end_time = MPI_Wtime() + grid_data->trace_offset; - fprintf(grid_data->trace_file, "sweep_kernel %lf %lf %d\n", start_time, end_time, sdom_id); - } - - // Mark as complete (and do any communication) - comm->markComplete(sdom_id); - } - } - - delete comm; -} - - diff --git a/src/Kripke/Test/TestKernels.cpp b/src/Kripke/Test/TestKernels.cpp deleted file mode 100644 index 4eb56b78..00000000 --- a/src/Kripke/Test/TestKernels.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * NOTICE - * - * This work was produced at the Lawrence Livermore National Laboratory (LLNL) - * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. - * Department of Energy (DOE) and Lawrence Livermore National Security, LLC - * (LLNS) for the operation of LLNL. The rights of the Federal Government are - * reserved under Contract 44. - * - * DISCLAIMER - * - * This work was prepared as an account of work sponsored by an agency of the - * United States Government. 
Neither the United States Government nor Lawrence - * Livermore National Security, LLC nor any of their employees, makes any - * warranty, express or implied, or assumes any liability or responsibility - * for the accuracy, completeness, or usefulness of any information, apparatus, - * product, or process disclosed, or represents that its use would not infringe - * privately-owned rights. Reference herein to any specific commercial products, - * process, or service by trade name, trademark, manufacturer or otherwise does - * not necessarily constitute or imply its endorsement, recommendation, or - * favoring by the United States Government or Lawrence Livermore National - * Security, LLC. The views and opinions of authors expressed herein do not - * necessarily state or reflect those of the United States Government or - * Lawrence Livermore National Security, LLC, and shall not be used for - * advertising or product endorsement purposes. - * - * NOTIFICATION OF COMMERCIAL USE - * - * Commercialization of this product is prohibited without notifying the - * Department of Energy (DOE) or Lawrence Livermore National Security. - */ - -#include - -#include -#include -#include - -/** - * Functional object to run the LTimes kernel. - */ -struct runLTimes { - std::string name(void) const { return "LTimes"; } - - void operator ()(Grid_Data *grid_data) const { - grid_data->kernel->LTimes(grid_data); - } -}; - -/** - * Functional object to run the LPlusTimes kernel. - */ -struct runLPlusTimes { - std::string name(void) const { return "LPlusTimes"; } - - void operator ()(Grid_Data *grid_data) const { - grid_data->kernel->LPlusTimes(grid_data); - } -}; - - -/** - * Functional object to run the scattering kernel. - */ -struct runScattering { - std::string name(void) const { return "scattering"; } - - void operator ()(Grid_Data *grid_data) const { - grid_data->kernel->scattering(grid_data); - } -}; - - -/** - * Functional object to run the source kernel. 
- */ -struct runSource { - std::string name(void) const { return "source"; } - - void operator ()(Grid_Data *grid_data) const { - grid_data->kernel->source(grid_data); - } -}; - -/** - * Functional object to run the MPI sweep and sweep kernels - */ -struct runSweep { - std::string name(void) const { return "Sweep"; } - - void operator ()(Grid_Data *grid_data) const { - std::vector sdom_list(grid_data->subdomains.size()); - for(int i = 0;i < grid_data->subdomains.size();++ i){ - sdom_list[i] = i; - } - SweepSubdomains(sdom_list, grid_data, false); - } -}; - - -/** - * Tests a specific kernel (using one of the above runXXX functional objects). - */ -template -void testKernel(Input_Variables &input_variables){ - int myid; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - - KernelRunner kr; - - if(myid == 0){ - printf(" Comparing %s to %s for kernel %s\n", - nestingString(NEST_GDZ).c_str(), - nestingString(input_variables.nesting).c_str(), - kr.name().c_str()); - } - - // Allocate two problems (one reference) - if(myid == 0)printf(" -- allocating\n"); - Grid_Data *grid_data = new Grid_Data(&input_variables); - - Nesting_Order old_nest = input_variables.nesting; - input_variables.nesting = NEST_GDZ; - Grid_Data *ref_data = new Grid_Data(&input_variables); - input_variables.nesting = old_nest; - - // Generate random data in the reference problem, and copy it to the other - if(myid == 0)printf(" -- randomizing data\n"); - ref_data->randomizeData(); - grid_data->copy(*ref_data); - - if(myid == 0)printf(" -- running kernels\n"); - - // Run both kernels - kr(ref_data); - kr(grid_data); - - if(myid == 0)printf(" -- comparing results\n"); - // Compare differences - bool is_diff = ref_data->compare(*grid_data, 1e-12, true); - if(is_diff){ - if(myid == 0)printf("Differences found, bailing out\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } - - // Cleanup - if(myid == 0)printf(" -- OK\n\n"); - delete grid_data; - delete ref_data; -} - - -/** - * Tests all kernels given the specified input. 
- */ -void testKernels(Input_Variables &input_variables){ - // Run LTimes - testKernel(input_variables); - - // Run LPlusTimes - testKernel(input_variables); - - // Run Scattering - testKernel(input_variables); - - // Run Source - testKernel(input_variables); - - // Run Sweep - testKernel(input_variables); -} diff --git a/src/Kripke/Timing.cpp b/src/Kripke/Timing.cpp index 9a219524..0fdbcde5 100644 --- a/src/Kripke/Timing.cpp +++ b/src/Kripke/Timing.cpp @@ -32,162 +32,78 @@ #include -#include +#include +#include #include -#include #include #include -#include -#include - -#ifdef KRIPKE_USE_BGPM -extern "C" void HPM_Start(char const *); -extern "C" void HPM_Stop(char const *); -#endif - - -#ifdef KRIPKE_USE_PAPI -#include -#endif +using namespace Kripke; Timing::~Timing(){ -#ifdef KRIPKE_USE_PAPI -long long tmp[16]; -PAPI_stop(papi_set, tmp); -#endif - + stopAll(); } void Timing::start(std::string const &name){ // get or create timer Timer &timer = timers[name]; - if(!timer.started){ - timer.started = true; - timer.start_time = MPI_Wtime(); - -#ifdef KRIPKE_USE_PAPI - int num_papi = papi_event.size(); - if(num_papi > 0){ - if(timer.papi_total.size() == 0){ - timer.papi_start_values.resize(num_papi, 0); - timer.papi_total.resize(num_papi, 0); - } - - /* - // start timers - PAPI_start_counters(&papi_event[0], num_papi); - - // clear timers - long long tmp[16]; - PAPI_read_counters(tmp, num_papi); - */ - - // read initial values - PAPI_read(papi_set, &timer.papi_start_values[0]); - - } -#endif - -#ifdef KRIPKE_USE_BGPM - HPM_Start(name.c_str()); -#endif - } + // Start it up + timer.start(name); } void Timing::stop(std::string const &name){ // get or create timer Timer &timer = timers[name]; -#ifdef KRIPKE_USE_BGPM - HPM_Stop(name.c_str()); -#endif - - if(timer.started){ -#ifdef KRIPKE_USE_PAPI - int num_papi = papi_event.size(); - if(num_papi > 0){ - // read timers - long long tmp[16]; - //PAPI_stop_counters(tmp, num_papi); - PAPI_read(papi_set, tmp); - - // accumulate 
to all started timers (since this clears the PAPI values) - for(int i = 0;i < num_papi;++ i){ - timer.papi_total[i] += tmp[i] - timer.papi_start_values[i]; - } - - } -#endif - - // Stop the timer - timer.started = false; - timer.total_time += MPI_Wtime() - timer.start_time; - timer.count ++; - - } + // stop it + timer.stop(name); } void Timing::stopAll(void){ - for(TimerMap::iterator i = timers.begin();i != timers.end();++ i){ - stop((*i).first); + for(auto i : timers){ + stop(i.first); } } -void Timing::clear(void){ - timers.clear(); -} - void Timing::print(void) const { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if(rank != 0){ + Kripke::Core::Comm default_comm; + if(default_comm.rank() != 0){ return; } // build a sorted list of names std::vector names; - for(TimerMap::const_iterator i = timers.begin();i != timers.end();++ i){ - names.push_back((*i).first); - + for(auto i : timers){ + names.push_back(i.first); } std::sort(names.begin(), names.end()); std::vector ord_timers; - for(int i = 0;i < names.size();++ i){ - std::string &name = names[i]; - TimerMap::const_iterator iter = timers.find(name); - ord_timers.push_back(&(*iter).second); + for(auto name : names){ + ord_timers.push_back(&timers.find(name)->second); } // Display column names - printf("Timers:\n"); + printf("\nTimers\n"); + printf("======\n\n"); printf(" %-16s %12s %12s", "Timer", "Count", "Seconds"); -#ifdef KRIPKE_USE_PAPI - int num_papi = papi_names.size(); - for(int i = 0;i < num_papi;++i){ - printf(" %16s", papi_names[i].c_str()); - } -#endif printf("\n"); + printf(" ---------------- ------------ ------------\n"); // Dislpay timer results - for(int i = 0;i < names.size();++ i){ - printf(" %-16s %12d %12.5lf", names[i].c_str(), (int)ord_timers[i]->count, ord_timers[i]->total_time); -#ifdef KRIPKE_USE_PAPI - for(int p = 0;p < num_papi;++ p){ - printf(" %16ld", (long)ord_timers[i]->papi_total[p]); - } -#endif - printf("\n"); + for(size_t i = 0;i < names.size();++ i){ + printf(" %-16s %12d 
%12.5lf\n", + names[i].c_str(), + (int)ord_timers[i]->getCount(), + ord_timers[i]->getElapsed()); } // Now display timers in machine readable format printf("\n"); printf("TIMER_NAMES:"); - for(int i = 0;i < names.size();++ i){ + for(size_t i = 0;i < names.size();++ i){ if(i > 0){ printf(","); } @@ -195,73 +111,29 @@ void Timing::print(void) const { } printf("\n"); printf("TIMER_DATA:"); - for(int i = 0;i < names.size();++ i){ + for(size_t i = 0;i < names.size();++ i){ if(i > 0){ printf(","); } - printf("%lf", ord_timers[i]->total_time); + printf("%lf", ord_timers[i]->getElapsed()); } printf("\n"); } double Timing::getTotal(std::string const &name) const{ - TimerMap::const_iterator i = timers.find(name); + auto i = timers.find(name); if(i == timers.end()){ return 0.0; } - return (*i).second.total_time; + return i->second.getElapsed(); } - - -void Timing::setPapiEvents(std::vector names){ -#ifdef KRIPKE_USE_PAPI - - - static bool papi_initialized = false; - if(!papi_initialized){ - //printf("PAPI INIT\n"); - int retval = PAPI_library_init(PAPI_VER_CURRENT); - papi_initialized = true; - - if(retval != PAPI_VER_CURRENT){ - fprintf(stderr, "ERROR INITIALIZING PAPI\n"); - exit(1); - } - } - - //printf("PAPI VERSION=%x\n", - // PAPI_VERSION); - - papi_set = PAPI_NULL; - PAPI_create_eventset(&papi_set); - - - for(int i = 0;i < names.size();++ i){ - // Convert text string to PAPI id - int event_code; - PAPI_event_name_to_code( - const_cast(names[i].c_str()), - &event_code); - - // TODO: error checking? 
- - // Add to our list of PAPI events - papi_names.push_back(names[i]); - papi_event.push_back(event_code); - - int retval = PAPI_add_event(papi_set, event_code); - if(retval != PAPI_OK){ - fprintf(stderr, "ERROR ADDING %s, retval=%d, ID=0x%-10x\n", names[i].c_str(), retval, event_code); - } - - //printf("EVT=%s, ID=0x%-10x\n", names[i].c_str(), event_code); - } - PAPI_start(papi_set); -#else - if(names.size() > 0){ - fprintf(stderr, "WARNING: PAPI NOT ENABLED, IGNORING PAPI EVENTS\n"); +size_t Timing::getCount(std::string const &name) const{ + auto i = timers.find(name); + if(i == timers.end()){ + return 0; } -#endif + return i->second.getCount(); } + diff --git a/src/Kripke/Timing.h b/src/Kripke/Timing.h index 36b1807d..135a3dec 100644 --- a/src/Kripke/Timing.h +++ b/src/Kripke/Timing.h @@ -33,90 +33,100 @@ #ifndef KRIPKE_TIMING_H__ #define KRIPKE_TIMING_H__ -#include -#include -#include -#include -#include -#include -#include - -#ifdef KRIPKE_USE_PAPI -#include -#endif - -inline double getTime(void){ - struct timeval tv; - gettimeofday(&tv, NULL); - return (double)tv.tv_sec + (double)tv.tv_usec/1000000.0; -} - - -struct Timer { - Timer() : - started(false), - start_time(0.0), - total_time(0.0), - count(0) - {} - - bool started; - double start_time; - double total_time; - size_t count; -#ifdef KRIPKE_USE_PAPI - std::vector papi_start_values; - std::vector papi_total; -#endif -}; - -class Timing { - public: - ~Timing(); - - void start(std::string const &name); - void stop(std::string const &name); +#include +#include - void stopAll(void); - void clear(void); +#include - void print(void) const; - double getTotal(std::string const &name) const; - - void setPapiEvents(std::vector names); - - private: - typedef std::map TimerMap; - TimerMap timers; -#ifdef KRIPKE_USE_PAPI - std::vector papi_names; - std::vector papi_event; - int papi_set; -#endif -}; - - -#include - -// Aides timing a block of code, with automatic timer stopping -class BlockTimer { - public: - inline 
BlockTimer(Timing &timer_obj, std::string const &timer_name) : - timer(timer_obj), - name(timer_name) - { - timer.start(name); - } - inline ~BlockTimer(){ - timer.stop(name); - } +#include +#include - private: +namespace Kripke { + + class Timer { + public: + RAJA_INLINE + Timer() : + started(false), + elapsed(0.), + count(0) + {} + + RAJA_INLINE + void start(std::string const &my_name) { + timer.stop(my_name.c_str()); + timer.reset(); + timer.start(my_name.c_str()); + started = true; + ++ count; + } + + RAJA_INLINE + void stop(std::string const &my_name) { + if(started){ + timer.stop(my_name.c_str()); + elapsed += timer.elapsed(); + } + } + + RAJA_INLINE + size_t getCount() const { + return count; + } + + RAJA_INLINE + double getElapsed() const { + return elapsed; + } + + private: + bool started; + double elapsed; + size_t count; + RAJA::Timer timer; + }; + + class Timing : public Kripke::Core::BaseVar { + public: + virtual ~Timing(); + + void start(std::string const &name); + void stop(std::string const &name); + + void stopAll(void); + + void print(void) const; + double getTotal(std::string const &name) const; + size_t getCount(std::string const &name) const; + + private: + using TimerMap = std::map; + TimerMap timers; + }; + + + // Aides timing a block of code, with automatic timer stopping + class BlockTimer { + public: + inline BlockTimer(Timing &timer_obj, std::string const &timer_name) : + timer(timer_obj), + name(timer_name) + { + timer.start(name); + } + inline ~BlockTimer(){ + timer.stop(name); + } + + private: Timing &timer; std::string name; -}; + }; + +} -#define BLOCK_TIMER(TIMER, NAME) BlockTimer BLK_TIMER_##NAME(TIMER, #NAME); +#define KRIPKE_TIMER(DS, NAME) \ + Kripke::BlockTimer BLK_TIMER_##NAME(DS.getVariable("timing"), #NAME); #endif diff --git a/src/Kripke/VarTypes.h b/src/Kripke/VarTypes.h new file mode 100644 index 00000000..c5450c47 --- /dev/null +++ b/src/Kripke/VarTypes.h @@ -0,0 +1,182 @@ +/* + * NOTICE + * + * This work was produced at 
the Lawrence Livermore National Laboratory (LLNL) + * under contract no. DE-AC-52-07NA27344 (Contract 44) between the U.S. + * Department of Energy (DOE) and Lawrence Livermore National Security, LLC + * (LLNS) for the operation of LLNL. The rights of the Federal Government are + * reserved under Contract 44. + * + * DISCLAIMER + * + * This work was prepared as an account of work sponsored by an agency of the + * United States Government. Neither the United States Government nor Lawrence + * Livermore National Security, LLC nor any of their employees, makes any + * warranty, express or implied, or assumes any liability or responsibility + * for the accuracy, completeness, or usefulness of any information, apparatus, + * product, or process disclosed, or represents that its use would not infringe + * privately-owned rights. Reference herein to any specific commercial products, + * process, or service by trade name, trademark, manufacturer or otherwise does + * not necessarily constitute or imply its endorsement, recommendation, or + * favoring by the United States Government or Lawrence Livermore National + * Security, LLC. The views and opinions of authors expressed herein do not + * necessarily state or reflect those of the United States Government or + * Lawrence Livermore National Security, LLC, and shall not be used for + * advertising or product endorsement purposes. + * + * NOTIFICATION OF COMMERCIAL USE + * + * Commercialization of this product is prohibited without notifying the + * Department of Energy (DOE) or Lawrence Livermore National Security. 
+ */ + +#ifndef KRIPKE_VARTYPES_H__ +#define KRIPKE_VARTYPES_H__ + +#include +#include +#include +#include +#include + +namespace Kripke { + + RAJA_INDEX_VALUE(Dimension, "Dimension"); + RAJA_INDEX_VALUE(Direction, "Direction"); + RAJA_INDEX_VALUE(GlobalGroup, "GlobalGroup"); + RAJA_INDEX_VALUE(Group, "Group"); + RAJA_INDEX_VALUE(Legendre, "Legendre"); + RAJA_INDEX_VALUE(Material, "Material"); + RAJA_INDEX_VALUE(MixElem, "MixElem"); + RAJA_INDEX_VALUE(Moment, "Moment"); + RAJA_INDEX_VALUE(Zone, "Zone"); + RAJA_INDEX_VALUE(ZoneI, "ZoneI"); + RAJA_INDEX_VALUE(ZoneJ, "ZoneJ"); + RAJA_INDEX_VALUE(ZoneK, "ZoneK"); + + using Field_Flux = Kripke::Core::Field; + using Field_Moments = Kripke::Core::Field; + + using Field_IPlane = Kripke::Core::Field; + using Field_JPlane = Kripke::Core::Field; + using Field_KPlane = Kripke::Core::Field; + + using Field_Ell = Kripke::Core::Field; + using Field_EllPlus = Kripke::Core::Field; + + using Field_Speed = Kripke::Core::Field; + using Field_SigmaT = Kripke::Core::Field; + using Field_SigmaS = Kripke::Core::Field; + + using Field_Direction2Double = Kripke::Core::Field; + using Field_Direction2Int = Kripke::Core::Field; + + using Field_Adjacency = Kripke::Core::Field; + + using Field_Moment2Legendre = Kripke::Core::Field; + + using Field_ZoneI2Double = Kripke::Core::Field; + using Field_ZoneJ2Double = Kripke::Core::Field; + using Field_ZoneK2Double = Kripke::Core::Field; + using Field_Zone2Double = Kripke::Core::Field; + using Field_Zone2Int = Kripke::Core::Field; + using Field_Zone2MixElem = Kripke::Core::Field; + + using Field_MixElem2Double = Kripke::Core::Field; + using Field_MixElem2Material = Kripke::Core::Field; + using Field_MixElem2Zone = Kripke::Core::Field; + + using Field_SigmaTZonal = Kripke::Core::Field; + + + template + struct DefaultOrder{}; + + template + struct DefaultOrder> : DefaultOrder {}; + + template<> + struct DefaultOrder{ + using type = camp::list; + }; + + template<> + struct DefaultOrder{ + using type = 
camp::list; + }; + + template<> + struct DefaultOrder{ + using type = camp::list; + }; + + template<> + struct DefaultOrder{ + using type = camp::list; + }; + + template<> + struct DefaultOrder{ + using type = camp::list; + }; + + template<> + struct DefaultOrder{ + using type = camp::list; + }; + + + template + struct SdomAL; + + template + struct SdomAL> + { + using al_t = ArchLayoutT; + using arch_t = A; + using layout_t = L; + + using order_t = typename DefaultOrder::type; + + Kripke::SdomId sdom_id; + + template + auto getView(FieldType &field) const -> + decltype(field.template getViewOrder(sdom_id)) + { + return field.template getViewOrder(sdom_id); + } + + template + auto getView(FieldType &field, Kripke::SdomId sdom) const -> + decltype(field.template getViewOrder(sdom)) + { + return field.template getViewOrder(sdom); + } + }; + + template + SdomAL getSdomAL(AL, Kripke::SdomId sdom_id){ + return SdomAL{sdom_id}; + } + + + template + RAJA_INLINE + FieldType &createField(Core::DataStore &data_store, std::string const &name, ArchLayoutV al_v, SetType const &set) + { + FieldType *field = nullptr; + dispatchLayout(al_v.layout_v, [&](auto layout_t){ + using order_t = typename DefaultOrder::type; + + field = new FieldType(set, order_t{}); + data_store.addVariable(name, field); + }); + + return *field; + }; + +} // namespace Kripke + + +#endif diff --git a/src/KripkeConfig.h.in b/src/KripkeConfig.h.in new file mode 100644 index 00000000..bd126151 --- /dev/null +++ b/src/KripkeConfig.h.in @@ -0,0 +1,23 @@ +#ifndef KRIPKE_CONFIG_H__ +#define KRIPKE_CONFIG_H__ + +#cmakedefine KRIPKE_VERSION "@KRIPKE_VERSION@" + +#cmakedefine KRIPKE_CXX_COMPILER "@KRIPKE_CXX_COMPILER@" +#cmakedefine KRIPKE_CXX_FLAGS "@KRIPKE_CXX_FLAGS@" +#cmakedefine KRIPKE_LINK_FLAGS "@KRIPKE_LINK_FLAGS@" + +#cmakedefine KRIPKE_USE_MPI +#cmakedefine KRIPKE_USE_OPENMP +#cmakedefine KRIPKE_USE_CUDA +#cmakedefine KRIPKE_USE_CHAI + +#cmakedefine KRIPKE_ARCH "@KRIPKE_ARCH@" + +#cmakedefine 
KRIPKE_ARCHV_DEFAULT @KRIPKE_ARCHV_DEFAULT@ +#cmakedefine KRIPKE_LAYOUTV_DEFAULT @KRIPKE_LAYOUTV_DEFAULT@ + +#cmakedefine KRIPKE_NVCC_COMPILER "@KRIPKE_NVCC_COMPILER@" +#cmakedefine KRIPKE_NVCC_FLAGS "@KRIPKE_NVCC_FLAGS@" + +#endif diff --git a/src/kripke.cpp b/src/kripke.cpp index 60dad530..7f073171 100644 --- a/src/kripke.cpp +++ b/src/kripke.cpp @@ -29,38 +29,38 @@ * Commercialization of this product is prohibited without notifying the * Department of Energy (DOE) or Lawrence Livermore National Security. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifdef KRIPKE_USE_OPENMP -#include -#endif - -#ifdef KRIPKE_USE_TCMALLOC -#include +#include #endif #ifdef __bgq__ #include -#include #endif + void usage(void){ - int myid; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - if(myid == 0){ + + Kripke::Core::Comm comm; + if(comm.rank() == 0){ // Get a new object with defaulted values - Input_Variables def; + InputVariables def; // Display command line printf("Usage: [srun ...] 
kripke [options...]\n\n"); @@ -100,19 +100,17 @@ void usage(void){ printf("\n"); printf("On-Node Options:\n"); printf("----------------\n"); - printf(" --nest Loop nesting order (and data layout)\n"); + printf(" --arch Architecture selection\n"); + printf(" Available: Sequential, OpenMP, CUDA\n"); + printf(" Default: --arch %s\n\n", archToString(def.al_v.arch_v).c_str()); + printf(" --layout Data layout and loop nesting order\n"); printf(" Available: DGZ,DZG,GDZ,GZD,ZDG,ZGD\n"); - printf(" Default: --nest %s\n\n", nestingString(def.nesting).c_str()); + printf(" Default: --layout %s\n\n", layoutToString(def.al_v.layout_v).c_str()); printf("\n"); printf("Parallel Decomposition Options:\n"); printf("-------------------------------\n"); - printf(" --layout Layout of spatial subdomains over mpi ranks\n"); - printf(" 0: Blocked: local zone sets are adjacent\n"); - printf(" 1: Scattered: adjacent zone sets are distributed\n"); - printf(" Default: --layout %d\n\n", def.layout_pattern); - - + printf(" --procs Number of MPI ranks in each spatial dimension\n"); printf(" Default: --procs %d,%d,%d\n\n", def.npx, def.npy, def.npz); @@ -141,20 +139,10 @@ void usage(void){ printf(" Default: --pmethod sweep\n\n"); printf("\n"); - printf("Output and Testing Options:\n"); - printf("---------------------------\n"); - -#ifdef KRIPKE_USE_PAPI - printf(" --papi Track PAPI hardware counters for each timer\n\n"); -#endif -#ifdef KRIPKE_USE_SILO - printf(" --silo Create SILO output files\n\n"); -#endif - printf(" --trace Turn on sweep trace output\n\n"); - printf(" --test Run Kernel Test instead of solver\n\n"); - printf("\n"); } - MPI_Finalize(); + + Kripke::Core::Comm::finalize(); + exit(1); } @@ -208,32 +196,85 @@ int main(int argc, char **argv) { /* * Initialize MPI */ - MPI_Init(&argc, &argv); - int myid; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - int num_tasks; - MPI_Comm_size(MPI_COMM_WORLD, &num_tasks); + Kripke::Core::Comm::init(&argc, &argv); + + Kripke::Core::Comm comm; + + int 
myid = comm.rank(); + int num_tasks = comm.size(); if (myid == 0) { /* Print out a banner message along with a version number. */ printf("\n"); - printf("----------------------------------------------------------------------\n"); - printf("------------------------ KRIPKE VERSION 1.1 --------------------------\n"); - printf("----------------------------------------------------------------------\n"); + printf(" _ __ _ _\n"); + printf(" | |/ / (_) | |\n"); + printf(" | ' / _ __ _ _ __ | | __ ___\n"); + printf(" | < | '__|| || '_ \\ | |/ // _ \\ \n"); + printf(" | . \\ | | | || |_) || <| __/\n"); + printf(" |_|\\_\\|_| |_|| .__/ |_|\\_\\\\___|\n"); + printf(" | |\n"); + printf(" |_| Version %s\n", KRIPKE_VERSION); + printf("\n"); + printf("LLNL-CODE-658597\n"); + printf("\n"); printf("This work was produced at the Lawrence Livermore National Laboratory\n"); printf("(LLNL) under contract no. DE-AC-52-07NA27344 (Contract 44) between the\n"); printf("U.S. Department of Energy (DOE) and Lawrence Livermore National\n"); printf("Security, LLC (LLNS) for the operation of LLNL. The rights of the\n"); printf("Federal Government are reserved under Contract 44.\n"); printf("\n"); - printf("Main Contact: Adam J. Kunen \n"); - printf("----------------------------------------------------------------------\n"); - - + printf("Author: Adam J. 
Kunen \n"); + printf("\n"); + + // Display information about how we were built + printf("Compilation Options:\n"); + printf(" Architecture: %s\n", KRIPKE_ARCH); + printf(" Compiler: %s\n", KRIPKE_CXX_COMPILER); + printf(" Compiler Flags: \"%s\"\n", KRIPKE_CXX_FLAGS); + printf(" Linker Flags: \"%s\"\n", KRIPKE_LINK_FLAGS); + +#ifdef KRIPKE_USE_CHAI + printf(" CHAI Enabled: Yes\n"); +#else + printf(" CHAI Enabled: No\n"); +#endif + +#ifdef KRIPKE_USE_CUDA + printf(" CUDA Enabled: Yes\n"); + printf(" NVCC: %s\n", KRIPKE_NVCC_COMPILER); + printf(" NVCC Flags: \"%s\"\n", KRIPKE_NVCC_FLAGS); +#else + printf(" CUDA Enabled: No\n"); +#endif + +#ifdef KRIPKE_USE_MPI + printf(" MPI Enabled: Yes\n"); +#else + printf(" MPI Enabled: No\n"); +#endif + +#ifdef KRIPKE_USE_OPENMP + printf(" OpenMP Enabled: Yes\n"); +#else + printf(" OpenMP Enabled: No\n"); +#endif + + + + + /* Print out some information about how OpenMP threads are being mapped * to CPU cores. */ #ifdef KRIPKE_USE_OPENMP + + // Get max number of threads + int max_threads = omp_get_max_threads(); + + // Allocate an array to store which core each thread is running on + std::vector thread_to_core(max_threads, -1); + + // Collect thread->core mapping #pragma omp parallel { int tid = omp_get_thread_num(); @@ -242,17 +283,24 @@ int main(int argc, char **argv) { #else int core = sched_getcpu(); #endif - printf("Rank: %d Thread %d: Core %d\n", myid, tid, core); + thread_to_core[tid] = core; + } + + printf("\nOpenMP Thread->Core mapping for %d threads on rank 0", max_threads); + for(int tid = 0;tid < max_threads;++ tid){ + if(!(tid%8)){ + printf("\n"); + } + printf(" %3d->%3d", tid, thread_to_core[tid]); } + printf("\n"); #endif } /* * Default input parameters */ - Input_Variables vars; - std::vector papi_names; - bool test = false; + InputVariables vars; /* * Parse command line @@ -275,9 +323,6 @@ int main(int argc, char **argv) { vars.num_zonesets_dim[1] = std::atoi(nz[1].c_str()); vars.num_zonesets_dim[2] = 
std::atoi(nz[2].c_str()); } - else if(opt == "--layout"){ - vars.layout_pattern = std::atoi(cmd.pop().c_str()); - } else if(opt == "--zones"){ std::vector nz = split(cmd.pop(), ','); if(nz.size() != 3) usage(); @@ -343,24 +388,11 @@ int main(int argc, char **argv) { else if(opt == "--niter"){ vars.niter = std::atoi(cmd.pop().c_str()); } - else if(opt == "--nest"){ - vars.nesting = nestingFromString(cmd.pop()); - } -#ifdef KRIPKE_USE_SILO - else if(opt == "--silo"){ - vars.silo_basename = cmd.pop(); + else if(opt == "--arch"){ + vars.al_v.arch_v = Kripke::stringToArch(cmd.pop()); } -#endif - else if(opt == "--test"){ - test = true; - } -#ifdef KRIPKE_USE_PAPI - else if(opt == "--papi"){ - papi_names = split(cmd.pop(), ','); - } -#endif - else if(opt == "--trace"){ - vars.sweep_trace = true; + else if(opt == "--layout"){ + vars.al_v.layout_v = Kripke::stringToLayout(cmd.pop()); } else{ printf("Unknwon options %s\n", opt.c_str()); @@ -376,129 +408,111 @@ int main(int argc, char **argv) { /* * Display Options */ - int num_threads=1; if (myid == 0) { - printf("Number of MPI tasks: %d\n", num_tasks); -#ifdef KRIPKE_USE_OPENMP -#pragma omp parallel - { - num_threads = omp_get_num_threads(); - if(omp_get_thread_num() == 0){ - printf("OpenMP threads/task: %d\n", num_threads); - printf("OpenMP total threads: %d\n", num_threads*num_tasks); - } - } -#endif -#ifdef KRIPKE_USE_PAPI - printf("PAPI Counters: "); - if(papi_names.size() > 0){ - for(int i = 0;i < papi_names.size();++ i){ - printf("%s ", papi_names[i].c_str()); - } - } - else{ - printf(""); - } + printf("\nInput Parameters\n"); + printf("================\n"); + printf("\n"); -#endif - printf("Processors: %d x %d x %d\n", vars.npx, vars.npy, vars.npz); - printf("Zones: %d x %d x %d\n", vars.nx, vars.ny, vars.nz); - printf("Legendre Order: %d\n", vars.legendre_order); - printf("Total X-Sec: sigt=[%lf, %lf, %lf]\n", vars.sigt[0], vars.sigt[1], vars.sigt[2]); - printf("Scattering X-Sec: sigs=[%lf, %lf, %lf]\n", 
vars.sigs[0], vars.sigs[1], vars.sigs[2]); - printf("Quadrature Set: "); + printf(" Problem Size:\n"); + printf(" Zones: %d x %d x %d (%d total)\n", vars.nx, vars.ny, vars.nz, vars.nx*vars.ny*vars.nz); + printf(" Groups: %d\n", vars.num_groups); + printf(" Legendre Order: %d\n", vars.legendre_order); + printf(" Quadrature Set: "); if(vars.quad_num_polar == 0){ printf("Dummy S2 with %d points\n", vars.num_directions); } else { printf("Gauss-Legendre, %d polar, %d azimuthal (%d points)\n", vars.quad_num_polar, vars.quad_num_azimuthal, vars.num_directions); } - printf("Parallel method: "); + + + printf("\n"); + printf(" Physical Properties:\n"); + printf(" Total X-Sec: sigt=[%lf, %lf, %lf]\n", vars.sigt[0], vars.sigt[1], vars.sigt[2]); + printf(" Scattering X-Sec: sigs=[%lf, %lf, %lf]\n", vars.sigs[0], vars.sigs[1], vars.sigs[2]); + + + printf("\n"); + printf(" Solver Options:\n"); + printf(" Number iterations: %d\n", vars.niter); + + + + printf("\n"); + printf(" MPI Decomposition Options:\n"); + printf(" Total MPI tasks: %d\n", num_tasks); + printf(" Spatial decomp: %d x %d x %d MPI tasks\n", vars.npx, vars.npy, vars.npz); + printf(" Block solve method: "); if(vars.parallel_method == PMETHOD_SWEEP){ printf("Sweep\n"); } else if(vars.parallel_method == PMETHOD_BJ){ printf("Block Jacobi\n"); } - printf("Loop Nesting Order %s\n", nestingString(vars.nesting).c_str()); - printf("Number iterations: %d\n", vars.niter); - - printf("GroupSet/Groups: %d sets, %d groups/set\n", vars.num_groupsets, vars.num_groups/vars.num_groupsets); - printf("DirSets/Directions: %d sets, %d directions/set\n", vars.num_dirsets, vars.num_directions/vars.num_dirsets); - printf("Zone Sets: %d,%d,%d\n", vars.num_zonesets_dim[0], vars.num_zonesets_dim[1], vars.num_zonesets_dim[2]); - - } - + printf("\n"); + printf(" Per-Task Options:\n"); + printf(" DirSets/Directions: %d sets, %d directions/set\n", vars.num_dirsets, vars.num_directions/vars.num_dirsets); + printf(" GroupSet/Groups: %d sets, %d 
groups/set\n", vars.num_groupsets, vars.num_groups/vars.num_groupsets); + printf(" Zone Sets: %d x %d x %d\n", vars.num_zonesets_dim[0], vars.num_zonesets_dim[1], vars.num_zonesets_dim[2]); + printf(" Architecture: %s\n", archToString(vars.al_v.arch_v).c_str()); + printf(" Data Layout: %s\n", layoutToString(vars.al_v.layout_v).c_str()); + - if(test){ - // Invoke Kernel testing - testKernels(vars); + + } - else{ - // Allocate problem - Grid_Data *grid_data = new Grid_Data(&vars); - grid_data->timing.setPapiEvents(papi_names); - // Run the solver - SweepSolver(grid_data, vars.parallel_method == PMETHOD_BJ); -#ifdef KRIPKE_USE_SILO - // Output silo data - if(vars.silo_basename != ""){ - grid_data->writeSilo(vars.silo_basename); - } -#endif + // Allocate problem - // Print Timing Info - int myid; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - if(myid == 0){ - grid_data->timing.print(); - printf("\n\n"); - } + Kripke::Core::DataStore data_store; + Kripke::generateProblem(data_store, vars); - // Cleanup - delete grid_data; - } + // Run the solver + Kripke::SteadyStateSolver(data_store, vars.niter, vars.parallel_method == PMETHOD_BJ); - // Gather post-point memory info - double heap_mb = -1.0; - double hwm_mb = -1.0; -#ifdef KRIPKE_USE_TCMALLOC - // If we are using tcmalloc, we need to use it's interface - MallocExtension *mext = MallocExtension::instance(); - size_t bytes; + // Print Timing Info + auto &timing = data_store.getVariable("timing"); + timing.print(); - mext->GetNumericProperty("generic.current_allocated_bytes", &bytes); - heap_mb = ((double)bytes)/1024.0/1024.0; + // Compute performance metrics + auto &set_group = data_store.getVariable("Set/Group"); + auto &set_dir = data_store.getVariable("Set/Direction"); + auto &set_zone = data_store.getVariable("Set/Zone"); - mext->GetNumericProperty("generic.heap_size", &bytes); - hwm_mb = ((double)bytes)/1024.0/1024.0; -#else -#ifdef __bgq__ - // use BG/Q specific calls (if NOT using tcmalloc) - uint64_t bytes; + size_t 
num_unknowns = set_group.globalSize() + * set_dir.globalSize() + * set_zone.globalSize(); - int rc = Kernel_GetMemorySize(KERNEL_MEMSIZE_HEAP, &bytes); - heap_mb = ((double)bytes)/1024.0/1024.0; + size_t num_iter = timing.getCount("SweepSolver"); + double solve_time = timing.getTotal("Solve"); + double iter_time = solve_time / num_iter; + double grind_time = iter_time / num_unknowns; + double throughput = num_unknowns / iter_time; - rc = Kernel_GetMemorySize(KERNEL_MEMSIZE_HEAPMAX, &bytes); - hwm_mb = ((double)bytes)/1024.0/1024.0; -#endif -#endif - // Print memory info - if(myid == 0 && heap_mb >= 0.0){ - printf("Bytes allocated: %lf MB\n", heap_mb); - printf("Heap Size : %lf MB\n", hwm_mb); + double sweep_eff = 100.0 * timing.getTotal("SweepSubdomain") / timing.getTotal("SweepSolver"); + if(myid == 0){ + printf("\n"); + printf("Figures of Merit\n"); + printf("================\n"); + printf("\n"); + printf(" Throughput: %e [unknowns/(second/iteration)]\n", throughput); + printf(" Grind time : %e [(seconds/iteration)/unknowns]\n", grind_time); + printf(" Sweep efficiency : %4.5lf [100.0 * SweepSubdomain time / SweepSolver time]\n", sweep_eff); + printf(" Number of unknowns: %lu\n", (unsigned long) num_unknowns); } // Cleanup and exit - MPI_Finalize(); + Kripke::Core::Comm::finalize(); + if(myid == 0){ + printf("\n"); + printf("END\n"); + } return (0); } diff --git a/tarball.py b/tarball.py deleted file mode 100755 index 8b5feea0..00000000 --- a/tarball.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -# This script generates the release tarball - -import os -import sys - -build_dir = "./kripke-tarball" - -symref = os.popen("git symbolic-ref -q HEAD").read().strip(" \n\r") -symref_l = symref.split('/') -branch = 'none' -if len(symref_l) > 0: - branch = symref_l[len(symref_l)-1] -print("Branch: %s" % branch) - -os.system("rm -rf %s" % build_dir) -os.makedirs(build_dir) -os.chdir(build_dir) -os.system("cmake .. 
-DCPACK_SOURCE_PACKAGE_FILE_NAME=kripke-%s" % branch) -os.system("make package_source") -os.system("mv *.tar.gz ..") -os.chdir("..") -os.system("rm -rf %s" % build_dir) -print "Tarball for Kripke generated" - diff --git a/tpl/README.txt b/tpl/README.txt new file mode 100644 index 00000000..3bb24288 --- /dev/null +++ b/tpl/README.txt @@ -0,0 +1,5 @@ +Third Party Libraries + +Subdirectories in ./tpl/ contain third party libraries that Kripke depends on. + +tpl/raja - (REQUIRED) RAJA programming model diff --git a/tpl/chai b/tpl/chai new file mode 160000 index 00000000..0b1d7036 --- /dev/null +++ b/tpl/chai @@ -0,0 +1 @@ +Subproject commit 0b1d70365b8bd44b8ff8248ba03206795071d38e diff --git a/tpl/raja b/tpl/raja new file mode 160000 index 00000000..cc7a97e8 --- /dev/null +++ b/tpl/raja @@ -0,0 +1 @@ +Subproject commit cc7a97e8b4e52c3de820c9dfacd358822a147871