diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index be28f9aa..089cb19f 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -8,32 +8,145 @@ on: jobs: unit_tests: - runs-on: ubuntu-latest - container: kitrt/test:latest - environment: coverage - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} - OMP_NUM_THREADS: 1 - OMP_DYNAMIC: FALSE - OPENBLAS_NUM_THREADS: 1 - MKL_NUM_THREADS: 1 - - steps: - - uses: actions/checkout@v2 - - - name: Build code - run: | - git submodule update --init --recursive - mkdir build && cd build - cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON -DBUILD_ML=OFF -DBUILD_CODE_COV=ON .. - ninja - - - name: Run unit tests - run: | - cd build - ./unit_tests - - - name: Code coverage - run: | - cpp-coveralls -r . -b "build/" -i "src/" -i "include/" --exclude "ext/" --gcov-options "\-lp" --verbose + runs-on: ubuntu-latest + container: kitrt/test:latest + defaults: + run: + working-directory: ${{ github.workspace }} + environment: coverage + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + OMP_NUM_THREADS: 1 + OMP_DYNAMIC: FALSE + OPENBLAS_NUM_THREADS: 1 + MKL_NUM_THREADS: 1 + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build code + run: | + mkdir build && cd build + cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON -DBUILD_ML=OFF -DBUILD_CODE_COV=ON .. + ninja + + - name: Run unit tests + run: | + cd build + ./unit_tests + + - name: Code coverage + run: | + cpp-coveralls -r . -b "build/" -i "src/" -i "include/" --exclude "ext/" --gcov-options "\-lp" --verbose + + rocm_build: + runs-on: ubuntu-latest + container: rocm/dev-ubuntu-22.04:7.2 + env: + OMP_NUM_THREADS: 1 + OMP_DYNAMIC: FALSE + OPENBLAS_NUM_THREADS: 1 + MKL_NUM_THREADS: 1 + + steps: + - name: Install ROCm build dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + git \ + libopenmpi-dev \ + openmpi-bin \ + libblas-dev \ + liblapack-dev \ + cmake \ + ninja-build \ + libvtk9-dev + + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build ROCm target + working-directory: ${{ github.workspace }} + run: | + ROCM_CLANGXX="/opt/rocm-7.2.0/lib/llvm/bin/clang++" + if [ ! -x "${ROCM_CLANGXX}" ] && [ -x "/opt/rocm/lib/llvm/bin/clang++" ]; then + ROCM_CLANGXX="/opt/rocm/lib/llvm/bin/clang++" + fi + if [ ! -x "${ROCM_CLANGXX}" ] && [ -x "/opt/rocm/llvm/bin/clang++" ]; then + ROCM_CLANGXX="/opt/rocm/llvm/bin/clang++" + fi + if [ ! -x "${ROCM_CLANGXX}" ]; then + echo "Could not find ROCm clang++ compiler." >&2 + exit 1 + fi + HIP_DIR="" + for CANDIDATE in \ + "/opt/rocm-7.2.0/lib/cmake/hip" \ + "/opt/rocm/lib/cmake/hip" \ + "/opt/rocm/cmake/hip" + do + if [ -f "${CANDIDATE}/hip-config.cmake" ] || [ -f "${CANDIDATE}/hipConfig.cmake" ]; then + HIP_DIR="${CANDIDATE}" + break + fi + done + if [ -z "${HIP_DIR}" ]; then + echo "Could not find hip package config directory." >&2 + exit 1 + fi + cmake -G Ninja -S . -B build_rocm \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER="${ROCM_CLANGXX}" \ + -DCMAKE_HIP_COMPILER="${ROCM_CLANGXX}" \ + -Dhip_DIR="${HIP_DIR}" \ + -DBUILD_MPI=ON \ + -DBUILD_CUDA_HPC=OFF \ + -DBUILD_HIP_HPC=ON \ + -DBUILD_ML=OFF + cmake --build build_rocm -j2 + + cuda_build: + runs-on: ubuntu-latest + container: nvidia/cuda:12.4.1-devel-ubuntu22.04 + env: + OMP_NUM_THREADS: 1 + OMP_DYNAMIC: FALSE + OPENBLAS_NUM_THREADS: 1 + MKL_NUM_THREADS: 1 + + steps: + - name: Install CUDA build dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + git \ + libopenmpi-dev \ + openmpi-bin \ + libblas-dev \ + liblapack-dev \ + cmake \ + ninja-build \ + libvtk9-dev + + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build CUDA target and tests + working-directory: ${{ github.workspace }} + run: | + cmake -G Ninja -S . -B build_cuda \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_TESTING=ON \ + -DBUILD_MPI=ON \ + -DBUILD_CUDA_HPC=ON \ + -DBUILD_HIP_HPC=OFF \ + -DBUILD_ML=OFF \ + -DCMAKE_CUDA_ARCHITECTURES=70 + cmake --build build_cuda -j2 + + diff --git a/CMakeLists.txt b/CMakeLists.txt index 32bab039..26b0c32d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required( VERSION 3.16 ) -project( KiT-RT VERSION 0.1.0 LANGUAGES CXX ) +project( KiT-RT VERSION 1.2.0 LANGUAGES CXX ) ### OPTIONS ##################################### option( BUILD_TESTING "builds all available unit tests" OFF ) @@ -10,12 +10,16 @@ option( BUILD_CODE_COV "enables compiler option required for code coverage analy option( BUILD_ML "enables build with tensorflow backend access" OFF ) option( BUILD_MPI "enables build with MPI access" OFF ) option( BUILD_CUDA_HPC "enables CUDA backend for SN HPC solver (MPI rank to GPU mapping)" OFF ) +option( BUILD_HIP_HPC "enables HIP/ROCm backend for SN HPC solver (MPI rank to GPU mapping)" OFF ) ################################################# ### COMPILER #################################### set( CMAKE_CXX_STANDARD 17 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) +if( BUILD_CUDA_HPC AND BUILD_HIP_HPC ) + message( FATAL_ERROR "BUILD_CUDA_HPC and BUILD_HIP_HPC cannot both be enabled." ) +endif() if( BUILD_CUDA_HPC ) enable_language( CUDA ) if( CMAKE_VERSION VERSION_LESS 3.18 ) @@ -27,9 +31,25 @@ if( BUILD_CUDA_HPC ) set( CMAKE_CUDA_STANDARD_REQUIRED ON ) set( CMAKE_CUDA_EXTENSIONS OFF ) endif() +if( BUILD_HIP_HPC ) + if( CMAKE_VERSION VERSION_LESS 3.21 ) + message( FATAL_ERROR "BUILD_HIP_HPC requires CMake >= 3.21 for HIP language support." ) + endif() + enable_language( HIP ) + set( CMAKE_HIP_STANDARD 17 ) + set( CMAKE_HIP_STANDARD_REQUIRED ON ) + set( CMAKE_HIP_EXTENSIONS OFF ) +endif() set( KITRT_RELEASE_OPTIONS -march=native -w ) set( KITRT_RELWITHDEBINFO_OPTIONS -march=native -pg -no-pie ) set( KITRT_DEBUG_OPTIONS -Wall -Wextra -Wpedantic ) +if( BUILD_HIP_HPC AND CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|X86_64|amd64|AMD64" ) + # ROCm clang can expose AVX-512 feature macros that trigger an incompatible + # Blaze SIMD floor implementation path on some hosts. + list( APPEND KITRT_RELEASE_OPTIONS -mno-avx512f -mno-avx512bw -mno-avx512dq ) + list( APPEND KITRT_RELWITHDEBINFO_OPTIONS -mno-avx512f -mno-avx512bw -mno-avx512dq ) + message( STATUS "HIP host C++ compile options: disabling AVX-512 for Blaze compatibility" ) +endif() if( BUILD_UNITY AND NOT BUILD_CODE_COV ) message( STATUS "Unity build enabled" ) set( CMAKE_UNITY_BUILD ON ) @@ -57,6 +77,14 @@ else() message( STATUS "CUDA HPC solver: disabled" ) endif() +if( BUILD_HIP_HPC ) + add_compile_definitions( KITRT_ENABLE_HIP_HPC ) + find_package( hip REQUIRED CONFIG ) + message( STATUS "HIP HPC solver: enabled" ) +else() + message( STATUS "HIP HPC solver: disabled" ) +endif() + message(STATUS "MPI build flag: ${BUILD_MPI}") if( BUILD_MPI ) add_definitions(-DIMPORT_MPI) @@ -113,6 +141,16 @@ if( BUILD_CUDA_HPC ) endif() endif() +if( BUILD_HIP_HPC ) + if( TARGET hip::host ) + # Link only the host runtime to avoid propagating HIP device compile flags + # (e.g. --offload-arch=...) into non-HIP C++ translation units. + list( APPEND CORE_LIBRARIES hip::host ) + else() + message( FATAL_ERROR "Could not find HIP runtime target (hip::host)." ) + endif() +endif() + ################################################# @@ -136,6 +174,10 @@ if( BUILD_CUDA_HPC ) # imported target features inject an older fallback standard. set_source_files_properties( src/solvers/snsolver_hpc.cu PROPERTIES COMPILE_FLAGS "--std=c++17" ) endif() +if( BUILD_HIP_HPC ) + list( APPEND SRCS "src/solvers/snsolver_hpc.hip" "include/solvers/snsolver_hpc_hip.hpp" ) + set_source_files_properties( src/solvers/snsolver_hpc.hip PROPERTIES LANGUAGE HIP ) +endif() set( EXCLUDE_DIR "/gui/" ) foreach( TMP_PATH ${SRCS} ) string( FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND ) @@ -163,6 +205,12 @@ if( BUILD_CUDA_HPC ) PROPERTIES CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON CUDA_EXTENSIONS OFF ) endif() +if( BUILD_HIP_HPC ) + set_target_properties( + ${CMAKE_PROJECT_NAME} + PROPERTIES HIP_STANDARD 17 HIP_STANDARD_REQUIRED ON HIP_EXTENSIONS OFF + ) +endif() target_compile_options( ${CMAKE_PROJECT_NAME} PUBLIC "$<$,$>:${KITRT_DEBUG_OPTIONS}>" ) target_compile_options( ${CMAKE_PROJECT_NAME} PUBLIC "$<$,$>:${KITRT_RELWITHDEBINFO_OPTIONS}>" ) target_compile_options( ${CMAKE_PROJECT_NAME} PUBLIC "$<$,$>:${KITRT_RELEASE_OPTIONS}>" ) @@ -204,6 +252,12 @@ if( BUILD_TESTING ) PROPERTIES CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON CUDA_EXTENSIONS OFF ) endif() + if( BUILD_HIP_HPC ) + set_target_properties( + unit_tests + PROPERTIES HIP_STANDARD 17 HIP_STANDARD_REQUIRED ON HIP_EXTENSIONS OFF + ) + endif() target_compile_options( unit_tests PUBLIC "$<$,$>:${KITRT_DEBUG_OPTIONS}>" ) if( BUILD_CODE_COV ) if( CMAKE_COMPILER_IS_GNUCXX ) diff --git a/README.md b/README.md index 9197e1e8..f38bfac3 100644 --- a/README.md +++ b/README.md @@ -143,9 +143,9 @@ singularity exec tools/singularity/kit_rt_MPI.sif \ mpirun -np 4 ./build_singularity_mpi/KiT-RT tests/input/validation_tests/SN_solver_hpc/lattice_hpc_200_cpu_order2.cfg ``` -### 3. CPU + CUDA (single or multi-GPU via MPI) +### 3. CPU + GPU backend (single or multi-GPU via MPI) -#### 3a) Singularity installation +#### 3a) CUDA (Singularity installation) ```bash cd tools/singularity sudo singularity build kit_rt_MPI_cuda.sif kit_rt_MPI_cuda.def @@ -162,6 +162,23 @@ singularity exec --nv tools/singularity/kit_rt_MPI_cuda.sif \ When compiled with `-DBUILD_CUDA_HPC=ON`, HPC runs use the CUDA backend if a GPU is visible, and fall back to CPU if no GPU is detected. +#### 3b) ROCm/HIP (Singularity installation) +```bash +cd tools/singularity +sudo singularity build kit_rt_MPI_rocm72.sif kit_rt_MPI_rocm72.def +cd ../.. +mkdir -p build_singularity_rocm72 +cd build_singularity_rocm72 +singularity exec --rocm ../tools/singularity/kit_rt_MPI_rocm72.sif \ + cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_MPI=ON -DBUILD_CUDA_HPC=OFF -DBUILD_HIP_HPC=ON -DBUILD_ML=OFF .. +singularity exec --rocm ../tools/singularity/kit_rt_MPI_rocm72.sif make -j +cd .. +singularity exec --rocm tools/singularity/kit_rt_MPI_rocm72.sif \ + mpirun -np 2 ./build_singularity_rocm72/KiT-RT tests/input/validation_tests/SN_solver_hpc/lattice_hpc_200_cuda_order2.cfg +``` + +When compiled with `-DBUILD_HIP_HPC=ON`, HPC runs use the HIP backend if a ROCm-capable GPU is visible, and fall back to CPU if no GPU is detected. + ### 4. Build with TensorFlow backend (CPU + OpenMP only) ```bash diff --git a/examples/configs/readme.md b/examples/configs/readme.md deleted file mode 100644 index f1fa1529..00000000 --- a/examples/configs/readme.md +++ /dev/null @@ -1,3 +0,0 @@ -## Attention - Please copy the configuration, that you want to simulate, from this folder to its parent directory. - diff --git a/include/solvers/snsolver_hpc_hip.hpp b/include/solvers/snsolver_hpc_hip.hpp new file mode 100644 index 00000000..6789b423 --- /dev/null +++ b/include/solvers/snsolver_hpc_hip.hpp @@ -0,0 +1,246 @@ +#ifndef SNSOLVERHPCHIP_H +#define SNSOLVERHPCHIP_H + +// include Matrix, Vector definitions +#include "common/globalconstants.hpp" +#include "common/typedef.hpp" + +// externals +#include "spdlog/spdlog.h" + +// Forward Declarations +class Config; +class Mesh; +class ProblemBase; + +/*! @brief Base class for all solvers. */ +class SNSolverHPCHIP +{ + + private: + struct DeviceBuffers; + + int _rank; + int _numProcs; + unsigned long _localNSys; + unsigned long _startSysIdx; + unsigned long _endSysIdx; + + double _curSimTime;/*!< @brief current in-simulation time after current iteration */ + double _currTime; /*!< @brief wall-time after current iteration */ + Config* _settings; /*!< @brief config class for global information */ + Mesh* _mesh; + ProblemBase* _problem; + + // Time + unsigned long _nIter; /*!< @brief number of time steps, for non CSD, this equals _nEnergies, for _csd, _maxIter =_nEnergies-1*/ + double _dT; /*!< @brief energy/time step size */ + int _idx_start_iter; /*!< @brief index of first iteration */ + // Mesh related members, memory optimized + unsigned long _nCells; /*!< @brief number of spatial cells */ + unsigned long _nSys; /*!< @brief number of equations in the transport system, i.e. num quad pts */ + unsigned long _nq; /*!< @brief number of quadrature points */ + unsigned long _nDim; + unsigned long _nNbr; + unsigned long _nNodes; + + std::vector _areas; /*!< @brief surface area of all spatial cells, + dim(_areas) = _NCells */ + std::vector _normals; /*!< @brief edge normals multiplied by edge length, + dim(_normals) = (_NCells,nEdgesPerCell,spatialDim) */ + std::vector _neighbors; /*!< @brief edge neighbor cell ids, dim(_neighbors) = (_NCells,nEdgesPerCell) */ + std::vector _cellMidPoints; /*!< @brief dim _nCells x _dim */ + + std::vector _interfaceMidPoints; /*!< @brief dim: _nCells x _nEdgesPerCell x _dim */ + std::vector _cellBoundaryTypes; /*!< @brief dim: _nCells x _nEdgesPerCell x _dim */ + std::map> _ghostCells; /*!< @brief Vector of ghost cells for boundary conditions. CAN BE MORE EFFICIENT */ + std::vector _relativeInterfaceMidPt; /*!< @brief dim _nCells * _nNbr * _nDim */ + std::vector _relativeCellVertices; /*!< @brief dim _nCells * _nNbr * _nDim */ + + std::map _ghostCellsReflectingY; /*!< map that indicates if a ghostcell has a fixed value or is a mirroring boundary */ + std::map _ghostCellsReflectingX; /*!< map that indicates if a ghostcell has a fixed value or is a mirroring boundary */ + std::vector _quadratureYReflection; /*!< map that gives a Reflection against the y axis for the velocity ordinates */ + std::vector _quadratureXReflection; /*!< map that gives a Reflection against the y axis for the velocity ordinates */ + + unsigned _temporalOrder; /*!< @brief temporal order (current: 1 & 2) */ + unsigned _spatialOrder; /*!< @brief spatial order (current: 1 & 2) */ + std::vector _solDx; /*!< @brief dim = _nCells x _nSys x _dim*/ + std::vector _limiter; /*!< @brief dim = _nCells x _nSys */ + + // Scattering, absorption and source + std::vector _sigmaS; /*!< @brief dim: _nCells */ + std::vector _sigmaT; /*!< @brief dim: _nCells */ + std::vector _source; /*!< @brief dim: _nCells x _nSys */ + std::vector _scatteringKernel; /*!< @brief dim: _nSys x _nSys */ + + // quadrature related numbers + std::vector _quadPts; /*!< @brief dim: _nSys x _dim*/ + std::vector _quadWeights; /*!< @brief dim: _nSys*/ + + // Solution related members + std::vector _sol; /*!< @brief dim = _nCells x _nSys */ + std::vector _flux; /*!< @brief dim = _nCells x _nSys */ + + // Output related members + std::vector _scalarFlux; /*!< @brief dim = _nCells */ + std::vector _scalarFluxPrevIter; /*!< @brief scalar flux snapshot before current solver iteration */ + + // Lattice QOIS + unsigned _nOutputMoments; + + double _mass; + double _rmsFlux; + double _curAbsorptionLattice; /*!< @brief Absorption of particles at Lattice checkerboard regions at current time step */ + double _totalAbsorptionLattice; /*!< @brief Absorption of particles at Lattice checkerboard regions integrated until current time step */ + double _curMaxAbsorptionLattice; /*!< @brief Maximum pointwise absorption of particles at Lattice checkerboard regions until current time step */ + double _curScalarOutflow; /*!< @brief Outflow over whole boundary at current time step */ + double _totalScalarOutflow; /*!< @brief Outflow over whole boundary integrated until current time step */ + double _curMaxOrdinateOutflow; /*!< @brief Maximum ordinate-wise ouftlow over boundary over all time steps */ + std::vector _localMaxOrdinateOutflow; /*!< @brief Maximum ordinate-wise ouftlow over boundary over all time steps */ + double _curScalarOutflowPeri1; /*!< @brief Outflow over whole boundary at current time step */ + double _totalScalarOutflowPeri1; /*!< @brief Outflow over whole boundary integrated until current time step */ + double _curScalarOutflowPeri2; /*!< @brief Outflow over whole boundary at current time step */ + double _totalScalarOutflowPeri2; /*!< @brief Outflow over whole boundary integrated until current time step */ + // helper + std::map> _cellsLatticePerimeter1; + std::map> _cellsLatticePerimeter2; + std::vector _isPerimeterLatticeCell1; + std::vector _isPerimeterLatticeCell2; + + // Hohlraum QOIS + double _totalAbsorptionHohlraumCenter; + double _totalAbsorptionHohlraumVertical; + double _totalAbsorptionHohlraumHorizontal; + double _curAbsorptionHohlraumCenter; + double _curAbsorptionHohlraumVertical; + double _curAbsorptionHohlraumHorizontal; + double _varAbsorptionHohlraumGreen; + + std::vector> _probingCellsHohlraum; /*!< @brief Indices of cells that contain a probing sensor */ + std::vector _probingMoments; /*!< @brief Solution Momnets at the probing cells that contain a probing sensor */ + unsigned _probingMomentsTimeIntervals; /*!< @brief Solution Momnets at the probing cells that contain a probing sensor */ + + unsigned _nProbingCellsLineGreen; /*!< @brief Number of sampling cells that contain a probing sensor for the sliding window */ + std::vector _probingCellsLineGreen; /*!< @brief Indices of cells that contain a probing sensor for the sliding window */ + std::vector _absorptionValsLineSegment; /*!< @brief Avg Absorption value at the sampleing points of lineGreen */ + + unsigned _nProbingCellsBlocksGreen; + std::vector> _probingCellsBlocksGreen; /*!< @brief Indices of cells that contain a probing sensor blocks */ + std::vector _absorptionValsBlocksGreen; /*!< @brief Avg Absorption value at the sampleing blocks of lineGreen */ + + // Design parameters + std::vector _cornerUpperLeftGreen; /*!< @brief Coord of corner of the green area (minus thickness/2 of it) relative to the green center */ + std::vector _cornerLowerLeftGreen; /*!< @brief Coord of corner of the green area (minus thickness/2 of it) relative to the green center */ + std::vector + _cornerUpperRightGreen; /*!< @brief Coord of corner of the green area (minus thickness/2 of it) relative to the green center */ + std::vector + _cornerLowerRightGreen; /*!< @brief Coord of corner of the green area (minus thickness/2 of it) relative to the green center */ + + double _thicknessGreen; /*!< @brief thickness of the green area */ + std::vector _centerGreen; /*!< @brief Center of the Hohlraum */ + + // Output + std::vector>> _outputFields; /*!< @brief Solver Output: dimensions + (GroupID,FieldID,CellID).*/ + std::vector> _outputFieldNames; /*!< @brief Names of the outputFields: dimensions + (GroupID,FieldID) */ + + std::vector _screenOutputFields; /*!< @brief Solver Output: dimensions (FieldID). */ + std::vector _screenOutputFieldNames; /*!< @brief Names of the outputFields: dimensions (FieldID) */ + + std::vector _historyOutputFields; /*!< @brief Solver Output: dimensions (FieldID). */ + std::vector _historyOutputFieldNames; /*!< @brief Names of the outputFields: dimensions (FieldID) */ + + // HIP backend state + bool _hipInitialized; + int _hipDeviceId; + DeviceBuffers* _device; + std::vector _cellBoundaryTypesInt; + std::vector _ghostCellValues; + + // ---- Member functions ---- + + // Solver + void FluxOrder1(); + void FluxOrder2(); + + void FVMUpdate(); + + void IterPostprocessing(); + + /*! @brief Sets vector of ghost cell values according to boundary conditions */ + void SetGhostCells(); + + // Helper + /*! @brief ComputeTimeStep calculates the maximal stable time step using the + cfl number + @param cfl Courant-Friedrichs-Levy condition number */ + double ComputeTimeStep( double cfl ) const; + + // IO + /*! @brief Initializes the output groups and fields of this solver and names + * the fields */ + void PrepareVolumeOutput(); + /*! @brief Function that prepares VTK export and csv export of the current + solver iteration + @param idx_iter current (pseudo) time iteration */ + void WriteVolumeOutput( unsigned idx_iter ); + /*! @brief Save Output solution at given energy (pseudo time) to VTK file. + Write frequency is given by option VOLUME_OUTPUT_FREQUENCY. Always prints + last iteration without iteration affix. + @param idx_iter current (pseudo) time iteration */ + void PrintVolumeOutput( int idx_iter ); + /*! @brief Initialized the output fields and their Names for the screenoutput + */ + void PrepareScreenOutput(); + + /*! @brief Function that writes screen and history output fields + @param idx_iter current (pseudo) time iteration */ + void WriteScalarOutput( unsigned idx_iter ); + /*! @brief Prints ScreenOutputFields to Screen and to logger. Write frequency + is given by option SCREEN_OUTPUT_FREQUENCY. Always prints last iteration. + @param idx_iter current (pseudo) time iteration */ + void PrintScreenOutput( unsigned idx_iter ); + /*! @brief Initialized the historyOutputFields and their Names for history + output. Write frequency is given by option HISTORY_OUTPUT_FREQUENCY. Always + prints last iteration. */ + void PrepareHistoryOutput(); + /*! @brief Prints HistoryOutputFields to logger + @param idx_iter current (pseudo) time iteration */ + void PrintHistoryOutput( unsigned idx_iter ); + /*! @brief Pre Solver Screen and Logger Output */ + void DrawPreSolverOutput(); + /*! @brief Post Solver Screen and Logger Output */ + void DrawPostSolverOutput(); + + // Helper + unsigned long Idx2D( unsigned long idx1, unsigned long idx2, unsigned long len2 ); + unsigned long Idx3D( unsigned long idx1, unsigned long idx2, unsigned long idx3, unsigned long len2, unsigned long len3 ); + bool IsAbsorptionLattice( double x, double y ) const; + void ComputeCellsPerimeterLattice(); + + void SetProbingCellsLineGreen(); + void ComputeQOIsGreenProbingLine(); + std::vector linspace2D( const std::vector& start, const std::vector& end, unsigned num_points ); + + void InitHIP(); + void UploadStaticDataToDevice(); + void UploadStateToDevice(); + void DownloadStateFromDevice(); + void FreeHIP(); + + public: + /*! @brief Solver constructor + * @param settings config class that stores all needed config information */ + SNSolverHPCHIP( Config* settings ); + + ~SNSolverHPCHIP(); + + /*! @brief Solve functions runs main iteration loop. Components of the solve + * loop are pure and subclassed by the child solvers. */ + void Solve(); + + /*! @brief Save Output solution to VTK file */ + void PrintVolumeOutput() const {}; // Only for debugging purposes. +}; +#endif // SNSOLVERHPCHIP_H diff --git a/src/main.cpp b/src/main.cpp index 76d8f63f..02188db7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -18,6 +18,10 @@ #include #include "solvers/snsolver_hpc_cuda.hpp" #endif +#ifdef KITRT_ENABLE_HIP_HPC +#include +#include "solvers/snsolver_hpc_hip.hpp" +#endif #include "solvers/solverbase.hpp" #ifdef BUILD_GUI @@ -63,7 +67,7 @@ int main( int argc, char** argv ) { else { // Build solver if( config->GetHPC() ) { -#ifdef KITRT_ENABLE_CUDA_HPC +#if defined( KITRT_ENABLE_CUDA_HPC ) int deviceCount = 0; if( cudaGetDeviceCount( &deviceCount ) == cudaSuccess && deviceCount > 0 ) { SNSolverHPCCUDA* solver = new SNSolverHPCCUDA( config ); @@ -75,6 +79,18 @@ int main( int argc, char** argv ) { solver->Solve(); delete solver; } +#elif defined( KITRT_ENABLE_HIP_HPC ) + int deviceCount = 0; + if( hipGetDeviceCount( &deviceCount ) == hipSuccess && deviceCount > 0 ) { + SNSolverHPCHIP* solver = new SNSolverHPCHIP( config ); + solver->Solve(); + delete solver; + } + else { + SNSolverHPC* solver = new SNSolverHPC( config ); + solver->Solve(); + delete solver; + } #else SNSolverHPC* solver = new SNSolverHPC( config ); solver->Solve(); diff --git a/src/solvers/snsolver_hpc.hip b/src/solvers/snsolver_hpc.hip new file mode 100644 index 00000000..3418da0b --- /dev/null +++ b/src/solvers/snsolver_hpc.hip @@ -0,0 +1,2083 @@ +#ifdef IMPORT_MPI +#include +#endif +#include "common/config.hpp" +#include "common/io.hpp" +#include "common/mesh.hpp" +#include "kernels/scatteringkernelbase.hpp" +#include "problems/problembase.hpp" +#include "quadratures/quadraturebase.hpp" +#include "solvers/snsolver_hpc_hip.hpp" +#include "toolboxes/textprocessingtoolbox.hpp" +#include +#include +#include +#include +#include + +struct SNSolverHPCHIP::DeviceBuffers { + double* areas = nullptr; + double* normals = nullptr; + unsigned* neighbors = nullptr; + int* boundaryTypes = nullptr; + double* ghostCellValues = nullptr; + double* cellMidPoints = nullptr; + double* interfaceMidPoints = nullptr; + double* relativeInterfaceMid = nullptr; + double* relativeCellVertices = nullptr; + + double* sigmaS = nullptr; + double* sigmaT = nullptr; + double* source = nullptr; + double* quadPts = nullptr; + double* quadWeights = nullptr; + + double* sol = nullptr; + double* solRK0 = nullptr; + double* flux = nullptr; + double* scalarFlux = nullptr; + double* solDx = nullptr; + double* limiter = nullptr; +}; + +namespace { + +constexpr int HIP_BLOCK_SIZE = 256; + +inline void CheckHip( hipError_t status, const std::string& where ) { + if( status != hipSuccess ) { + ErrorMessages::Error( "HIP error in " + where + ": " + std::string( hipGetErrorString( status ) ), where ); + } +} + +template inline void AllocateDeviceArray( T** ptr, std::size_t count, const std::string& name ) { + if( count == 0 ) { + *ptr = nullptr; + return; + } + CheckHip( hipMalloc( reinterpret_cast( ptr ), count * sizeof( T ) ), "hipMalloc(" + name + ")" ); +} + +template inline void FreeDeviceArray( T*& ptr, const std::string& name ) { + if( ptr == nullptr ) return; + CheckHip( hipFree( ptr ), "hipFree(" + name + ")" ); + ptr = nullptr; +} + +__device__ __forceinline__ unsigned long Idx2DDevice( unsigned long idx1, unsigned long idx2, unsigned long len2 ) { return idx1 * len2 + idx2; } + +__device__ __forceinline__ unsigned long +Idx3DDevice( unsigned long idx1, unsigned long idx2, unsigned long idx3, unsigned long len2, unsigned long len3 ) { + return idx1 * len2 * len3 + idx2 * len3 + idx3; +} + +__global__ void FluxOrder1Kernel( unsigned long nCells, + unsigned long localNSys, + unsigned long nNbr, + unsigned long nDim, + const double* __restrict__ quadPts, + const double* __restrict__ normals, + const unsigned* __restrict__ neighbors, + const int* __restrict__ boundaryTypes, + const double* __restrict__ ghostCellValues, + const double* __restrict__ sol, + double* flux, + int neumannBoundary ) { + const unsigned long linearIdx = static_cast( blockIdx.x ) * blockDim.x + threadIdx.x; + const unsigned long nCellSys = nCells * localNSys; + if( linearIdx >= nCellSys ) return; + + const unsigned long idxCell = linearIdx / localNSys; + const unsigned long idxSys = linearIdx % localNSys; + + double localFlux = 0.0; + for( unsigned long idxNbr = 0; idxNbr < nNbr; ++idxNbr ) { + const unsigned nbrCell = neighbors[Idx2DDevice( idxCell, idxNbr, nNbr )]; + const double localInner = quadPts[Idx2DDevice( idxSys, 0, nDim )] * normals[Idx3DDevice( idxCell, idxNbr, 0, nNbr, nDim )] + + quadPts[Idx2DDevice( idxSys, 1, nDim )] * normals[Idx3DDevice( idxCell, idxNbr, 1, nNbr, nDim )]; + + if( boundaryTypes[idxCell] == neumannBoundary && nbrCell == nCells ) { + localFlux += ( localInner > 0.0 ) ? localInner * sol[Idx2DDevice( idxCell, idxSys, localNSys )] + : localInner * ghostCellValues[Idx2DDevice( idxCell, idxSys, localNSys )]; + } + else { + localFlux += ( localInner > 0.0 ) ? localInner * sol[Idx2DDevice( idxCell, idxSys, localNSys )] + : localInner * sol[Idx2DDevice( static_cast( nbrCell ), idxSys, localNSys )]; + } + } + flux[Idx2DDevice( idxCell, idxSys, localNSys )] = localFlux; +} + +__global__ void FluxOrder2SlopeKernel( unsigned long nCells, + unsigned long localNSys, + unsigned long nNbr, + unsigned long nDim, + const double* __restrict__ areas, + const double* __restrict__ normals, + const unsigned* __restrict__ neighbors, + const int* __restrict__ boundaryTypes, + const double* __restrict__ sol, + double* solDx, + double* limiter, + int noneBoundary ) { + const unsigned long linearIdx = static_cast( blockIdx.x ) * blockDim.x + threadIdx.x; + const unsigned long nCellSys = nCells * localNSys; + if( linearIdx >= nCellSys ) return; + + const unsigned long idxCell = linearIdx / localNSys; + const unsigned long idxSys = linearIdx % localNSys; + + const unsigned long limiterIdx = Idx2DDevice( idxCell, idxSys, localNSys ); + const unsigned long solDxXIdx = Idx3DDevice( idxCell, idxSys, 0, localNSys, nDim ); + const unsigned long solDxYIdx = Idx3DDevice( idxCell, idxSys, 1, localNSys, nDim ); + + if( boundaryTypes[idxCell] != noneBoundary ) { + limiter[limiterIdx] = 0.0; + solDx[solDxXIdx] = 0.0; + solDx[solDxYIdx] = 0.0; + return; + } + + double slopeX = 0.0; + double slopeY = 0.0; + const double thisSol = sol[Idx2DDevice( idxCell, idxSys, localNSys )]; + + for( unsigned long idxNbr = 0; idxNbr < nNbr; ++idxNbr ) { + const unsigned nbrCell = neighbors[Idx2DDevice( idxCell, idxNbr, nNbr )]; + const double avgVal = 0.5 * ( thisSol + sol[Idx2DDevice( static_cast( nbrCell ), idxSys, localNSys )] ); + slopeX += avgVal * normals[Idx3DDevice( idxCell, idxNbr, 0, nNbr, nDim )]; + slopeY += avgVal * normals[Idx3DDevice( idxCell, idxNbr, 1, nNbr, nDim )]; + } + + slopeX /= areas[idxCell]; + slopeY /= areas[idxCell]; + + limiter[limiterIdx] = 1.0; + solDx[solDxXIdx] = slopeX; + solDx[solDxYIdx] = slopeY; +} + +__global__ void FluxOrder2LimiterKernel( unsigned long nCells, + unsigned long localNSys, + unsigned long nNbr, + unsigned long nDim, + const double* __restrict__ areas, + const unsigned* __restrict__ neighbors, + const int* __restrict__ boundaryTypes, + const double* __restrict__ relativeCellVertices, + const double* __restrict__ sol, + const double* __restrict__ solDx, + double* limiter, + int noneBoundary, + double eps ) { + const unsigned long linearIdx = static_cast( blockIdx.x ) * blockDim.x + threadIdx.x; + const unsigned long nCellSys = nCells * localNSys; + if( linearIdx >= nCellSys ) return; + + const unsigned long idxCell = linearIdx / localNSys; + const unsigned long idxSys = linearIdx % localNSys; + if( boundaryTypes[idxCell] != noneBoundary ) return; + + const unsigned long solIdx = Idx2DDevice( idxCell, idxSys, localNSys ); + const double thisSol = sol[solIdx]; + + double minSol = thisSol; + double maxSol = thisSol; + for( unsigned long idxNbr = 0; idxNbr < nNbr; ++idxNbr ) { + const unsigned nbrCell = neighbors[Idx2DDevice( idxCell, idxNbr, nNbr )]; + const double nbrSol = sol[Idx2DDevice( static_cast( nbrCell ), idxSys, localNSys )]; + maxSol = fmax( maxSol, nbrSol ); + minSol = fmin( minSol, nbrSol ); + } + + double limVal = 1.0; + const double d = areas[idxCell]; + const double sx = solDx[Idx3DDevice( idxCell, idxSys, 0, localNSys, nDim )]; + const double sy = solDx[Idx3DDevice( idxCell, idxSys, 1, localNSys, nDim )]; + + for( unsigned long idxNbr = 0; idxNbr < nNbr; ++idxNbr ) { + const double gaussPoint = sx * relativeCellVertices[Idx3DDevice( idxCell, idxNbr, 0, nNbr, nDim )] + + sy * relativeCellVertices[Idx3DDevice( idxCell, idxNbr, 1, nNbr, nDim )]; + + const double delta1Max = maxSol - thisSol; + const double delta1Min = minSol - thisSol; + double r = 1.0; + if( gaussPoint > 0.0 ) { + r = ( ( delta1Max * delta1Max + d ) * gaussPoint + 2.0 * gaussPoint * gaussPoint * delta1Max ) / + ( delta1Max * delta1Max + 2.0 * gaussPoint * gaussPoint + delta1Max * gaussPoint + d ) / ( gaussPoint + eps ); + } + else { + r = ( ( delta1Min * delta1Min + d ) * gaussPoint + 2.0 * gaussPoint * gaussPoint * delta1Min ) / + ( delta1Min * delta1Min + 2.0 * gaussPoint * gaussPoint + delta1Min * gaussPoint + d ) / ( gaussPoint - eps ); + } + if( fabs( gaussPoint ) < eps ) { + r = 1.0; + } + limVal = fmin( limVal, r ); + } + limiter[solIdx] = limVal; +} + +__global__ void FluxOrder2Kernel( unsigned long nCells, + unsigned long localNSys, + unsigned long nNbr, + unsigned long nDim, + const double* __restrict__ quadPts, + const double* __restrict__ normals, + const unsigned* __restrict__ neighbors, + const int* __restrict__ boundaryTypes, + const double* __restrict__ ghostCellValues, + const double* __restrict__ interfaceMidPoints, + const double* __restrict__ cellMidPoints, + const double* __restrict__ relativeInterfaceMid, + const double* __restrict__ sol, + const double* __restrict__ solDx, + const double* __restrict__ limiter, + double* flux, + int neumannBoundary ) { + const unsigned long linearIdx = static_cast( blockIdx.x ) * blockDim.x + threadIdx.x; + const unsigned long nCellSys = nCells * localNSys; + if( linearIdx >= nCellSys ) return; + + const unsigned long idxCell = linearIdx / localNSys; + const unsigned long idxSys = linearIdx % localNSys; + + double localFlux = 0.0; + for( unsigned long idxNbr = 0; idxNbr < nNbr; ++idxNbr ) { + const unsigned nbrCell = neighbors[Idx2DDevice( idxCell, idxNbr, nNbr )]; + const double localInner = quadPts[Idx2DDevice( idxSys, 0, nDim )] * normals[Idx3DDevice( idxCell, idxNbr, 0, nNbr, nDim )] + + quadPts[Idx2DDevice( idxSys, 1, nDim )] * normals[Idx3DDevice( idxCell, idxNbr, 1, nNbr, nDim )]; + + if( boundaryTypes[idxCell] == neumannBoundary && nbrCell == nCells ) { + localFlux += ( localInner > 0.0 ) ? localInner * sol[Idx2DDevice( idxCell, idxSys, localNSys )] + : localInner * ghostCellValues[Idx2DDevice( idxCell, idxSys, localNSys )]; + continue; + } + + if( localInner > 0.0 ) { + const double recVal = + sol[Idx2DDevice( idxCell, idxSys, localNSys )] + + limiter[Idx2DDevice( idxCell, idxSys, localNSys )] * ( solDx[Idx3DDevice( idxCell, idxSys, 0, localNSys, nDim )] * + relativeInterfaceMid[Idx3DDevice( idxCell, idxNbr, 0, nNbr, nDim )] + + solDx[Idx3DDevice( idxCell, idxSys, 1, localNSys, nDim )] * + relativeInterfaceMid[Idx3DDevice( idxCell, idxNbr, 1, nNbr, nDim )] ); + localFlux += localInner * recVal; + } + else { + const unsigned long nbrCellUL = static_cast( nbrCell ); + const double recVal = + sol[Idx2DDevice( nbrCellUL, idxSys, localNSys )] + + limiter[Idx2DDevice( nbrCellUL, idxSys, localNSys )] * + ( solDx[Idx3DDevice( nbrCellUL, idxSys, 0, localNSys, nDim )] * + ( interfaceMidPoints[Idx3DDevice( idxCell, idxNbr, 0, nNbr, nDim )] - cellMidPoints[Idx2DDevice( nbrCellUL, 0, nDim )] ) + + solDx[Idx3DDevice( nbrCellUL, idxSys, 1, localNSys, nDim )] * + ( interfaceMidPoints[Idx3DDevice( idxCell, idxNbr, 1, nNbr, nDim )] - cellMidPoints[Idx2DDevice( nbrCellUL, 1, nDim )] ) ); + localFlux += localInner * recVal; + } + } + + flux[Idx2DDevice( idxCell, idxSys, localNSys )] = localFlux; +} + +__global__ void FVMUpdateKernel( unsigned long nCells, + unsigned long localNSys, + double dT, + const double* __restrict__ sigmaS, + const double* __restrict__ sigmaT, + const double* __restrict__ source, + const double* __restrict__ areas, + const double* __restrict__ quadWeights, + const double* __restrict__ flux, + double* sol, + double* scalarFlux, + double invTwoPi ) { + const unsigned long idxCell = static_cast( blockIdx.x ) * blockDim.x + threadIdx.x; + if( idxCell >= nCells ) return; + + const double scattering = sigmaS[idxCell] * scalarFlux[idxCell] * invTwoPi; + double localScalarFlux = 0.0; + + for( unsigned long idxSys = 0; idxSys < localNSys; ++idxSys ) { + const unsigned long idx = Idx2DDevice( idxCell, idxSys, localNSys ); + double newVal = ( 1.0 - dT * sigmaT[idxCell] ) * sol[idx] - dT / areas[idxCell] * flux[idx] + dT * ( scattering + source[idx] ); + newVal = fmax( newVal, 0.0 ); + sol[idx] = newVal; + localScalarFlux += newVal * quadWeights[idxSys]; + } + scalarFlux[idxCell] = localScalarFlux; +} + +__global__ void RK2AverageAndScalarFluxKernel( unsigned long nCells, + unsigned long localNSys, + const double* __restrict__ quadWeights, + const double* __restrict__ solRK0, + double* sol, + double* scalarFlux ) { + const unsigned long idxCell = static_cast( blockIdx.x ) * blockDim.x + threadIdx.x; + if( idxCell >= nCells ) return; + + double localScalarFlux = 0.0; + for( unsigned long idxSys = 0; idxSys < localNSys; ++idxSys ) { + const unsigned long idx = Idx2DDevice( idxCell, idxSys, localNSys ); + const double avgVal = 0.5 * ( solRK0[idx] + sol[idx] ); + sol[idx] = avgVal; + localScalarFlux += avgVal * quadWeights[idxSys]; + } + scalarFlux[idxCell] = localScalarFlux; +} + +} // namespace + +SNSolverHPCHIP::SNSolverHPCHIP( Config* settings ) { +#ifdef IMPORT_MPI + // Initialize MPI + MPI_Comm_size( MPI_COMM_WORLD, &_numProcs ); + MPI_Comm_rank( MPI_COMM_WORLD, &_rank ); +#endif +#ifndef IMPORT_MPI + _numProcs = 1; // default values + _rank = 0; +#endif + _settings = settings; + _currTime = 0.0; + _idx_start_iter = 0; + _nOutputMoments = 2; // Currently only u_1 (x direction) and u_1 (y direction) are supported + _hipInitialized = false; + _hipDeviceId = 0; + _device = nullptr; + // Create Mesh + _mesh = LoadSU2MeshFromFile( settings ); + _settings->SetNCells( _mesh->GetNumCells() ); + auto quad = QuadratureBase::Create( settings ); + _settings->SetNQuadPoints( quad->GetNq() ); + + _nCells = static_cast( _mesh->GetNumCells() ); + _nNbr = static_cast( _mesh->GetNumNodesPerCell() ); + _nDim = static_cast( _mesh->GetDim() ); + _nNodes = static_cast( _mesh->GetNumNodes() ); + + _nq = static_cast( quad->GetNq() ); + _nSys = _nq; + + if( static_cast( _numProcs ) > _nq ) { + ErrorMessages::Error( "The number of processors must be less than or equal to the number of quadrature points.", CURRENT_FUNCTION ); + } + + const unsigned long numRanks = static_cast( _numProcs ); + const unsigned long rankIndex = static_cast( _rank ); + const unsigned long baseChunk = _nSys / numRanks; + const unsigned long remainder = _nSys % numRanks; + + _localNSys = baseChunk + ( rankIndex < remainder ? 1UL : 0UL ); + _startSysIdx = rankIndex * baseChunk + std::min( rankIndex, remainder ); + _endSysIdx = _startSysIdx + _localNSys; + + // std::cout << "Rank: " << _rank << " startSysIdx: " << _startSysIdx << " endSysIdx: " << _endSysIdx << " localNSys: " << _localNSys << + // std::endl; + + _spatialOrder = _settings->GetSpatialOrder(); + _temporalOrder = _settings->GetTemporalOrder(); + + _areas = std::vector( _nCells ); + _normals = std::vector( _nCells * _nNbr * _nDim ); + _neighbors = std::vector( _nCells * _nNbr ); + _cellMidPoints = std::vector( _nCells * _nDim ); + _interfaceMidPoints = std::vector( _nCells * _nNbr * _nDim ); + _cellBoundaryTypes = std::vector( _nCells ); + _relativeInterfaceMidPt = std::vector( _nCells * _nNbr * _nDim ); + _relativeCellVertices = std::vector( _nCells * _nNbr * _nDim ); + + // Slope + _solDx = std::vector( _nCells * _localNSys * _nDim, 0.0 ); + _limiter = std::vector( _nCells * _localNSys, 0.0 ); + + // Physics + _sigmaS = std::vector( _nCells ); + _sigmaT = std::vector( _nCells ); + _source = std::vector( _nCells * _localNSys ); + + // Quadrature + _quadPts = std::vector( _localNSys * _nDim ); + _quadWeights = std::vector( _localNSys ); + + // Solution + _sol = std::vector( _nCells * _localNSys ); + _flux = std::vector( _nCells * _localNSys ); + + _scalarFlux = std::vector( _nCells ); + _scalarFluxPrevIter = std::vector( _nCells ); + _localMaxOrdinateOutflow = std::vector( _nCells ); + + auto areas = _mesh->GetCellAreas(); + auto neighbors = _mesh->GetNeighbours(); + auto normals = _mesh->GetNormals(); + auto cellMidPts = _mesh->GetCellMidPoints(); + auto interfaceMidPts = _mesh->GetInterfaceMidPoints(); + auto boundaryTypes = _mesh->GetBoundaryTypes(); + auto nodes = _mesh->GetNodes(); + auto cells = _mesh->GetCells(); + +#pragma omp parallel for + for( unsigned long idx_cell = 0; idx_cell < _nCells; idx_cell++ ) { + _areas[idx_cell] = areas[idx_cell]; + } + + _dT = ComputeTimeStep( _settings->GetCFL() ); + _nIter = unsigned( _settings->GetTEnd() / _dT ) + 1; + + auto quadPoints = quad->GetPoints(); + auto quadWeights = quad->GetWeights(); + + _problem = ProblemBase::Create( _settings, _mesh, quad ); + auto sigmaT = _problem->GetTotalXS( Vector( _nIter, 0.0 ) ); + auto sigmaS = _problem->GetScatteringXS( Vector( _nIter, 0.0 ) ); + auto source = _problem->GetExternalSource( Vector( _nIter, 0.0 ) ); + + // Copy to everything to solver + _mass = 0; +#pragma omp parallel for + for( unsigned long idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + for( unsigned long idx_dim = 0; idx_dim < _nDim; idx_dim++ ) { + _quadPts[Idx2D( idx_sys, idx_dim, _nDim )] = quadPoints[idx_sys + _startSysIdx][idx_dim]; + } + if( _settings->GetQuadName() == QUAD_GaussLegendreTensorized2D ) { + _quadWeights[idx_sys] = + 2.0 * quadWeights[idx_sys + _startSysIdx]; // Rescaling of quadweights TODO: Check if this needs general refactoring + } + else { + _quadWeights[idx_sys] = quadWeights[idx_sys + _startSysIdx]; // Rescaling of quadweights TODO: Check if this needs general refactoring} + } + } + +#pragma omp parallel for + for( unsigned long idx_cell = 0; idx_cell < _nCells; idx_cell++ ) { + _cellBoundaryTypes[idx_cell] = boundaryTypes[idx_cell]; + + for( unsigned long idx_dim = 0; idx_dim < _nDim; idx_dim++ ) { + _cellMidPoints[Idx2D( idx_cell, idx_dim, _nDim )] = cellMidPts[idx_cell][idx_dim]; + } + + for( unsigned long idx_nbr = 0; idx_nbr < _nNbr; idx_nbr++ ) { + + _neighbors[Idx2D( idx_cell, idx_nbr, _nNbr )] = neighbors[idx_cell][idx_nbr]; + + for( unsigned long idx_dim = 0; idx_dim < _nDim; idx_dim++ ) { + _normals[Idx3D( idx_cell, idx_nbr, idx_dim, _nNbr, _nDim )] = normals[idx_cell][idx_nbr][idx_dim]; + _interfaceMidPoints[Idx3D( idx_cell, idx_nbr, idx_dim, _nNbr, _nDim )] = interfaceMidPts[idx_cell][idx_nbr][idx_dim]; + _relativeInterfaceMidPt[Idx3D( idx_cell, idx_nbr, idx_dim, _nNbr, _nDim )] = + _interfaceMidPoints[Idx3D( idx_cell, idx_nbr, idx_dim, _nNbr, _nDim )] - _cellMidPoints[Idx2D( idx_cell, idx_dim, _nDim )]; + _relativeCellVertices[Idx3D( idx_cell, idx_nbr, idx_dim, _nNbr, _nDim )] = + nodes[cells[idx_cell][idx_nbr]][idx_dim] - cellMidPts[idx_cell][idx_dim]; + } + } + + _sigmaS[idx_cell] = sigmaS[0][idx_cell]; + _sigmaT[idx_cell] = sigmaT[0][idx_cell]; + _scalarFlux[idx_cell] = 0; + + for( unsigned long idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + _source[Idx2D( idx_cell, idx_sys, _localNSys )] = source[0][idx_cell][0]; // CAREFUL HERE hardcoded to isotropic source + _sol[Idx2D( idx_cell, idx_sys, _localNSys )] = 0.0; // initial condition is zero + + _scalarFlux[idx_cell] += _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * _quadWeights[idx_sys]; + } + // _mass += _scalarFlux[idx_cell] * _areas[idx_cell]; + } + + // Lattice + { + _curAbsorptionLattice = 0; + _totalAbsorptionLattice = 0; + _curMaxAbsorptionLattice = 0; + _curScalarOutflow = 0; + _totalScalarOutflow = 0; + _curMaxOrdinateOutflow = 0; + _curScalarOutflowPeri1 = 0; + _totalScalarOutflowPeri1 = 0; + _curScalarOutflowPeri2 = 0; + _totalScalarOutflowPeri2 = 0; + ComputeCellsPerimeterLattice(); + } + // Hohlraum + { + _totalAbsorptionHohlraumCenter = 0; + _totalAbsorptionHohlraumVertical = 0; + _totalAbsorptionHohlraumHorizontal = 0; + _curAbsorptionHohlraumCenter = 0; + _curAbsorptionHohlraumVertical = 0; + _curAbsorptionHohlraumHorizontal = 0; + _varAbsorptionHohlraumGreen = 0; + } + + if( _settings->GetLoadRestartSolution() ) + _idx_start_iter = LoadRestartSolution( _settings->GetOutputFile(), + _sol, + _scalarFlux, + _rank, + _nCells, + _totalAbsorptionHohlraumCenter, + _totalAbsorptionHohlraumVertical, + _totalAbsorptionHohlraumHorizontal, + _totalAbsorptionLattice ) + + 1; + +#ifdef IMPORT_MPI + MPI_Barrier( MPI_COMM_WORLD ); +#endif + SetGhostCells(); + InitHIP(); + UploadStaticDataToDevice(); + UploadStateToDevice(); + + if( _rank == 0 ) { + PrepareScreenOutput(); // Screen Output + PrepareHistoryOutput(); // History Output + } +#ifdef IMPORT_MPI + MPI_Barrier( MPI_COMM_WORLD ); +#endif + delete quad; + + // Initialiye QOIS + _mass = 0; + _rmsFlux = 0; + + { // Hohlraum + + unsigned n_probes = 4; + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) n_probes = 4; + // if( _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) n_probes = 2; + + _probingMoments = std::vector( n_probes * 3, 0. ); // 10 time horizons + + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + _probingCellsHohlraum = { + _mesh->GetCellsofBall( -0.4, 0., 0.01 ), + _mesh->GetCellsofBall( 0.4, 0., 0.01 ), + _mesh->GetCellsofBall( 0., -0.5, 0.01 ), + _mesh->GetCellsofBall( 0., 0.5, 0.01 ), + }; + } + // else if( _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) { + // _probingCellsHohlraum = { + // _mesh->GetCellsofBall( 0.4, 0., 0.01 ), + // _mesh->GetCellsofBall( 0., 0.5, 0.01 ), + // }; + // } + + // Green + _thicknessGreen = 0.05; + + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + _centerGreen = { _settings->GetPosXCenterGreenHohlraum(), _settings->GetPosYCenterGreenHohlraum() }; + _cornerUpperLeftGreen = { -0.2 + _centerGreen[0], 0.4 + _centerGreen[1] }; + _cornerLowerLeftGreen = { -0.2 + _centerGreen[0], -0.4 + _centerGreen[1] }; + _cornerUpperRightGreen = { 0.2 + _centerGreen[0], 0.4 + _centerGreen[1] }; + _cornerLowerRightGreen = { 0.2 + _centerGreen[0], -0.4 + _centerGreen[1] }; + } + // else { + // _centerGreen = { 0.0, 0.0 }; + // _cornerUpperLeftGreen = { 0., 0.4 - _thicknessGreen / 2.0 }; + // _cornerLowerLeftGreen = { 0., +_thicknessGreen / 2.0 }; + // _cornerUpperRightGreen = { 0.2 - _thicknessGreen / 2.0, 0.4 - _thicknessGreen / 2.0 }; + // _cornerLowerRightGreen = { 0.2 - _thicknessGreen / 2.0, 0. + _thicknessGreen / 2.0 }; + // } + + _nProbingCellsLineGreen = _settings->GetNumProbingCellsLineHohlraum(); + + _nProbingCellsBlocksGreen = 44; + _absorptionValsBlocksGreen = std::vector( _nProbingCellsBlocksGreen, 0. ); + _absorptionValsLineSegment = std::vector( _nProbingCellsLineGreen, 0.0 ); + + SetProbingCellsLineGreen(); // ONLY FOR HOHLRAUM + } +} + +void SNSolverHPCHIP::InitHIP() { + int nDevices = 0; + CheckHip( hipGetDeviceCount( &nDevices ), "hipGetDeviceCount" ); + if( nDevices < 1 ) { + ErrorMessages::Error( "No HIP-capable GPU detected, but SNSolverHPCHIP was requested.", CURRENT_FUNCTION ); + } + + int localRank = 0; + int localSize = 1; +#ifdef IMPORT_MPI + MPI_Comm localComm = MPI_COMM_NULL; + MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, _rank, MPI_INFO_NULL, &localComm ); + MPI_Comm_rank( localComm, &localRank ); + MPI_Comm_size( localComm, &localSize ); + MPI_Comm_free( &localComm ); +#endif + + _hipDeviceId = localRank % nDevices; + CheckHip( hipSetDevice( _hipDeviceId ), "hipSetDevice" ); + + if( _rank == 0 ) { + auto log = spdlog::get( "event" ); + if( log ) { + log->info( "| HIP backend: {} local MPI rank(s), {} visible HIP device(s).", localSize, nDevices ); + if( localSize > nDevices ) { + log->warn( "| HIP backend: {} local MPI rank(s) exceed {} visible device(s); GPUs will be shared.", localSize, nDevices ); + } + } + } + + _device = new DeviceBuffers(); + + const std::size_t nCells = static_cast( _nCells ); + const std::size_t nCellNbr = nCells * static_cast( _nNbr ); + const std::size_t nCellNbrDim = nCellNbr * static_cast( _nDim ); + const std::size_t nCellSys = nCells * static_cast( _localNSys ); + const std::size_t nSysDim = static_cast( _localNSys ) * static_cast( _nDim ); + + AllocateDeviceArray( &_device->areas, nCells, "areas" ); + AllocateDeviceArray( &_device->normals, nCellNbrDim, "normals" ); + AllocateDeviceArray( &_device->neighbors, nCellNbr, "neighbors" ); + AllocateDeviceArray( &_device->boundaryTypes, nCells, "boundaryTypes" ); + AllocateDeviceArray( &_device->ghostCellValues, nCellSys, "ghostCellValues" ); + AllocateDeviceArray( &_device->cellMidPoints, nCells * static_cast( _nDim ), "cellMidPoints" ); + AllocateDeviceArray( &_device->interfaceMidPoints, nCellNbrDim, "interfaceMidPoints" ); + AllocateDeviceArray( &_device->relativeInterfaceMid, nCellNbrDim, "relativeInterfaceMid" ); + AllocateDeviceArray( &_device->relativeCellVertices, nCellNbrDim, "relativeCellVertices" ); + + AllocateDeviceArray( &_device->sigmaS, nCells, "sigmaS" ); + AllocateDeviceArray( &_device->sigmaT, nCells, "sigmaT" ); + AllocateDeviceArray( &_device->source, nCellSys, "source" ); + AllocateDeviceArray( &_device->quadPts, nSysDim, "quadPts" ); + AllocateDeviceArray( &_device->quadWeights, static_cast( _localNSys ), "quadWeights" ); + + AllocateDeviceArray( &_device->sol, nCellSys, "sol" ); + AllocateDeviceArray( &_device->solRK0, nCellSys, "solRK0" ); + AllocateDeviceArray( &_device->flux, nCellSys, "flux" ); + AllocateDeviceArray( &_device->scalarFlux, nCells, "scalarFlux" ); + AllocateDeviceArray( &_device->solDx, nCellSys * static_cast( _nDim ), "solDx" ); + AllocateDeviceArray( &_device->limiter, nCellSys, "limiter" ); + + _hipInitialized = true; +} + +void SNSolverHPCHIP::UploadStaticDataToDevice() { + if( !_hipInitialized || _device == nullptr ) { + ErrorMessages::Error( "HIP backend was not initialized before UploadStaticDataToDevice.", CURRENT_FUNCTION ); + } + + _cellBoundaryTypesInt.resize( _nCells, static_cast( BOUNDARY_TYPE::INVALID ) ); +#pragma omp parallel for + for( unsigned long idxCell = 0; idxCell < _nCells; ++idxCell ) { + _cellBoundaryTypesInt[idxCell] = static_cast( _cellBoundaryTypes[idxCell] ); + } + + _ghostCellValues.assign( static_cast( _nCells ) * static_cast( _localNSys ), 0.0 ); + for( const auto& cellEntry : _ghostCells ) { + const unsigned idxCell = cellEntry.first; + const std::vector& values = cellEntry.second; + const unsigned long nGhostOrdinate = std::min( _localNSys, static_cast( values.size() ) ); + for( unsigned long idxSys = 0; idxSys < nGhostOrdinate; ++idxSys ) { + _ghostCellValues[Idx2D( idxCell, idxSys, _localNSys )] = values[idxSys]; + } + } + + const std::size_t nCells = static_cast( _nCells ); + const std::size_t nCellNbr = nCells * static_cast( _nNbr ); + const std::size_t nCellNbrDim = nCellNbr * static_cast( _nDim ); + const std::size_t nCellSys = nCells * static_cast( _localNSys ); + const std::size_t nSysDim = static_cast( _localNSys ) * static_cast( _nDim ); + + CheckHip( hipMemcpy( _device->areas, _areas.data(), nCells * sizeof( double ), hipMemcpyHostToDevice ), "copy areas" ); + CheckHip( hipMemcpy( _device->normals, _normals.data(), nCellNbrDim * sizeof( double ), hipMemcpyHostToDevice ), "copy normals" ); + CheckHip( hipMemcpy( _device->neighbors, _neighbors.data(), nCellNbr * sizeof( unsigned ), hipMemcpyHostToDevice ), "copy neighbors" ); + CheckHip( hipMemcpy( _device->boundaryTypes, _cellBoundaryTypesInt.data(), nCells * sizeof( int ), hipMemcpyHostToDevice ), + "copy boundary types" ); + CheckHip( hipMemcpy( _device->ghostCellValues, _ghostCellValues.data(), nCellSys * sizeof( double ), hipMemcpyHostToDevice ), + "copy ghost values" ); + CheckHip( + hipMemcpy( + _device->cellMidPoints, _cellMidPoints.data(), nCells * static_cast( _nDim ) * sizeof( double ), hipMemcpyHostToDevice ), + "copy cell midpoints" ); + CheckHip( hipMemcpy( _device->interfaceMidPoints, _interfaceMidPoints.data(), nCellNbrDim * sizeof( double ), hipMemcpyHostToDevice ), + "copy interface midpoints" ); + CheckHip( hipMemcpy( _device->relativeInterfaceMid, _relativeInterfaceMidPt.data(), nCellNbrDim * sizeof( double ), hipMemcpyHostToDevice ), + "copy relative interface points" ); + CheckHip( hipMemcpy( _device->relativeCellVertices, _relativeCellVertices.data(), nCellNbrDim * sizeof( double ), hipMemcpyHostToDevice ), + "copy relative vertices" ); + CheckHip( hipMemcpy( _device->sigmaS, _sigmaS.data(), nCells * sizeof( double ), hipMemcpyHostToDevice ), "copy sigmaS" ); + CheckHip( hipMemcpy( _device->sigmaT, _sigmaT.data(), nCells * sizeof( double ), hipMemcpyHostToDevice ), "copy sigmaT" ); + CheckHip( hipMemcpy( _device->source, _source.data(), nCellSys * sizeof( double ), hipMemcpyHostToDevice ), "copy source" ); + CheckHip( hipMemcpy( _device->quadPts, _quadPts.data(), nSysDim * sizeof( double ), hipMemcpyHostToDevice ), "copy quad points" ); + CheckHip( + hipMemcpy( _device->quadWeights, _quadWeights.data(), static_cast( _localNSys ) * sizeof( double ), hipMemcpyHostToDevice ), + "copy quad weights" ); + + CheckHip( hipMemset( _device->flux, 0, nCellSys * sizeof( double ) ), "init flux" ); + CheckHip( hipMemcpy( _device->solDx, _solDx.data(), nCellSys * static_cast( _nDim ) * sizeof( double ), hipMemcpyHostToDevice ), + "init solDx" ); + CheckHip( hipMemcpy( _device->limiter, _limiter.data(), nCellSys * sizeof( double ), hipMemcpyHostToDevice ), "init limiter" ); +} + +void SNSolverHPCHIP::UploadStateToDevice() { + if( !_hipInitialized || _device == nullptr ) { + ErrorMessages::Error( "HIP backend was not initialized before UploadStateToDevice.", CURRENT_FUNCTION ); + } + const std::size_t nCells = static_cast( _nCells ); + const std::size_t nCellSys = nCells * static_cast( _localNSys ); + + CheckHip( hipMemcpy( _device->sol, _sol.data(), nCellSys * sizeof( double ), hipMemcpyHostToDevice ), "upload sol" ); + CheckHip( hipMemcpy( _device->scalarFlux, _scalarFlux.data(), nCells * sizeof( double ), hipMemcpyHostToDevice ), "upload scalar flux" ); +} + +void SNSolverHPCHIP::DownloadStateFromDevice() { + if( !_hipInitialized || _device == nullptr ) { + ErrorMessages::Error( "HIP backend was not initialized before DownloadStateFromDevice.", CURRENT_FUNCTION ); + } + const std::size_t nCells = static_cast( _nCells ); + const std::size_t nCellSys = nCells * static_cast( _localNSys ); + + CheckHip( hipMemcpy( _sol.data(), _device->sol, nCellSys * sizeof( double ), hipMemcpyDeviceToHost ), "download sol" ); + CheckHip( hipMemcpy( _scalarFlux.data(), _device->scalarFlux, nCells * sizeof( double ), hipMemcpyDeviceToHost ), "download scalar flux" ); +} + +void SNSolverHPCHIP::FreeHIP() { + if( !_hipInitialized || _device == nullptr ) { + return; + } + + CheckHip( hipSetDevice( _hipDeviceId ), "hipSetDevice" ); + + FreeDeviceArray( _device->areas, "areas" ); + FreeDeviceArray( _device->normals, "normals" ); + FreeDeviceArray( _device->neighbors, "neighbors" ); + FreeDeviceArray( _device->boundaryTypes, "boundaryTypes" ); + FreeDeviceArray( _device->ghostCellValues, "ghostCellValues" ); + FreeDeviceArray( _device->cellMidPoints, "cellMidPoints" ); + FreeDeviceArray( _device->interfaceMidPoints, "interfaceMidPoints" ); + FreeDeviceArray( _device->relativeInterfaceMid, "relativeInterfaceMid" ); + FreeDeviceArray( _device->relativeCellVertices, "relativeCellVertices" ); + FreeDeviceArray( _device->sigmaS, "sigmaS" ); + FreeDeviceArray( _device->sigmaT, "sigmaT" ); + FreeDeviceArray( _device->source, "source" ); + FreeDeviceArray( _device->quadPts, "quadPts" ); + FreeDeviceArray( _device->quadWeights, "quadWeights" ); + FreeDeviceArray( _device->sol, "sol" ); + FreeDeviceArray( _device->solRK0, "solRK0" ); + FreeDeviceArray( _device->flux, "flux" ); + FreeDeviceArray( _device->scalarFlux, "scalarFlux" ); + FreeDeviceArray( _device->solDx, "solDx" ); + FreeDeviceArray( _device->limiter, "limiter" ); + + delete _device; + _device = nullptr; + _hipInitialized = false; +} + +SNSolverHPCHIP::~SNSolverHPCHIP() { + FreeHIP(); + delete _mesh; + delete _problem; +} + +void SNSolverHPCHIP::Solve() { + + // --- Preprocessing --- + if( _rank == 0 ) { + PrepareVolumeOutput(); + DrawPreSolverOutput(); + } + // On restart, continue simulation time from the loaded iteration index. + _curSimTime = static_cast( _idx_start_iter ) * _dT; + auto start = std::chrono::high_resolution_clock::now(); // Start timing + + std::chrono::duration duration; + // Loop over energies (pseudo-time of continuous slowing down approach) + for( unsigned iter = (unsigned)_idx_start_iter; iter < _nIter; iter++ ) { + + if( iter == _nIter - 1 ) { // last iteration + _dT = _settings->GetTEnd() - iter * _dT; + } + _scalarFluxPrevIter = _scalarFlux; + + if( _temporalOrder == 2 ) { + CheckHip( hipSetDevice( _hipDeviceId ), "hipSetDevice" ); + CheckHip( hipMemcpy( _device->solRK0, + _device->sol, + static_cast( _nCells ) * static_cast( _localNSys ) * sizeof( double ), + hipMemcpyDeviceToDevice ), + "backup solRK0" ); + + ( _spatialOrder == 2 ) ? FluxOrder2() : FluxOrder1(); + FVMUpdate(); + ( _spatialOrder == 2 ) ? FluxOrder2() : FluxOrder1(); + FVMUpdate(); + + const dim3 threads( HIP_BLOCK_SIZE ); + const dim3 gridCells( static_cast( ( _nCells + HIP_BLOCK_SIZE - 1 ) / HIP_BLOCK_SIZE ) ); + RK2AverageAndScalarFluxKernel<<>>( + _nCells, _localNSys, _device->quadWeights, _device->solRK0, _device->sol, _device->scalarFlux ); + CheckHip( hipGetLastError(), "RK2AverageAndScalarFluxKernel launch" ); +#ifdef IMPORT_MPI + CheckHip( + hipMemcpy( _scalarFlux.data(), _device->scalarFlux, static_cast( _nCells ) * sizeof( double ), hipMemcpyDeviceToHost ), + "download scalar flux after RK2 average" ); + std::vector tempScalarFlux( _scalarFlux ); + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( tempScalarFlux.data(), _scalarFlux.data(), _nCells, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + MPI_Barrier( MPI_COMM_WORLD ); + CheckHip( + hipMemcpy( _device->scalarFlux, _scalarFlux.data(), static_cast( _nCells ) * sizeof( double ), hipMemcpyHostToDevice ), + "sync allreduced scalar flux after RK2 average" ); +#endif + } + else { + ( _spatialOrder == 2 ) ? FluxOrder2() : FluxOrder1(); + FVMUpdate(); + } + + DownloadStateFromDevice(); + + _curSimTime += _dT; + IterPostprocessing(); + // --- Wall time measurement + duration = std::chrono::high_resolution_clock::now() - start; + _currTime = std::chrono::duration_cast>( duration ).count(); + + // --- Write Output --- + if( _rank == 0 ) { + + WriteScalarOutput( iter ); + + // --- Update Scalar Fluxes + + // --- Print Output --- + PrintScreenOutput( iter ); + PrintHistoryOutput( iter ); + } +#ifdef IMPORT_MPI + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + PrintVolumeOutput( iter ); +#ifdef IMPORT_MPI + MPI_Barrier( MPI_COMM_WORLD ); +#endif + } + // --- Postprocessing --- + if( _rank == 0 ) { + DrawPostSolverOutput(); + } +} + +void SNSolverHPCHIP::FluxOrder2() { + CheckHip( hipSetDevice( _hipDeviceId ), "hipSetDevice" ); + const unsigned long nCellSys = _nCells * _localNSys; + const dim3 threads( HIP_BLOCK_SIZE ); + const dim3 gridCellSys( static_cast( ( nCellSys + HIP_BLOCK_SIZE - 1 ) / HIP_BLOCK_SIZE ) ); + const double eps = 1e-10; + + FluxOrder2SlopeKernel<<>>( _nCells, + _localNSys, + _nNbr, + _nDim, + _device->areas, + _device->normals, + _device->neighbors, + _device->boundaryTypes, + _device->sol, + _device->solDx, + _device->limiter, + static_cast( BOUNDARY_TYPE::NONE ) ); + CheckHip( hipGetLastError(), "FluxOrder2SlopeKernel launch" ); + + FluxOrder2LimiterKernel<<>>( _nCells, + _localNSys, + _nNbr, + _nDim, + _device->areas, + _device->neighbors, + _device->boundaryTypes, + _device->relativeCellVertices, + _device->sol, + _device->solDx, + _device->limiter, + static_cast( BOUNDARY_TYPE::NONE ), + eps ); + CheckHip( hipGetLastError(), "FluxOrder2LimiterKernel launch" ); + + FluxOrder2Kernel<<>>( _nCells, + _localNSys, + _nNbr, + _nDim, + _device->quadPts, + _device->normals, + _device->neighbors, + _device->boundaryTypes, + _device->ghostCellValues, + _device->interfaceMidPoints, + _device->cellMidPoints, + _device->relativeInterfaceMid, + _device->sol, + _device->solDx, + _device->limiter, + _device->flux, + static_cast( BOUNDARY_TYPE::NEUMANN ) ); + CheckHip( hipGetLastError(), "FluxOrder2Kernel launch" ); +} + +void SNSolverHPCHIP::FluxOrder1() { + CheckHip( hipSetDevice( _hipDeviceId ), "hipSetDevice" ); + const unsigned long nCellSys = _nCells * _localNSys; + const dim3 threads( HIP_BLOCK_SIZE ); + const dim3 gridCellSys( static_cast( ( nCellSys + HIP_BLOCK_SIZE - 1 ) / HIP_BLOCK_SIZE ) ); + + FluxOrder1Kernel<<>>( _nCells, + _localNSys, + _nNbr, + _nDim, + _device->quadPts, + _device->normals, + _device->neighbors, + _device->boundaryTypes, + _device->ghostCellValues, + _device->sol, + _device->flux, + static_cast( BOUNDARY_TYPE::NEUMANN ) ); + CheckHip( hipGetLastError(), "FluxOrder1Kernel launch" ); +} + +void SNSolverHPCHIP::FVMUpdate() { + CheckHip( hipSetDevice( _hipDeviceId ), "hipSetDevice" ); + + const dim3 threads( HIP_BLOCK_SIZE ); + const dim3 gridCells( static_cast( ( _nCells + HIP_BLOCK_SIZE - 1 ) / HIP_BLOCK_SIZE ) ); + const double invTwoPi = 1.0 / static_cast( 2.0L * PI ); + + FVMUpdateKernel<<>>( _nCells, + _localNSys, + _dT, + _device->sigmaS, + _device->sigmaT, + _device->source, + _device->areas, + _device->quadWeights, + _device->flux, + _device->sol, + _device->scalarFlux, + invTwoPi ); + CheckHip( hipGetLastError(), "FVMUpdateKernel launch" ); + CheckHip( hipMemcpy( _scalarFlux.data(), _device->scalarFlux, static_cast( _nCells ) * sizeof( double ), hipMemcpyDeviceToHost ), + "download scalar flux in FVMUpdate" ); + +#ifdef IMPORT_MPI + std::vector tempScalarFlux( _scalarFlux ); + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( tempScalarFlux.data(), _scalarFlux.data(), _nCells, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + MPI_Barrier( MPI_COMM_WORLD ); + CheckHip( hipMemcpy( _device->scalarFlux, _scalarFlux.data(), static_cast( _nCells ) * sizeof( double ), hipMemcpyHostToDevice ), + "sync allreduced scalar flux to device" ); +#endif +} + +void SNSolverHPCHIP::IterPostprocessing() { + // ALREDUCE NEEDED + _mass = 0.0; + _rmsFlux = 0.0; +#pragma omp parallel for reduction( + : _mass, _rmsFlux ) + for( unsigned long idx_cell = 0; idx_cell < _nCells; ++idx_cell ) { + _mass += _scalarFlux[idx_cell] * _areas[idx_cell]; + double diff = _scalarFlux[idx_cell] - _scalarFluxPrevIter[idx_cell]; + _rmsFlux += diff * diff; + } + + _curAbsorptionLattice = 0.0; + _curScalarOutflow = 0.0; + _curScalarOutflowPeri1 = 0.0; + _curScalarOutflowPeri2 = 0.0; + _curAbsorptionHohlraumCenter = 0.0; // Green and blue areas of symmetric hohlraum + _curAbsorptionHohlraumVertical = 0.0; // Red areas of symmetric hohlraum + _curAbsorptionHohlraumHorizontal = 0.0; // Black areas of symmetric hohlraum + _varAbsorptionHohlraumGreen = 0.0; + double a_g = 0.0; + +#pragma omp parallel for reduction( + : _curAbsorptionLattice, \ + _curScalarOutflow, \ + _curScalarOutflowPeri1, \ + _curScalarOutflowPeri2, \ + _curAbsorptionHohlraumCenter, \ + _curAbsorptionHohlraumVertical, \ + _curAbsorptionHohlraumHorizontal, \ + a_g ) reduction( max : _curMaxOrdinateOutflow, _curMaxAbsorptionLattice ) + for( unsigned long idx_cell = 0; idx_cell < _nCells; ++idx_cell ) { + + if( _settings->GetProblemName() == PROBLEM_Lattice || _settings->GetProblemName() == PROBLEM_HalfLattice ) { + if( IsAbsorptionLattice( _cellMidPoints[Idx2D( idx_cell, 0, _nDim )], _cellMidPoints[Idx2D( idx_cell, 1, _nDim )] ) ) { + double sigmaAPsi = _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) * _areas[idx_cell]; + _curAbsorptionLattice += sigmaAPsi; + _curMaxAbsorptionLattice = ( _curMaxAbsorptionLattice < sigmaAPsi ) ? sigmaAPsi : _curMaxAbsorptionLattice; + } + } + + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { //} || _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) { + + double x = _cellMidPoints[Idx2D( idx_cell, 0, _nDim )]; + double y = _cellMidPoints[Idx2D( idx_cell, 1, _nDim )]; + _curAbsorptionLattice += _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) * _areas[idx_cell]; + if( x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.4 + _settings->GetPosYCenterGreenHohlraum() && y < 0.4 + _settings->GetPosYCenterGreenHohlraum() ) { + _curAbsorptionHohlraumCenter += _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) * _areas[idx_cell]; + } + if( ( x < _settings->GetPosRedLeftBorderHohlraum() && y > _settings->GetPosRedLeftBottomHohlraum() && + y < _settings->GetPosRedLeftTopHohlraum() ) || + ( x > _settings->GetPosRedRightBorderHohlraum() && y > _settings->GetPosRedLeftBottomHohlraum() && + y < _settings->GetPosRedRightTopHohlraum() ) ) { + _curAbsorptionHohlraumVertical += _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) * _areas[idx_cell]; + } + if( y > 0.6 || y < -0.6 ) { + _curAbsorptionHohlraumHorizontal += _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) * _areas[idx_cell]; + } + + // Variation in absorption of center (part 1) + // green area 1 (lower boundary) + bool green1 = x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.4 + _settings->GetPosYCenterGreenHohlraum() && y < -0.35 + _settings->GetPosYCenterGreenHohlraum(); + // green area 2 (upper boundary) + bool green2 = x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > 0.35 + _settings->GetPosYCenterGreenHohlraum() && y < 0.4 + _settings->GetPosYCenterGreenHohlraum(); + // green area 3 (left boundary) + bool green3 = x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < -0.15 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.35 + _settings->GetPosYCenterGreenHohlraum() && y < 0.35 + _settings->GetPosYCenterGreenHohlraum(); + // green area 4 (right boundary) + bool green4 = x > 0.15 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.35 + _settings->GetPosYCenterGreenHohlraum() && y < 0.35 + _settings->GetPosYCenterGreenHohlraum(); + if( green1 || green2 || green3 || green4 ) { + a_g += ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) * _scalarFlux[idx_cell] * _areas[idx_cell] / + ( 44 * 0.05 * 0.05 ); // divisor is area of the green + } + } + + if( _settings->GetProblemName() == PROBLEM_Lattice ) { + // Outflow out of inner and middle perimeter + if( _isPerimeterLatticeCell1[idx_cell] ) { // inner + for( unsigned long idx_nbr_helper = 0; idx_nbr_helper < _cellsLatticePerimeter1[idx_cell].size(); ++idx_nbr_helper ) { +#pragma omp simd reduction( + : _curScalarOutflowPeri1 ) + for( unsigned long idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + double localInner = _quadPts[Idx2D( idx_sys, 0, _nDim )] * + _normals[Idx3D( idx_cell, _cellsLatticePerimeter1[idx_cell][idx_nbr_helper], 0, _nNbr, _nDim )] + + _quadPts[Idx2D( idx_sys, 1, _nDim )] * + _normals[Idx3D( idx_cell, _cellsLatticePerimeter1[idx_cell][idx_nbr_helper], 1, _nNbr, _nDim )]; + // Find outward facing transport directions + + if( localInner > 0.0 ) { + _curScalarOutflowPeri1 += + localInner * _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * _quadWeights[idx_sys]; // Integrate flux + } + } + } + } + if( _isPerimeterLatticeCell2[idx_cell] ) { // middle + for( unsigned long idx_nbr_helper = 0; idx_nbr_helper < _cellsLatticePerimeter2[idx_cell].size(); ++idx_nbr_helper ) { +#pragma omp simd reduction( + : _curScalarOutflowPeri2 ) + for( unsigned long idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + double localInner = _quadPts[Idx2D( idx_sys, 0, _nDim )] * + _normals[Idx3D( idx_cell, _cellsLatticePerimeter2[idx_cell][idx_nbr_helper], 0, _nNbr, _nDim )] + + _quadPts[Idx2D( idx_sys, 1, _nDim )] * + _normals[Idx3D( idx_cell, _cellsLatticePerimeter2[idx_cell][idx_nbr_helper], 1, _nNbr, _nDim )]; + // Find outward facing transport directions + + if( localInner > 0.0 ) { + _curScalarOutflowPeri2 += + localInner * _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * _quadWeights[idx_sys]; // Integrate flux + } + } + } + } + } + // Outflow out of domain + if( _cellBoundaryTypes[idx_cell] == BOUNDARY_TYPE::NEUMANN ) { + // Iterate over face cell faces + double currOrdinatewiseOutflow = 0.0; + + for( unsigned long idx_nbr = 0; idx_nbr < _nNbr; ++idx_nbr ) { + // Find face that points outward + if( _neighbors[Idx2D( idx_cell, idx_nbr, _nNbr )] == _nCells ) { +#pragma omp simd reduction( + : _curScalarOutflow ) reduction( max : _curMaxOrdinateOutflow ) + for( unsigned long idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + + double localInner = _quadPts[Idx2D( idx_sys, 0, _nDim )] * _normals[Idx3D( idx_cell, idx_nbr, 0, _nNbr, _nDim )] + + _quadPts[Idx2D( idx_sys, 1, _nDim )] * _normals[Idx3D( idx_cell, idx_nbr, 1, _nNbr, _nDim )]; + // Find outward facing transport directions + + if( localInner > 0.0 ) { + _curScalarOutflow += + localInner * _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * _quadWeights[idx_sys]; // Integrate flux + + currOrdinatewiseOutflow = + _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * localInner / + sqrt( ( + _normals[Idx3D( idx_cell, idx_nbr, 0, _nNbr, _nDim )] * _normals[Idx3D( idx_cell, idx_nbr, 0, _nNbr, _nDim )] + + _normals[Idx3D( idx_cell, idx_nbr, 1, _nNbr, _nDim )] * _normals[Idx3D( idx_cell, idx_nbr, 1, _nNbr, _nDim )] ) ); + + _curMaxOrdinateOutflow = + ( currOrdinatewiseOutflow > _curMaxOrdinateOutflow ) ? currOrdinatewiseOutflow : _curMaxOrdinateOutflow; + } + } + } + } + } + } +// MPI Allreduce +#ifdef IMPORT_MPI + double tmp_curScalarOutflow = 0.0; + double tmp_curScalarOutflowPeri1 = 0.0; + double tmp_curScalarOutflowPeri2 = 0.0; + double tmp_curMaxOrdinateOutflow = 0.0; + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( &_curScalarOutflow, &tmp_curScalarOutflow, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + _curScalarOutflow = tmp_curScalarOutflow; + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( &_curScalarOutflowPeri1, &tmp_curScalarOutflowPeri1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + _curScalarOutflowPeri1 = tmp_curScalarOutflowPeri1; + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( &_curScalarOutflowPeri2, &tmp_curScalarOutflowPeri2, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + _curScalarOutflowPeri2 = tmp_curScalarOutflowPeri2; + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( &_curMaxOrdinateOutflow, &tmp_curMaxOrdinateOutflow, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD ); + _curMaxOrdinateOutflow = tmp_curMaxOrdinateOutflow; + MPI_Barrier( MPI_COMM_WORLD ); +#endif + // Variation absorption (part II) + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + unsigned n_probes = 4; + std::vector temp_probingMoments( 3 * n_probes ); // for MPI allreduce + +#pragma omp parallel for reduction( + : _varAbsorptionHohlraumGreen ) + for( unsigned long idx_cell = 0; idx_cell < _nCells; ++idx_cell ) { + double x = _cellMidPoints[Idx2D( idx_cell, 0, _nDim )]; + double y = _cellMidPoints[Idx2D( idx_cell, 1, _nDim )]; + + // green area 1 (lower boundary) + bool green1 = x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.4 + _settings->GetPosYCenterGreenHohlraum() && y < -0.35 + _settings->GetPosYCenterGreenHohlraum(); + // green area 2 (upper boundary) + bool green2 = x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > 0.35 + _settings->GetPosYCenterGreenHohlraum() && y < 0.4 + _settings->GetPosYCenterGreenHohlraum(); + // green area 3 (left boundary) + bool green3 = x > -0.2 + _settings->GetPosXCenterGreenHohlraum() && x < -0.15 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.35 + _settings->GetPosYCenterGreenHohlraum() && y < 0.35 + _settings->GetPosYCenterGreenHohlraum(); + // green area 4 (right boundary) + bool green4 = x > 0.15 + _settings->GetPosXCenterGreenHohlraum() && x < 0.2 + _settings->GetPosXCenterGreenHohlraum() && + y > -0.35 + _settings->GetPosYCenterGreenHohlraum() && y < 0.35 + _settings->GetPosYCenterGreenHohlraum(); + if( green1 || green2 || green3 || green4 ) { + _varAbsorptionHohlraumGreen += ( a_g - _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) ) * + ( a_g - _scalarFlux[idx_cell] * ( _sigmaT[idx_cell] - _sigmaS[idx_cell] ) ) * _areas[idx_cell]; + } + } + // Probes value moments + // #pragma omp parallel for + for( unsigned long idx_probe = 0; idx_probe < n_probes; idx_probe++ ) { // Loop over probing cells + temp_probingMoments[Idx2D( idx_probe, 0, 3 )] = 0.0; + temp_probingMoments[Idx2D( idx_probe, 1, 3 )] = 0.0; + temp_probingMoments[Idx2D( idx_probe, 2, 3 )] = 0.0; + // for( unsigned long idx_ball = 0; idx_ball < _probingCellsHohlraum[idx_probe].size(); idx_ball++ ) { + // std::cout << idx_ball << _areas[_probingCellsHohlraum[idx_probe][idx_ball]] / ( 0.01 * 0.01 * M_PI ) << std::endl; + //} + for( unsigned long idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + for( unsigned long idx_ball = 0; idx_ball < _probingCellsHohlraum[idx_probe].size(); idx_ball++ ) { + temp_probingMoments[Idx2D( idx_probe, 0, 3 )] += _sol[Idx2D( _probingCellsHohlraum[idx_probe][idx_ball], idx_sys, _localNSys )] * + _quadWeights[idx_sys] * _areas[_probingCellsHohlraum[idx_probe][idx_ball]] / + ( 0.01 * 0.01 * M_PI ); + temp_probingMoments[Idx2D( idx_probe, 1, 3 )] += + _quadPts[Idx2D( idx_sys, 0, _nDim )] * _sol[Idx2D( _probingCellsHohlraum[idx_probe][idx_ball], idx_sys, _localNSys )] * + _quadWeights[idx_sys] * _areas[_probingCellsHohlraum[idx_probe][idx_ball]] / ( 0.01 * 0.01 * M_PI ); + temp_probingMoments[Idx2D( idx_probe, 2, 3 )] += + _quadPts[Idx2D( idx_sys, 1, _nDim )] * _sol[Idx2D( _probingCellsHohlraum[idx_probe][idx_ball], idx_sys, _localNSys )] * + _quadWeights[idx_sys] * _areas[_probingCellsHohlraum[idx_probe][idx_ball]] / ( 0.01 * 0.01 * M_PI ); + } + } + } + +#ifdef IMPORT_MPI + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( temp_probingMoments.data(), _probingMoments.data(), 3 * n_probes, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif +#ifndef IMPORT_MPI + for( unsigned long idx_probe = 0; idx_probe < n_probes; idx_probe++ ) { // Loop over probing cells + _probingMoments[Idx2D( idx_probe, 0, 3 )] = temp_probingMoments[Idx2D( idx_probe, 0, 3 )]; + _probingMoments[Idx2D( idx_probe, 1, 3 )] = temp_probingMoments[Idx2D( idx_probe, 1, 3 )]; + _probingMoments[Idx2D( idx_probe, 2, 3 )] = temp_probingMoments[Idx2D( idx_probe, 2, 3 )]; + } +#endif + } + // probe values green + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + ComputeQOIsGreenProbingLine(); + } + // Update time integral values on rank 0 + if( _rank == 0 ) { + _totalScalarOutflow += _curScalarOutflow * _dT; + _totalScalarOutflowPeri1 += _curScalarOutflowPeri1 * _dT; + _totalScalarOutflowPeri2 += _curScalarOutflowPeri2 * _dT; + _totalAbsorptionLattice += _curAbsorptionLattice * _dT; + + _totalAbsorptionHohlraumCenter += _curAbsorptionHohlraumCenter * _dT; + _totalAbsorptionHohlraumVertical += _curAbsorptionHohlraumVertical * _dT; + _totalAbsorptionHohlraumHorizontal += _curAbsorptionHohlraumHorizontal * _dT; + + _rmsFlux = sqrt( _rmsFlux ); + } +} + +bool SNSolverHPCHIP::IsAbsorptionLattice( double x, double y ) const { + // Check whether pos is inside absorbing squares + double xy_corrector = -3.5; + std::vector lbounds{ 1 + xy_corrector, 2 + xy_corrector, 3 + xy_corrector, 4 + xy_corrector, 5 + xy_corrector }; + std::vector ubounds{ 2 + xy_corrector, 3 + xy_corrector, 4 + xy_corrector, 5 + xy_corrector, 6 + xy_corrector }; + for( unsigned k = 0; k < lbounds.size(); ++k ) { + for( unsigned l = 0; l < lbounds.size(); ++l ) { + if( ( l + k ) % 2 == 1 || ( k == 2 && l == 2 ) || ( k == 2 && l == 4 ) ) continue; + if( x >= lbounds[k] && x <= ubounds[k] && y >= lbounds[l] && y <= ubounds[l] ) { + return true; + } + } + } + return false; +} + +// --- Helper --- +double SNSolverHPCHIP::ComputeTimeStep( double cfl ) const { + // for pseudo 1D, set timestep to dx + double dx, dy; + switch( _settings->GetProblemName() ) { + case PROBLEM_Checkerboard1D: + dx = 7.0 / (double)_nCells; + dy = 0.3; + return cfl * ( dx * dy ) / ( dx + dy ); + break; + case PROBLEM_Linesource1D: // Fallthrough + case PROBLEM_Meltingcube1D: // Fallthrough + case PROBLEM_Aircavity1D: + dx = 3.0 / (double)_nCells; + dy = 0.3; + return cfl * ( dx * dy ) / ( dx + dy ); + break; + default: break; // 2d as normal + } + // 2D case + double charSize = __DBL_MAX__; // minimum char size of all mesh cells in the mesh + +#pragma omp parallel for reduction( min : charSize ) + for( unsigned long j = 0; j < _nCells; j++ ) { + double currCharSize = sqrt( _areas[j] ); + if( currCharSize < charSize ) { + charSize = currCharSize; + } + } + if( _rank == 0 ) { + auto log = spdlog::get( "event" ); + std::string line = "| Smallest characteristic length of a grid cell in this mesh: " + std::to_string( charSize ); + log->info( line ); + line = "| Corresponding maximal time-step: " + std::to_string( cfl * charSize ); + log->info( line ); + } + return cfl * charSize; +} + +// --- IO ---- +void SNSolverHPCHIP::PrepareScreenOutput() { + unsigned nFields = (unsigned)_settings->GetNScreenOutput(); + + _screenOutputFieldNames.resize( nFields ); + _screenOutputFields.resize( nFields ); + + // Prepare all output Fields ==> Specified in option SCREEN_OUTPUT + for( unsigned idx_field = 0; idx_field < nFields; idx_field++ ) { + // Prepare all Output Fields per group + + // Different procedure, depending on the Group... + switch( _settings->GetScreenOutput()[idx_field] ) { + case MASS: _screenOutputFieldNames[idx_field] = "Mass"; break; + case ITER: _screenOutputFieldNames[idx_field] = "Iter"; break; + case SIM_TIME: _screenOutputFieldNames[idx_field] = "Sim time"; break; + case WALL_TIME: _screenOutputFieldNames[idx_field] = "Wall time [s]"; break; + case RMS_FLUX: _screenOutputFieldNames[idx_field] = "RMS flux"; break; + case VTK_OUTPUT: _screenOutputFieldNames[idx_field] = "VTK out"; break; + case CSV_OUTPUT: _screenOutputFieldNames[idx_field] = "CSV out"; break; + case CUR_OUTFLOW: _screenOutputFieldNames[idx_field] = "Cur. outflow"; break; + case TOTAL_OUTFLOW: _screenOutputFieldNames[idx_field] = "Tot. outflow"; break; + case CUR_OUTFLOW_P1: _screenOutputFieldNames[idx_field] = "Cur. outflow P1"; break; + case TOTAL_OUTFLOW_P1: _screenOutputFieldNames[idx_field] = "Tot. outflow P1"; break; + case CUR_OUTFLOW_P2: _screenOutputFieldNames[idx_field] = "Cur. outflow P2"; break; + case TOTAL_OUTFLOW_P2: _screenOutputFieldNames[idx_field] = "Tot. outflow P2"; break; + case MAX_OUTFLOW: _screenOutputFieldNames[idx_field] = "Max outflow"; break; + case CUR_PARTICLE_ABSORPTION: _screenOutputFieldNames[idx_field] = "Cur. absorption"; break; + case TOTAL_PARTICLE_ABSORPTION: _screenOutputFieldNames[idx_field] = "Tot. absorption"; break; + case MAX_PARTICLE_ABSORPTION: _screenOutputFieldNames[idx_field] = "Max absorption"; break; + case TOTAL_PARTICLE_ABSORPTION_CENTER: _screenOutputFieldNames[idx_field] = "Tot. abs. center"; break; + case TOTAL_PARTICLE_ABSORPTION_VERTICAL: _screenOutputFieldNames[idx_field] = "Tot. abs. vertical wall"; break; + case TOTAL_PARTICLE_ABSORPTION_HORIZONTAL: _screenOutputFieldNames[idx_field] = "Tot. abs. horizontal wall"; break; + case PROBE_MOMENT_TIME_TRACE: + _screenOutputFieldNames[idx_field] = "Probe 1 u_0"; + idx_field++; + _screenOutputFieldNames[idx_field] = "Probe 2 u_0"; + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + idx_field++; + _screenOutputFieldNames[idx_field] = "Probe 3 u_0"; + idx_field++; + _screenOutputFieldNames[idx_field] = "Probe 4 u_0"; + } + break; + case VAR_ABSORPTION_GREEN: _screenOutputFieldNames[idx_field] = "Var. absorption green"; break; + + default: ErrorMessages::Error( "Screen output field not defined!", CURRENT_FUNCTION ); break; + } + } +} + +void SNSolverHPCHIP::WriteScalarOutput( unsigned idx_iter ) { + unsigned n_probes = 4; + + unsigned nFields = (unsigned)_settings->GetNScreenOutput(); + const VectorVector probingMoments = _problem->GetCurrentProbeMoment(); + // -- Screen Output + for( unsigned idx_field = 0; idx_field < nFields; idx_field++ ) { + // Prepare all Output Fields per group + // Different procedure, depending on the Group... + switch( _settings->GetScreenOutput()[idx_field] ) { + case MASS: _screenOutputFields[idx_field] = _mass; break; + case ITER: _screenOutputFields[idx_field] = idx_iter; break; + case SIM_TIME: _screenOutputFields[idx_field] = _curSimTime; break; + case WALL_TIME: _screenOutputFields[idx_field] = _currTime; break; + case RMS_FLUX: _screenOutputFields[idx_field] = _rmsFlux; break; + case VTK_OUTPUT: + _screenOutputFields[idx_field] = 0; + if( ( _settings->GetVolumeOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetVolumeOutputFrequency() == 0 ) || + ( idx_iter == _nIter - 1 ) /* need sol at last iteration */ ) { + _screenOutputFields[idx_field] = 1; + } + break; + case CSV_OUTPUT: + _screenOutputFields[idx_field] = 0; + if( ( _settings->GetHistoryOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetHistoryOutputFrequency() == 0 ) || + ( idx_iter == _nIter - 1 ) /* need sol at last iteration */ ) { + _screenOutputFields[idx_field] = 1; + } + break; + case CUR_OUTFLOW: _screenOutputFields[idx_field] = _curScalarOutflow; break; + case TOTAL_OUTFLOW: _screenOutputFields[idx_field] = _totalScalarOutflow; break; + case CUR_OUTFLOW_P1: _screenOutputFields[idx_field] = _curScalarOutflowPeri1; break; + case TOTAL_OUTFLOW_P1: _screenOutputFields[idx_field] = _totalScalarOutflowPeri1; break; + case CUR_OUTFLOW_P2: _screenOutputFields[idx_field] = _curScalarOutflowPeri2; break; + case TOTAL_OUTFLOW_P2: _screenOutputFields[idx_field] = _totalScalarOutflowPeri2; break; + case MAX_OUTFLOW: _screenOutputFields[idx_field] = _curMaxOrdinateOutflow; break; + case CUR_PARTICLE_ABSORPTION: _screenOutputFields[idx_field] = _curAbsorptionLattice; break; + case TOTAL_PARTICLE_ABSORPTION: _screenOutputFields[idx_field] = _totalAbsorptionLattice; break; + case MAX_PARTICLE_ABSORPTION: _screenOutputFields[idx_field] = _curMaxAbsorptionLattice; break; + case TOTAL_PARTICLE_ABSORPTION_CENTER: _screenOutputFields[idx_field] = _totalAbsorptionHohlraumCenter; break; + case TOTAL_PARTICLE_ABSORPTION_VERTICAL: _screenOutputFields[idx_field] = _totalAbsorptionHohlraumVertical; break; + case TOTAL_PARTICLE_ABSORPTION_HORIZONTAL: _screenOutputFields[idx_field] = _totalAbsorptionHohlraumHorizontal; break; + case PROBE_MOMENT_TIME_TRACE: + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) n_probes = 4; + // if( _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) n_probes = 2; + for( unsigned i = 0; i < n_probes; i++ ) { + _screenOutputFields[idx_field] = _probingMoments[Idx2D( i, 0, 3 )]; + idx_field++; + } + idx_field--; + break; + case VAR_ABSORPTION_GREEN: _screenOutputFields[idx_field] = _varAbsorptionHohlraumGreen; break; + default: ErrorMessages::Error( "Screen output group not defined!", CURRENT_FUNCTION ); break; + } + } + + // --- History output --- + nFields = (unsigned)_settings->GetNHistoryOutput(); + + std::vector screenOutputFields = _settings->GetScreenOutput(); + for( unsigned idx_field = 0; idx_field < nFields; idx_field++ ) { + + // Prepare all Output Fields per group + // Different procedure, depending on the Group... + switch( _settings->GetHistoryOutput()[idx_field] ) { + case MASS: _historyOutputFields[idx_field] = _mass; break; + case ITER: _historyOutputFields[idx_field] = idx_iter; break; + case SIM_TIME: _historyOutputFields[idx_field] = _curSimTime; break; + case WALL_TIME: _historyOutputFields[idx_field] = _currTime; break; + case RMS_FLUX: _historyOutputFields[idx_field] = _rmsFlux; break; + case VTK_OUTPUT: + _historyOutputFields[idx_field] = 0; + if( ( _settings->GetVolumeOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetVolumeOutputFrequency() == 0 ) || + ( idx_iter == _nIter - 1 ) /* need sol at last iteration */ ) { + _historyOutputFields[idx_field] = 1; + } + break; + + case CSV_OUTPUT: + _historyOutputFields[idx_field] = 0; + if( ( _settings->GetHistoryOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetHistoryOutputFrequency() == 0 ) || + ( idx_iter == _nIter - 1 ) /* need sol at last iteration */ ) { + _historyOutputFields[idx_field] = 1; + } + break; + case CUR_OUTFLOW: _historyOutputFields[idx_field] = _curScalarOutflow; break; + case TOTAL_OUTFLOW: _historyOutputFields[idx_field] = _totalScalarOutflow; break; + case CUR_OUTFLOW_P1: _historyOutputFields[idx_field] = _curScalarOutflowPeri1; break; + case TOTAL_OUTFLOW_P1: _historyOutputFields[idx_field] = _totalScalarOutflowPeri1; break; + case CUR_OUTFLOW_P2: _historyOutputFields[idx_field] = _curScalarOutflowPeri2; break; + case TOTAL_OUTFLOW_P2: _historyOutputFields[idx_field] = _totalScalarOutflowPeri2; break; + case MAX_OUTFLOW: _historyOutputFields[idx_field] = _curMaxOrdinateOutflow; break; + case CUR_PARTICLE_ABSORPTION: _historyOutputFields[idx_field] = _curAbsorptionLattice; break; + case TOTAL_PARTICLE_ABSORPTION: _historyOutputFields[idx_field] = _totalAbsorptionLattice; break; + case MAX_PARTICLE_ABSORPTION: _historyOutputFields[idx_field] = _curMaxAbsorptionLattice; break; + case TOTAL_PARTICLE_ABSORPTION_CENTER: _historyOutputFields[idx_field] = _totalAbsorptionHohlraumCenter; break; + case TOTAL_PARTICLE_ABSORPTION_VERTICAL: _historyOutputFields[idx_field] = _totalAbsorptionHohlraumVertical; break; + case TOTAL_PARTICLE_ABSORPTION_HORIZONTAL: _historyOutputFields[idx_field] = _totalAbsorptionHohlraumHorizontal; break; + case PROBE_MOMENT_TIME_TRACE: + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) n_probes = 4; + // if( _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) n_probes = 2; + for( unsigned i = 0; i < n_probes; i++ ) { + for( unsigned j = 0; j < 3; j++ ) { + _historyOutputFields[idx_field] = _probingMoments[Idx2D( i, j, 3 )]; + idx_field++; + } + } + idx_field--; + break; + case VAR_ABSORPTION_GREEN: _historyOutputFields[idx_field] = _varAbsorptionHohlraumGreen; break; + case ABSORPTION_GREEN_LINE: + for( unsigned i = 0; i < _settings->GetNumProbingCellsLineHohlraum(); i++ ) { + _historyOutputFields[idx_field] = _absorptionValsLineSegment[i]; + idx_field++; + } + idx_field--; + break; + case ABSORPTION_GREEN_BLOCK: + for( unsigned i = 0; i < 44; i++ ) { + _historyOutputFields[idx_field] = _absorptionValsBlocksGreen[i]; + // std::cout << _absorptionValsBlocksGreen[i] << "/" << _historyOutputFields[idx_field] << std::endl; + idx_field++; + } + idx_field--; + break; + default: ErrorMessages::Error( "History output group not defined!", CURRENT_FUNCTION ); break; + } + } +} + +void SNSolverHPCHIP::PrintScreenOutput( unsigned idx_iter ) { + auto log = spdlog::get( "event" ); + + unsigned strLen = 15; // max width of one column + char paddingChar = ' '; + + // assemble the line to print + std::string lineToPrint = "| "; + std::string tmp; + for( unsigned idx_field = 0; idx_field < _settings->GetNScreenOutput(); idx_field++ ) { + tmp = std::to_string( _screenOutputFields[idx_field] ); + + // Format outputs correctly + std::vector integerFields = { ITER }; + std::vector scientificFields = { RMS_FLUX, + MASS, + WALL_TIME, + CUR_OUTFLOW, + TOTAL_OUTFLOW, + CUR_OUTFLOW_P1, + TOTAL_OUTFLOW_P1, + CUR_OUTFLOW_P2, + TOTAL_OUTFLOW_P2, + MAX_OUTFLOW, + CUR_PARTICLE_ABSORPTION, + TOTAL_PARTICLE_ABSORPTION, + MAX_PARTICLE_ABSORPTION, + TOTAL_PARTICLE_ABSORPTION_CENTER, + TOTAL_PARTICLE_ABSORPTION_VERTICAL, + TOTAL_PARTICLE_ABSORPTION_HORIZONTAL, + PROBE_MOMENT_TIME_TRACE, + VAR_ABSORPTION_GREEN, + ABSORPTION_GREEN_BLOCK, + ABSORPTION_GREEN_LINE }; + std::vector booleanFields = { VTK_OUTPUT, CSV_OUTPUT }; + + if( !( integerFields.end() == std::find( integerFields.begin(), integerFields.end(), _settings->GetScreenOutput()[idx_field] ) ) ) { + tmp = std::to_string( (int)_screenOutputFields[idx_field] ); + } + else if( !( booleanFields.end() == std::find( booleanFields.begin(), booleanFields.end(), _settings->GetScreenOutput()[idx_field] ) ) ) { + tmp = "no"; + if( (bool)_screenOutputFields[idx_field] ) tmp = "yes"; + } + else if( !( scientificFields.end() == + std::find( scientificFields.begin(), scientificFields.end(), _settings->GetScreenOutput()[idx_field] ) ) ) { + + std::stringstream ss; + ss << TextProcessingToolbox::DoubleToScientificNotation( _screenOutputFields[idx_field] ); + tmp = ss.str(); + tmp.erase( std::remove( tmp.begin(), tmp.end(), '+' ), tmp.end() ); // removing the '+' sign + } + + if( strLen > tmp.size() ) // Padding + tmp.insert( 0, strLen - tmp.size(), paddingChar ); + else if( strLen < tmp.size() ) // Cutting + tmp.resize( strLen ); + + lineToPrint += tmp + " |"; + } + if( _settings->GetScreenOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetScreenOutputFrequency() == 0 ) { + log->info( lineToPrint ); + } + else if( idx_iter == _nIter - 1 ) { // Always print last iteration + log->info( lineToPrint ); + } +} + +void SNSolverHPCHIP::PrepareHistoryOutput() { + unsigned n_probes = 4; + + unsigned nFields = (unsigned)_settings->GetNHistoryOutput(); + + _historyOutputFieldNames.resize( nFields ); + _historyOutputFields.resize( nFields ); + + // Prepare all output Fields ==> Specified in option SCREEN_OUTPUT + for( unsigned idx_field = 0; idx_field < nFields; idx_field++ ) { + // Prepare all Output Fields per group + + // Different procedure, depending on the Group... + switch( _settings->GetHistoryOutput()[idx_field] ) { + case MASS: _historyOutputFieldNames[idx_field] = "Mass"; break; + case ITER: _historyOutputFieldNames[idx_field] = "Iter"; break; + case SIM_TIME: _historyOutputFieldNames[idx_field] = "Sim_time"; break; + case WALL_TIME: _historyOutputFieldNames[idx_field] = "Wall_time_[s]"; break; + case RMS_FLUX: _historyOutputFieldNames[idx_field] = "RMS_flux"; break; + case VTK_OUTPUT: _historyOutputFieldNames[idx_field] = "VTK_out"; break; + case CSV_OUTPUT: _historyOutputFieldNames[idx_field] = "CSV_out"; break; + case CUR_OUTFLOW: _historyOutputFieldNames[idx_field] = "Cur_outflow"; break; + case TOTAL_OUTFLOW: _historyOutputFieldNames[idx_field] = "Total_outflow"; break; + case CUR_OUTFLOW_P1: _historyOutputFieldNames[idx_field] = "Cur_outflow_P1"; break; + case TOTAL_OUTFLOW_P1: _historyOutputFieldNames[idx_field] = "Total_outflow_P1"; break; + case CUR_OUTFLOW_P2: _historyOutputFieldNames[idx_field] = "Cur_outflow_P2"; break; + case TOTAL_OUTFLOW_P2: _historyOutputFieldNames[idx_field] = "Total_outflow_P2"; break; + case MAX_OUTFLOW: _historyOutputFieldNames[idx_field] = "Max_outflow"; break; + case CUR_PARTICLE_ABSORPTION: _historyOutputFieldNames[idx_field] = "Cur_absorption"; break; + case TOTAL_PARTICLE_ABSORPTION: _historyOutputFieldNames[idx_field] = "Total_absorption"; break; + case MAX_PARTICLE_ABSORPTION: _historyOutputFieldNames[idx_field] = "Max_absorption"; break; + case TOTAL_PARTICLE_ABSORPTION_CENTER: _historyOutputFieldNames[idx_field] = "Cumulated_absorption_center"; break; + case TOTAL_PARTICLE_ABSORPTION_VERTICAL: _historyOutputFieldNames[idx_field] = "Cumulated_absorption_vertical_wall"; break; + case TOTAL_PARTICLE_ABSORPTION_HORIZONTAL: _historyOutputFieldNames[idx_field] = "Cumulated_absorption_horizontal_wall"; break; + case PROBE_MOMENT_TIME_TRACE: + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) n_probes = 4; + // if( _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) n_probes = 2; + for( unsigned i = 0; i < n_probes; i++ ) { + for( unsigned j = 0; j < 3; j++ ) { + _historyOutputFieldNames[idx_field] = "Probe " + std::to_string( i ) + " u_" + std::to_string( j ); + idx_field++; + } + } + idx_field--; + break; + case VAR_ABSORPTION_GREEN: _historyOutputFieldNames[idx_field] = "Var. absorption green"; break; + case ABSORPTION_GREEN_BLOCK: + for( unsigned i = 0; i < 44; i++ ) { + _historyOutputFieldNames[idx_field] = "Probe Green Block " + std::to_string( i ); + idx_field++; + } + idx_field--; + break; + + case ABSORPTION_GREEN_LINE: + for( unsigned i = 0; i < _settings->GetNumProbingCellsLineHohlraum(); i++ ) { + _historyOutputFieldNames[idx_field] = "Probe Green Line " + std::to_string( i ); + idx_field++; + } + idx_field--; + break; + default: ErrorMessages::Error( "History output field not defined!", CURRENT_FUNCTION ); break; + } + } +} + +void SNSolverHPCHIP::PrintHistoryOutput( unsigned idx_iter ) { + + auto log = spdlog::get( "tabular" ); + + // assemble the line to print + std::string lineToPrint = ""; + std::string tmp; + for( int idx_field = 0; idx_field < _settings->GetNHistoryOutput() - 1; idx_field++ ) { + if( idx_field == 0 ) { + tmp = std::to_string( _historyOutputFields[idx_field] ); // Iteration count + } + else { + tmp = TextProcessingToolbox::DoubleToScientificNotation( _historyOutputFields[idx_field] ); + } + lineToPrint += tmp + ","; + } + tmp = TextProcessingToolbox::DoubleToScientificNotation( _historyOutputFields[_settings->GetNHistoryOutput() - 1] ); + lineToPrint += tmp; // Last element without comma + // std::cout << lineToPrint << std::endl; + if( _settings->GetHistoryOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetHistoryOutputFrequency() == 0 ) { + log->info( lineToPrint ); + } + else if( idx_iter == _nIter - 1 ) { // Always print last iteration + log->info( lineToPrint ); + } +} + +void SNSolverHPCHIP::DrawPreSolverOutput() { + + // Logger + auto log = spdlog::get( "event" ); + auto logCSV = spdlog::get( "tabular" ); + + std::string hLine = "--"; + + unsigned strLen = 15; // max width of one column + char paddingChar = ' '; + + // Assemble Header for Screen Output + std::string lineToPrint = "| "; + std::string tmpLine = "-----------------"; + for( unsigned idxFields = 0; idxFields < _settings->GetNScreenOutput(); idxFields++ ) { + std::string tmp = _screenOutputFieldNames[idxFields]; + + if( strLen > tmp.size() ) // Padding + tmp.insert( 0, strLen - tmp.size(), paddingChar ); + else if( strLen < tmp.size() ) // Cutting + tmp.resize( strLen ); + + lineToPrint += tmp + " |"; + hLine += tmpLine; + } + log->info( "---------------------------- Solver Starts -----------------------------" ); + log->info( "| The simulation will run for {} iterations.", _nIter ); + log->info( "| The spatial grid contains {} cells.", _nCells ); + if( _settings->GetSolverName() != PN_SOLVER && _settings->GetSolverName() != CSD_PN_SOLVER ) { + log->info( "| The velocity grid contains {} points.", _nq ); + } + log->info( hLine ); + log->info( lineToPrint ); + log->info( hLine ); + + std::string lineToPrintCSV = ""; + for( int idxFields = 0; idxFields < _settings->GetNHistoryOutput() - 1; idxFields++ ) { + std::string tmp = _historyOutputFieldNames[idxFields]; + lineToPrintCSV += tmp + ","; + } + lineToPrintCSV += _historyOutputFieldNames[_settings->GetNHistoryOutput() - 1]; + logCSV->info( lineToPrintCSV ); +} + +void SNSolverHPCHIP::DrawPostSolverOutput() { + + // Logger + auto log = spdlog::get( "event" ); + + std::string hLine = "--"; + + unsigned strLen = 10; // max width of one column + char paddingChar = ' '; + + // Assemble Header for Screen Output + std::string lineToPrint = "| "; + std::string tmpLine = "------------"; + for( unsigned idxFields = 0; idxFields < _settings->GetNScreenOutput(); idxFields++ ) { + std::string tmp = _screenOutputFieldNames[idxFields]; + + if( strLen > tmp.size() ) // Padding + tmp.insert( 0, strLen - tmp.size(), paddingChar ); + else if( strLen < tmp.size() ) // Cutting + tmp.resize( strLen ); + + lineToPrint += tmp + " |"; + hLine += tmpLine; + } + log->info( hLine ); +#ifndef BUILD_TESTING + log->info( "| The volume output files have been stored at " + _settings->GetOutputFile() ); + log->info( "| The log files have been stored at " + _settings->GetLogDir() + _settings->GetLogFile() ); +#endif + log->info( "--------------------------- Solver Finished ----------------------------" ); +} + +unsigned long SNSolverHPCHIP::Idx2D( unsigned long idx1, unsigned long idx2, unsigned long len2 ) { return idx1 * len2 + idx2; } + +unsigned long SNSolverHPCHIP::Idx3D( unsigned long idx1, unsigned long idx2, unsigned long idx3, unsigned long len2, unsigned long len3 ) { + return ( idx1 * len2 + idx2 ) * len3 + idx3; +} + +void SNSolverHPCHIP::WriteVolumeOutput( unsigned idx_iter ) { + unsigned nGroups = (unsigned)_settings->GetNVolumeOutput(); + if( ( _settings->GetVolumeOutputFrequency() != 0 && idx_iter % (unsigned)_settings->GetVolumeOutputFrequency() == 0 ) || + ( idx_iter == _nIter - 1 ) /* need sol at last iteration */ ) { + for( unsigned idx_group = 0; idx_group < nGroups; idx_group++ ) { + switch( _settings->GetVolumeOutput()[idx_group] ) { + case MINIMAL: + if( _rank == 0 ) { + _outputFields[idx_group][0] = _scalarFlux; + } + break; + + case MOMENTS: +#pragma omp parallel for + for( unsigned idx_cell = 0; idx_cell < _nCells; ++idx_cell ) { + _outputFields[idx_group][0][idx_cell] = 0.0; + _outputFields[idx_group][1][idx_cell] = 0.0; + for( unsigned idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + _outputFields[idx_group][0][idx_cell] += + _quadPts[Idx2D( idx_sys, 0, _nDim )] * _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * _quadWeights[idx_sys]; + _outputFields[idx_group][1][idx_cell] += + _quadPts[Idx2D( idx_sys, 1, _nDim )] * _sol[Idx2D( idx_cell, idx_sys, _localNSys )] * _quadWeights[idx_sys]; + } + } +#ifdef IMPORT_MPI + MPI_Barrier( MPI_COMM_WORLD ); + MPI_Allreduce( + _outputFields[idx_group][0].data(), _outputFields[idx_group][0].data(), _nCells, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + MPI_Allreduce( + _outputFields[idx_group][1].data(), _outputFields[idx_group][1].data(), _nCells, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + break; + default: ErrorMessages::Error( "Volume Output Group not defined for HPC SN Solver!", CURRENT_FUNCTION ); break; + } + } + } +} + +void SNSolverHPCHIP::PrintVolumeOutput( int idx_iter ) { + if( _settings->GetSaveRestartSolutionFrequency() != 0 && idx_iter % (int)_settings->GetSaveRestartSolutionFrequency() == 0 ) { + // std::cout << "Saving restart solution at iteration " << idx_iter << std::endl; + WriteRestartSolution( _settings->GetOutputFile(), + _sol, + _scalarFlux, + _rank, + idx_iter, + _totalAbsorptionHohlraumCenter, + _totalAbsorptionHohlraumVertical, + _totalAbsorptionHohlraumHorizontal, + _totalAbsorptionLattice ); + } + + if( _settings->GetVolumeOutputFrequency() != 0 && idx_iter % (int)_settings->GetVolumeOutputFrequency() == 0 ) { + WriteVolumeOutput( idx_iter ); + if( _rank == 0 ) { + ExportVTK( _settings->GetOutputFile() + "_" + std::to_string( idx_iter ), _outputFields, _outputFieldNames, _mesh ); // slow + } + } + if( idx_iter == (int)_nIter - 1 ) { // Last iteration write without suffix. + WriteVolumeOutput( idx_iter ); + if( _rank == 0 ) { + ExportVTK( _settings->GetOutputFile(), _outputFields, _outputFieldNames, _mesh ); + } + } +} + +void SNSolverHPCHIP::PrepareVolumeOutput() { + unsigned nGroups = (unsigned)_settings->GetNVolumeOutput(); + + _outputFieldNames.resize( nGroups ); + _outputFields.resize( nGroups ); + + // Prepare all OutputGroups ==> Specified in option VOLUME_OUTPUT + for( unsigned idx_group = 0; idx_group < nGroups; idx_group++ ) { + // Prepare all Output Fields per group + + // Different procedure, depending on the Group... + switch( _settings->GetVolumeOutput()[idx_group] ) { + case MINIMAL: + // Currently only one entry ==> rad flux + _outputFields[idx_group].resize( 1 ); + _outputFieldNames[idx_group].resize( 1 ); + + _outputFields[idx_group][0].resize( _nCells ); + _outputFieldNames[idx_group][0] = "scalar flux"; + break; + case MOMENTS: + // As many entries as there are moments in the system + _outputFields[idx_group].resize( _nOutputMoments ); + _outputFieldNames[idx_group].resize( _nOutputMoments ); + + for( unsigned idx_moment = 0; idx_moment < _nOutputMoments; idx_moment++ ) { + _outputFieldNames[idx_group][idx_moment] = std::string( "u_" + std::to_string( idx_moment ) ); + _outputFields[idx_group][idx_moment].resize( _nCells ); + } + break; + + default: ErrorMessages::Error( "Volume Output Group not defined for HPC SN Solver!", CURRENT_FUNCTION ); break; + } + } +} + +void SNSolverHPCHIP::SetGhostCells() { + if( _settings->GetProblemName() == PROBLEM_Lattice ) { + // #pragma omp parallel for + for( unsigned idx_cell = 0; idx_cell < _nCells; idx_cell++ ) { + if( _cellBoundaryTypes[idx_cell] == BOUNDARY_TYPE::NEUMANN || _cellBoundaryTypes[idx_cell] == BOUNDARY_TYPE::DIRICHLET ) { + _ghostCells[idx_cell] = std::vector( _localNSys, 0.0 ); + } + } + } + else if( _settings->GetProblemName() == PROBLEM_HalfLattice ) { // HALF LATTICE NOT WORKING + ErrorMessages::Error( "Test case does not work with MPI", CURRENT_FUNCTION ); + } + else if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + + auto nodes = _mesh->GetNodes(); + double tol = 1e-12; // For distance to boundary + + // #pragma omp parallel for + for( unsigned idx_cell = 0; idx_cell < _nCells; idx_cell++ ) { + + if( _cellBoundaryTypes[idx_cell] == BOUNDARY_TYPE::NEUMANN || _cellBoundaryTypes[idx_cell] == BOUNDARY_TYPE::DIRICHLET ) { + _ghostCells[idx_cell] = std::vector( _localNSys, 0.0 ); + + auto localCellNodes = _mesh->GetCells()[idx_cell]; + + for( unsigned idx_node = 0; idx_node < _mesh->GetNumNodesPerCell(); idx_node++ ) { // Check if corner node is in this cell + if( nodes[localCellNodes[idx_node]][0] < -0.65 + tol ) { // close to 0 => left boundary + for( unsigned idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + if( _quadPts[Idx2D( idx_sys, 0, _nDim )] > 0.0 ) _ghostCells[idx_cell][idx_sys] = 1.0; + } + break; + } + else if( nodes[localCellNodes[idx_node]][0] > 0.65 - tol ) { // right boundary + for( unsigned idx_sys = 0; idx_sys < _localNSys; idx_sys++ ) { + if( _quadPts[Idx2D( idx_sys, 0, _nDim )] < 0.0 ) _ghostCells[idx_cell][idx_sys] = 1.0; + } + break; + } + else if( nodes[localCellNodes[idx_node]][1] < -0.65 + tol ) { // lower boundary + break; + } + else if( nodes[localCellNodes[idx_node]][1] > 0.65 - tol ) { // upper boundary + break; + } + else if( idx_node == _mesh->GetNumNodesPerCell() - 1 ) { + ErrorMessages::Error( " Problem with ghost cell setup and boundary of this mesh ", CURRENT_FUNCTION ); + } + } + } + } + } + else if( _settings->GetProblemName() == PROBLEM_QuarterHohlraum ) { + ErrorMessages::Error( "Test case does not work with MPI", CURRENT_FUNCTION ); + } +} + +void SNSolverHPCHIP::SetProbingCellsLineGreen() { + + if( _settings->GetProblemName() == PROBLEM_SymmetricHohlraum ) { + assert( _nProbingCellsLineGreen % 2 == 0 ); + + std::vector p1 = { _cornerUpperLeftGreen[0] + _thicknessGreen / 2.0, _cornerUpperLeftGreen[1] - _thicknessGreen / 2.0 }; + std::vector p2 = { _cornerUpperRightGreen[0] - _thicknessGreen / 2.0, _cornerUpperRightGreen[1] - _thicknessGreen / 2.0 }; + std::vector p3 = { _cornerLowerRightGreen[0] - _thicknessGreen / 2.0, _cornerLowerRightGreen[1] + _thicknessGreen / 2.0 }; + std::vector p4 = { _cornerLowerLeftGreen[0] + _thicknessGreen / 2.0, _cornerLowerLeftGreen[1] + _thicknessGreen / 2.0 }; + + double verticalLineWidth = std::abs( p1[1] - p4[1] ); + double horizontalLineWidth = std::abs( p1[0] - p2[0] ); + + double pt_ratio_h = horizontalLineWidth / ( horizontalLineWidth + verticalLineWidth ); + + unsigned nHorizontalProbingCells = (unsigned)std::ceil( _nProbingCellsLineGreen / 2 * pt_ratio_h ); + unsigned nVerticalProbingCells = _nProbingCellsLineGreen / 2 - nHorizontalProbingCells; + assert( nHorizontalProbingCells > 1 ); + assert( nVerticalProbingCells > 1 ); + + _probingCellsLineGreen = std::vector( 2 * ( nVerticalProbingCells + nHorizontalProbingCells ) ); + + // Sample points on each side of the rectangle + std::vector side1 = linspace2D( p1, p2, nHorizontalProbingCells ); // upper horizontal + std::vector side2 = linspace2D( p2, p3, nVerticalProbingCells ); // right vertical + std::vector side3 = linspace2D( p3, p4, nHorizontalProbingCells ); // lower horizontal + std::vector side4 = linspace2D( p4, p1, nVerticalProbingCells ); // left vertical + + for( unsigned i = 0; i < nHorizontalProbingCells; ++i ) { + _probingCellsLineGreen[i] = side1[i]; + } + for( unsigned i = 0; i < nVerticalProbingCells; ++i ) { + _probingCellsLineGreen[i + nHorizontalProbingCells] = side2[i]; + } + for( unsigned i = 0; i < nHorizontalProbingCells; ++i ) { + _probingCellsLineGreen[i + nVerticalProbingCells + nHorizontalProbingCells] = side3[i]; + } + for( unsigned i = 0; i < nVerticalProbingCells; ++i ) { + _probingCellsLineGreen[i + nVerticalProbingCells + 2 * nHorizontalProbingCells] = side4[i]; + } + + // Block-wise approach + // Initialize the vector to store the corner points of each block + std::vector>> block_corners; + + double block_size = 0.05; + const double cx = _centerGreen[0]; + const double cy = _centerGreen[1]; + + // Loop to fill the blocks + for( int i = 0; i < 8; ++i ) { // 8 blocks in the x-direction (horizontal) (upper) (left to right) + + // Top row + double x1 = -0.2 + cx + i * block_size; + double y1 = 0.4 + cy; + double x2 = x1 + block_size; + double y2 = y1 - block_size; + + std::vector> corners = { + { x1, y1 }, // top-left + { x2, y1 }, // top-right + { x2, y2 }, // bottom-right + { x1, y2 } // bottom-left + }; + block_corners.push_back( corners ); + } + + for( int j = 0; j < 14; ++j ) { // 14 blocks in the y-direction (vertical) + // right column double x1 = 0.15; + double x1 = 0.15 + cx; + double y1 = 0.35 + cy - j * block_size; + double x2 = x1 + block_size; + double y2 = y1 - block_size; + + // Store the four corner points for this block + std::vector> corners = { + { x1, y1 }, // top-left + { x2, y1 }, // top-right + { x2, y2 }, // bottom-right + { x1, y2 } // bottom-left + }; + + block_corners.push_back( corners ); + } + + for( int i = 0; i < 8; ++i ) { // 8 blocks in the x-direction (horizontal) (lower) (right to left) + // bottom row + double x1 = 0.15 + cx - i * block_size; + double y1 = -0.35 + cy; + double x2 = x1 + block_size; + double y2 = y1 - block_size; + + std::vector> corners = { + { x1, y1 }, // top-left + { x2, y1 }, // top-right + { x2, y2 }, // bottom-right + { x1, y2 } // bottom-left + }; + block_corners.push_back( corners ); + } + + for( int j = 0; j < 14; ++j ) { // 14 blocks in the y-direction (vertical) (down to up) + + // left column + double x1 = -0.2 + cx; + double y1 = -0.3 + cy + j * block_size; + double x2 = x1 + block_size; + double y2 = y1 - block_size; + + // Store the four corner points for this block + std::vector> corners = { + { x1, y1 }, // top-left + { x2, y1 }, // top-right + { x2, y2 }, // bottom-right + { x1, y2 } // bottom-left + }; + + block_corners.push_back( corners ); + } + + // Compute the probing cells for each block + for( unsigned i = 0; i < _nProbingCellsBlocksGreen; i++ ) { + _probingCellsBlocksGreen.push_back( _mesh->GetCellsofRectangle( block_corners[i] ) ); + } + } +} + +void SNSolverHPCHIP::ComputeQOIsGreenProbingLine() { +#pragma omp parallel for + for( unsigned i = 0; i < _nProbingCellsLineGreen; i++ ) { // Loop over probing cells + _absorptionValsLineSegment[i] = + ( _sigmaT[_probingCellsLineGreen[i]] - _sigmaS[_probingCellsLineGreen[i]] ) * _scalarFlux[_probingCellsLineGreen[i]]; + } + + // Block-wise approach + // #pragma omp parallel for + for( unsigned i = 0; i < _nProbingCellsBlocksGreen; i++ ) { + _absorptionValsBlocksGreen[i] = 0.0; + for( unsigned j = 0; j < _probingCellsBlocksGreen[i].size(); j++ ) { + _absorptionValsBlocksGreen[i] += ( _sigmaT[_probingCellsBlocksGreen[i][j]] - _sigmaS[_probingCellsBlocksGreen[i][j]] ) * + _scalarFlux[_probingCellsBlocksGreen[i][j]] * _areas[_probingCellsBlocksGreen[i][j]]; + } + } + // std::cout << _absorptionValsBlocksGreen[1] << std::endl; + // std::cout << _absorptionValsLineSegment[1] << std::endl; +} + +std::vector SNSolverHPCHIP::linspace2D( const std::vector& start, const std::vector& end, unsigned num_points ) { + /** + * Generate a 2D linspace based on the start and end points with a specified number of points. + * + * @param start vector of starting x and y coordinates + * @param end vector of ending x and y coordinates + * @param num_points number of points to generate + * + * @return vector of unsigned integers representing the result + */ + + std::vector result; + assert( num_points > 1 ); + result.resize( num_points ); + double stepX = ( end[0] - start[0] ) / ( num_points - 1 ); + double stepY = ( end[1] - start[1] ) / ( num_points - 1 ); +#pragma omp parallel for + for( unsigned i = 0; i < num_points; ++i ) { + double x = start[0] + i * stepX; + double y = start[1] + i * stepY; + + result[i] = _mesh->GetCellOfKoordinate( x, y ); + } + + return result; +} + +void SNSolverHPCHIP::ComputeCellsPerimeterLattice() { + double l_1 = 1.5; // perimeter 1 + double l_2 = 2.5; // perimeter 2 + auto nodes = _mesh->GetNodes(); + auto cells = _mesh->GetCells(); + auto cellMids = _mesh->GetCellMidPoints(); + auto normals = _mesh->GetNormals(); + auto neigbors = _mesh->GetNeighbours(); + + _isPerimeterLatticeCell1.resize( _mesh->GetNumCells(), false ); + _isPerimeterLatticeCell2.resize( _mesh->GetNumCells(), false ); + + for( unsigned idx_cell = 0; idx_cell < _mesh->GetNumCells(); ++idx_cell ) { + if( abs( cellMids[idx_cell][0] ) < l_1 && abs( cellMids[idx_cell][1] ) < l_1 ) { + // Cell is within perimeter + for( unsigned idx_nbr = 0; idx_nbr < _mesh->GetNumNodesPerCell(); ++idx_nbr ) { + if( neigbors[idx_cell][idx_nbr] == _mesh->GetNumCells() ) { + continue; // Skip boundary - ghost cells + } + + if( ( abs( cellMids[neigbors[idx_cell][idx_nbr]][0] ) > l_1 && abs( cellMids[idx_cell][0] ) < l_1 ) || + ( abs( cellMids[neigbors[idx_cell][idx_nbr]][1] ) > l_1 && abs( cellMids[idx_cell][1] ) < l_1 ) ) { + // neighbor is outside perimeter + _cellsLatticePerimeter1[idx_cell].push_back( idx_nbr ); + _isPerimeterLatticeCell1[idx_cell] = true; + } + } + } + else if( abs( cellMids[idx_cell][0] ) < l_2 && abs( cellMids[idx_cell][1] ) < l_2 ) { + // Cell is within perimeter + for( unsigned idx_nbr = 0; idx_nbr < _mesh->GetNumNodesPerCell(); ++idx_nbr ) { + if( neigbors[idx_cell][idx_nbr] == _mesh->GetNumCells() ) { + continue; // Skip boundary - ghost cells + } + if( ( abs( cellMids[neigbors[idx_cell][idx_nbr]][0] ) > l_2 && abs( cellMids[idx_cell][0] ) < l_2 ) || + ( abs( cellMids[neigbors[idx_cell][idx_nbr]][1] ) > l_2 && abs( cellMids[idx_cell][1] ) < l_2 ) ) { + // neighbor is outside perimeter + _cellsLatticePerimeter2[idx_cell].push_back( idx_nbr ); + _isPerimeterLatticeCell2[idx_cell] = true; + } + } + } + } +} diff --git a/tests/catch2_main.cpp b/tests/catch2_main.cpp index 72bfe4a4..82f7b05b 100644 --- a/tests/catch2_main.cpp +++ b/tests/catch2_main.cpp @@ -4,9 +4,18 @@ // #include // #define PY_ARRAY_UNIQUE_SYMBOL KITRT_ARRAY_API #include +#ifdef IMPORT_MPI +#include +#endif int main( int argc, char** argv ) { - // MPI_Init( &argc, &argv ); +#ifdef IMPORT_MPI + int mpiInitialized = 0; + MPI_Initialized( &mpiInitialized ); + if( !mpiInitialized ) { + MPI_Init( &argc, &argv ); + } +#endif // wchar_t* program = Py_DecodeLocale( argv[0], NULL ); // Py_SetProgramName( program ); @@ -15,6 +24,12 @@ int main( int argc, char** argv ) { std::filesystem::remove_all( std::string( TESTS_PATH ) + "result" ); - // MPI_Finalize(); +#ifdef IMPORT_MPI + int mpiFinalized = 0; + MPI_Finalized( &mpiFinalized ); + if( !mpiFinalized ) { + MPI_Finalize(); + } +#endif return result; } diff --git a/tests/test_cases.cpp b/tests/test_cases.cpp index 383ffa8f..9eae1428 100644 --- a/tests/test_cases.cpp +++ b/tests/test_cases.cpp @@ -16,6 +16,9 @@ #ifdef KITRT_ENABLE_CUDA_HPC #include "solvers/snsolver_hpc_cuda.hpp" #endif +#ifdef KITRT_ENABLE_HIP_HPC +#include "solvers/snsolver_hpc_hip.hpp" +#endif #include "solvers/solverbase.hpp" using vtkUnstructuredGridReaderSP = vtkSmartPointer; @@ -642,6 +645,69 @@ TEST_CASE( "SN_SOLVER_HPC_CUDA_QOI_VALIDATION", "[validation_tests][hpc][cuda]" } #endif +#ifdef KITRT_ENABLE_HIP_HPC +TEST_CASE( "SN_SOLVER_HPC_HIP_QOI_VALIDATION", "[validation_tests][hpc][hip]" ) { + std::string hpcFileDir = "input/validation_tests/SN_solver_hpc/"; + std::string resultDir = std::string( TESTS_PATH ) + "result"; + std::string logDir = std::string( TESTS_PATH ) + "result/logs"; + + auto runCpuAndCollectCSV = [&]( const std::string& configFileName, const std::string& prefix ) { + Config* config = new Config( configFileName ); + SNSolverHPC* solver = new SNSolverHPC( config ); + solver->Solve(); + requireAndFlushTestLoggers(); + delete solver; + delete config; + return findNewestCSVFile( logDir, prefix ); + }; + + auto runHipAndCollectCSV = [&]( const std::string& configFileName, const std::string& prefix ) { + Config* config = new Config( configFileName ); + SNSolverHPCHIP* solver = new SNSolverHPCHIP( config ); + solver->Solve(); + requireAndFlushTestLoggers(); + delete solver; + delete config; + return findNewestCSVFile( logDir, prefix ); + }; + + SECTION( "lattice_order1_cpu_vs_hip_all_qois" ) { + std::filesystem::remove_all( resultDir ); + resetTestLoggers(); + const std::string cpuConfig = std::string( TESTS_PATH ) + hpcFileDir + "lattice_hpc_200_cpu_order1.cfg"; + const std::string cpuCSV = runCpuAndCollectCSV( cpuConfig, "lattice_hpc_200_cpu_order1_output" ); + REQUIRE( !cpuCSV.empty() ); + + resetTestLoggers(); + const std::string hipConfig = std::string( TESTS_PATH ) + hpcFileDir + "lattice_hpc_200_cuda_order1.cfg"; + const std::string hipCSV = runHipAndCollectCSV( hipConfig, "lattice_hpc_200_cuda_order1_output" ); + REQUIRE( !hipCSV.empty() ); + + INFO( "CPU CSV: " << cpuCSV ); + INFO( "HIP CSV: " << hipCSV ); + compareHistoryCSV( cpuCSV, hipCSV ); + } + + SECTION( "lattice_order2_cpu_vs_hip_all_qois" ) { + std::filesystem::remove_all( resultDir ); + resetTestLoggers(); + const std::string cpuConfig = std::string( TESTS_PATH ) + hpcFileDir + "lattice_hpc_200_cpu_order2.cfg"; + const std::string cpuCSV = runCpuAndCollectCSV( cpuConfig, "lattice_hpc_200_cpu_order2_output" ); + REQUIRE( !cpuCSV.empty() ); + + resetTestLoggers(); + const std::string hipConfig = std::string( TESTS_PATH ) + hpcFileDir + "lattice_hpc_200_cuda_order2.cfg"; + const std::string hipCSV = runHipAndCollectCSV( hipConfig, "lattice_hpc_200_cuda_order2_output" ); + REQUIRE( !hipCSV.empty() ); + + INFO( "CPU CSV: " << cpuCSV ); + INFO( "HIP CSV: " << hipCSV ); + compareHistoryCSV( cpuCSV, hipCSV ); + } + +} +#endif + TEST_CASE( "screen_output", "[output]" ) { std::string out_fileDir = "input/validation_tests/output/"; resetTestLoggers(); // Make sure to write in own logging file diff --git a/tools/singularity/build_container.sh b/tools/singularity/build_container.sh index 1975c056..a57c0c5b 100755 --- a/tools/singularity/build_container.sh +++ b/tools/singularity/build_container.sh @@ -10,12 +10,16 @@ case "${mode}" in cuda) singularity build kit_rt_MPI_cuda.sif kit_rt_MPI_cuda.def ;; + rocm) + singularity build kit_rt_MPI_rocm72.sif kit_rt_MPI_rocm72.def + ;; all) singularity build kit_rt.sif kit_rt.def singularity build kit_rt_MPI_cuda.sif kit_rt_MPI_cuda.def + singularity build kit_rt_MPI_rocm72.sif kit_rt_MPI_rocm72.def ;; *) - echo "Usage: $0 [cpu|cuda|all]" >&2 + echo "Usage: $0 [cpu|cuda|rocm|all]" >&2 exit 1 ;; esac diff --git a/tools/singularity/install_kitrt_singularity_cuda.sh b/tools/singularity/install_kitrt_singularity_cuda.sh index a59653d2..6d613888 100755 --- a/tools/singularity/install_kitrt_singularity_cuda.sh +++ b/tools/singularity/install_kitrt_singularity_cuda.sh @@ -4,5 +4,5 @@ set -euo pipefail cd ../../ mkdir -p build_singularity_cuda cd build_singularity_cuda -cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_MPI=ON -DBUILD_CUDA_HPC=ON -DBUILD_ML=OFF .. +cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_MPI=ON -DBUILD_CUDA_HPC=ON -DBUILD_HIP_HPC=OFF -DBUILD_ML=OFF .. make -j diff --git a/tools/singularity/install_kitrt_singularity_rocm.sh b/tools/singularity/install_kitrt_singularity_rocm.sh new file mode 100755 index 00000000..b1f8cf9f --- /dev/null +++ b/tools/singularity/install_kitrt_singularity_rocm.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd ../../ +ROCM_CLANGXX="${ROCM_CLANGXX:-/opt/rocm-7.2.0/lib/llvm/bin/clang++}" +if [[ ! -x "${ROCM_CLANGXX}" && -x "/opt/rocm/lib/llvm/bin/clang++" ]]; then + ROCM_CLANGXX="/opt/rocm/lib/llvm/bin/clang++" +fi + +if [[ ! -x "${ROCM_CLANGXX}" ]]; then + echo "Could not find ROCm clang++ compiler. Set ROCM_CLANGXX to a valid path." >&2 + exit 1 +fi + +mkdir -p build_singularity_rocm72 +cd build_singularity_rocm72 +if [[ -f CMakeCache.txt ]]; then + CACHED_CXX="$(grep '^CMAKE_CXX_COMPILER:FILEPATH=' CMakeCache.txt | cut -d= -f2- || true)" + if [[ -n "${CACHED_CXX}" && "${CACHED_CXX}" != "${ROCM_CLANGXX}" ]]; then + echo "Resetting CMake cache because compiler changed: ${CACHED_CXX} -> ${ROCM_CLANGXX}" + rm -rf CMakeCache.txt CMakeFiles + fi +fi + +cmake -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER="${ROCM_CLANGXX}" \ + -DCMAKE_HIP_COMPILER="${ROCM_CLANGXX}" \ + -DBUILD_MPI=ON \ + -DBUILD_CUDA_HPC=OFF \ + -DBUILD_HIP_HPC=ON \ + -DBUILD_ML=OFF .. +make -j diff --git a/tools/singularity/kit_rt_MPI_rocm72.def b/tools/singularity/kit_rt_MPI_rocm72.def new file mode 100644 index 00000000..96e5fa88 --- /dev/null +++ b/tools/singularity/kit_rt_MPI_rocm72.def @@ -0,0 +1,75 @@ +Bootstrap: docker +From: rocm/dev-ubuntu-22.04:7.2-complete + +%post + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/rocm/lib:/opt/rocm/lib64" + export PYTHONPATH=/usr/local/gmsh/lib:$PYTHONPATH + export PATH=/usr/local/gmsh/bin:/opt/rocm/bin:$PATH + + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -qq \ + gcc \ + g++ \ + libopenmpi-dev \ + openmpi-bin \ + libblas-dev \ + liblapack-dev \ + git \ + make \ + ninja-build \ + cmake \ + wget \ + ssh \ + libssl-dev \ + libxt-dev \ + libgl1-mesa-dev \ + libglu1 \ + libxrender1 \ + libxcursor-dev \ + libxft-dev \ + libxinerama-dev \ + python3 \ + python3-pip \ + doxygen + + apt-get clean + apt-get autoremove --purge + rm -rf /var/lib/apt/lists/* + + cd /usr/local + wget -nc --quiet http://gmsh.info/bin/Linux/gmsh-4.7.0-Linux64-sdk.tgz + tar xzf gmsh-4.7.0-Linux64-sdk.tgz + mv gmsh-4.7.0-Linux64-sdk gmsh + rm gmsh-4.7.0-Linux64-sdk.tgz + + wget -nc --no-check-certificate --quiet https://www.vtk.org/files/release/9.1/VTK-9.1.0.tar.gz + tar xzf VTK-9.1.0.tar.gz + mkdir VTK-9.1.0/build + cd VTK-9.1.0/build + cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_DOCUMENTATION=OFF -DBUILD_TESTING=OFF ../ + ninja + ninja install > /dev/null + cd - + rm -rf VTK-* + + pip3 install numpy pygmsh==6.1.1 Pillow pydicom gcovr sphinx_rtd_theme breathe cpp-coveralls coveralls pandas + +%environment + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/rocm/lib:/opt/rocm/lib64" + export PYTHONPATH=/usr/local/gmsh/lib:$PYTHONPATH + export PATH=/usr/local/gmsh/bin:/opt/rocm/bin:$PATH + +%help + KiT-RT ROCm 7.2 MPI container. + + Example build: + cmake -S . -B build_singularity_rocm72 -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_MPI=ON -DBUILD_HIP_HPC=ON -DBUILD_ML=OFF + cmake --build build_singularity_rocm72 -j + + Example run: + mpirun -np 2 ./build_singularity_rocm72/KiT-RT \ + tests/input/validation_tests/SN_solver_hpc/lattice_hpc_200_cuda_order2.cfg + +%runscript + exec /bin/bash diff --git a/tools/singularity/singularity_run_interactive_rocm.sh b/tools/singularity/singularity_run_interactive_rocm.sh new file mode 100644 index 00000000..02b611d8 --- /dev/null +++ b/tools/singularity/singularity_run_interactive_rocm.sh @@ -0,0 +1 @@ +singularity shell --nv --bind $(pwd)/../..:/mnt kit_rt_MPI_rocm72.sif