diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8054c9c354..f070ecfbd6 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -149,7 +149,7 @@ jobs:
         continue-on-error: true
 
   github:
-    name: Github
+    name: ${{ matrix.nvhpc && format('NVHPC {0} ({1})', matrix.nvhpc, matrix.target) || format('Github ({0}, {1}, {2}, intel={3})', matrix.os, matrix.mpi, matrix.debug, matrix.intel) }}
     needs: [lint-gate, file-changes, rebuild-cache]
     if: >-
       !cancelled() &&
@@ -164,6 +164,8 @@ jobs:
         precision: ['']
         debug: ['debug', 'no-debug']
         intel: [true, false]
+        nvhpc: ['']
+        target: ['']
         exclude:
           - os: macos
             intel: true
@@ -175,11 +177,59 @@ jobs:
             debug: no-debug
             intel: false
 
+          # NVHPC compiler matrix: cpu (build+test), gpu (build-only, acc then omp)
+          # Every release from 23.11 through 26.3 (current)
+          - { nvhpc: '23.11', target: cpu }
+          - { nvhpc: '23.11', target: gpu }
+          - { nvhpc: '24.1', target: cpu }
+          - { nvhpc: '24.1', target: gpu }
+          - { nvhpc: '24.3', target: cpu }
+          - { nvhpc: '24.3', target: gpu }
+          - { nvhpc: '24.5', target: cpu }
+          - { nvhpc: '24.5', target: gpu }
+          - { nvhpc: '24.7', target: cpu }
+          - { nvhpc: '24.7', target: gpu }
+          - { nvhpc: '24.9', target: cpu }
+          - { nvhpc: '24.9', target: gpu }
+          - { nvhpc: '24.11', target: cpu }
+          - { nvhpc: '24.11', target: gpu }
+          - { nvhpc: '25.1', target: cpu }
+          - { nvhpc: '25.1', target: gpu }
+          - { nvhpc: '25.3', target: cpu }
+          - { nvhpc: '25.3', target: gpu }
+          - { nvhpc: '25.5', target: cpu }
+          - { nvhpc: '25.5', target: gpu }
+          - { nvhpc: '25.7', target: cpu }
+          - { nvhpc: '25.7', target: gpu }
+          - { nvhpc: '25.9', target: cpu }
+          - { nvhpc: '25.9', target: gpu }
+          - { nvhpc: '25.11', target: cpu }
+          - { nvhpc: '25.11', target: gpu }
+          - { nvhpc: '26.1', target: cpu }
+          - { nvhpc: '26.1', target: gpu }
+          - { nvhpc: '26.3', target: cpu }
+          - { nvhpc: '26.3', target: gpu }
+
       fail-fast: false
     continue-on-error: true
-    runs-on: ${{ matrix.os }}-latest
+    runs-on: ${{ matrix.nvhpc && 'ubuntu-22.04' || format('{0}-latest', matrix.os) }}
+    container:
+      image: ${{ matrix.nvhpc && format('nvcr.io/nvidia/nvhpc:{0}-devel-cuda_multi-ubuntu22.04', matrix.nvhpc) || '' }}
+      options: ${{ matrix.nvhpc && '--security-opt seccomp=unconfined' || '' }}
+    env:
+      CC: ${{ matrix.nvhpc && 'nvc' || '' }}
+      CXX: ${{ matrix.nvhpc && 'nvc++' || '' }}
+      FC: ${{ matrix.nvhpc && 'nvfortran' || '' }}
+      OMPI_ALLOW_RUN_AS_ROOT: ${{ matrix.nvhpc && '1' || '' }}
+      OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: ${{ matrix.nvhpc && '1' || '' }}
+      PMIX_MCA_gds: ${{ matrix.nvhpc && 'hash' || '' }}
+      OMPI_MCA_hwloc_base_binding_policy: ${{ matrix.nvhpc && 'none' || '' }}
 
     steps:
+      - name: Git safe directory
+        if: matrix.nvhpc
+        run: git config --global --add safe.directory "$GITHUB_WORKSPACE"  # not hard-coded: the workspace path follows the repository name
+
       - name: Clone
         uses: actions/checkout@v4
 
@@ -222,7 +272,7 @@ jobs:
           fi
 
       - name: Setup MacOS
-        if: matrix.os == 'macos'
+        if: matrix.os == 'macos' && !matrix.nvhpc
         run: |
           brew update
           brew upgrade || true
@@ -231,7 +281,7 @@ jobs:
           echo "BOOST_INCLUDE=/opt/homebrew/include/" >> $GITHUB_ENV
 
       - name: Setup Ubuntu
-        if: matrix.os == 'ubuntu' && matrix.intel == false
+        if: matrix.os == 'ubuntu' && matrix.intel == false && !matrix.nvhpc
         run: |
           sudo apt update -y
           sudo apt install -y cmake gcc g++ python3 python3-dev hdf5-tools \
@@ -254,7 +304,25 @@ jobs:
           printenv | sort > /tmp/env_after
           diff /tmp/env_before /tmp/env_after | grep '^>' | sed 's/^> //' >> $GITHUB_ENV
 
+      # --- NVHPC container setup ---
+      - name: Setup NVHPC
+        if: matrix.nvhpc
+        run: |
+          apt-get update -y
+          DEBIAN_FRONTEND=noninteractive apt-get install -y cmake python3 python3-venv python3-pip \
+            libfftw3-dev libhdf5-dev hdf5-tools git
+          # Set up NVHPC HPC-X MPI runtime paths
+          HPCX_DIR=$(dirname "$(find /opt/nvidia/hpc_sdk -path "*/hpcx/hpcx-*/ompi/bin/mpirun" | head -1)")/../..
+          MPI_LIB=$(mpifort --showme:link | grep -oP '(?<=-L)\S+' | head -1)
+          echo "LD_LIBRARY_PATH=${MPI_LIB:+${MPI_LIB}:}${HPCX_DIR}/ucx/lib:${HPCX_DIR}/ucc/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> $GITHUB_ENV
+          # Container MPI fixes: PMIx shared-memory (gds), hwloc binding, rank oversubscription
+          echo "PMIX_MCA_gds=hash" >> $GITHUB_ENV
+          echo "OMPI_MCA_hwloc_base_binding_policy=none" >> $GITHUB_ENV
+          echo "OMPI_MCA_rmaps_base_oversubscribe=1" >> $GITHUB_ENV
+
+      # --- Standard build + test ---
       - name: Build
+        if: '!matrix.nvhpc'
         run: |
           /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} $PRECISION $TEST_ALL
         env:
@@ -262,6 +330,7 @@ jobs:
           PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }}
 
       - name: Test
+        if: '!matrix.nvhpc'
         run: |
           /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $ONLY_CHANGES $TEST_ALL $TEST_PCT
         env:
@@ -269,6 +338,23 @@ jobs:
           TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
           ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}
 
+      # --- NVHPC build + test ---
+      - name: Build (NVHPC)
+        if: matrix.nvhpc && matrix.target == 'cpu'
+        run: /bin/bash mfc.sh test -v --dry-run -j $(nproc) --test-all
+
+      - name: Build (NVHPC GPU)
+        if: matrix.nvhpc && matrix.target == 'gpu'
+        run: |
+          /bin/bash mfc.sh test -v --dry-run -j 2 --test-all --gpu acc
+          /bin/bash mfc.sh test -v --dry-run -j 2 --test-all --gpu mp
+
+      - name: Test (NVHPC)
+        if: matrix.nvhpc && matrix.target == 'cpu'
+        run: |
+          ulimit -s unlimited || ulimit -s 65536 || true
+          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) --test-all
+
   self:
     name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
     needs: [lint-gate, file-changes, rebuild-cache]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb77271a37..baf310fe2b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -262,11 +262,6 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
         elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
             if (MFC_Unified)
                 message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory")
-            elseif (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER "24.11" AND CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25.9")
-                message(STATUS "LTO/IPO is not supported in NVHPC Version 24.11 to 25.9. Use >=25.9 or (<=24.11 && > 23.11) Performance will be degraded.")
-                set(NVHPC_USE_TWO_PASS_IPO FALSE)
-            elseif(CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "23.11")
-                message(STATUS "LTO/IPO is not supported in NVHPC Version < 23.11. Use a newer version of NVHPC for best performance.")
             else()
                 message(STATUS "Performing IPO using -Mextract followed by -Minline")
                 set(NVHPC_USE_TWO_PASS_IPO TRUE)
@@ -453,7 +448,20 @@ function(MFC_SETUP_TARGET)
                 $<$:-Minline>
             )
             add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib)
-            target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib)
+            target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib,except:f_is_default,except:s_compute_dt,except:my_inquire,except:s_mpi_abort,except:s_mpi_barrier,except:s_prohibit_abort,except:s_int_to_str,except:s_associate_cbc_coefficients_pointers)
+
+            # Exclude m_start_up and m_cbc from cross-file inlining: these files hold
+            # initialization/boundary code that triggers an NVHPC 25.x fort2 ICE
+            # (internal compiler error) when too many functions are cross-inlined into
+            # them. GPU hot-path files (m_rhs, m_riemann_solvers, m_viscous, m_weno, etc.) keep full IPO.
+            foreach(_no_inline_file m_start_up m_cbc)
+                set_source_files_properties(
+                    "${CMAKE_BINARY_DIR}/fypp/${ARGS_TARGET}/${_no_inline_file}.fpp.f90"
+                    TARGET_DIRECTORY ${ARGS_TARGET}
+                    PROPERTIES COMPILE_OPTIONS "-Mnoinline"
+                )
+            endforeach()
+
             list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib)
         endif()
 
diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp
index 72055ef4ab..1e533673e8 100644
--- a/src/simulation/m_riemann_solvers.fpp
+++ b/src/simulation/m_riemann_solvers.fpp
@@ -2205,7 +2205,8 @@ contains
                                & G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, &
                                & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, &
                                & alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, &
-                               & xi_M, xi_P, xi_L, xi_R, xi_MP, xi_PP]')
+                               & xi_M, xi_P, xi_L, xi_R, xi_MP, xi_PP, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, &
+                               & Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]')
                 do l = is3%beg, is3%end
                     do k = is2%beg, is2%end
                         do j = is1%beg, is1%end
@@ -2423,7 +2424,8 @@ contains
                                & qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, &
                                & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, &
                                & xi_P, xi_L, xi_R, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, R3Lbar, R3Rbar, &
-                               & R3V2Lbar, R3V2Rbar]')
+                               & R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, &
+                               & Phi_avg, h_iL, h_iR, h_avg_2]')
                 do l = is3%beg, is3%end
                     do k = is2%beg, is2%end
                         do j = is1%beg, is1%end
diff --git a/toolchain/dependencies/CMakeLists.txt b/toolchain/dependencies/CMakeLists.txt
index 8f51fbce76..0ef1dcf604 100644
--- a/toolchain/dependencies/CMakeLists.txt
+++ b/toolchain/dependencies/CMakeLists.txt
@@ -61,11 +61,13 @@ if (MFC_HDF5)
             GIT_PROGRESS ON
             CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
                        -DBUILD_SHARED_LIBS=OFF
-                       -DFORTRAN_LIBRARIES=ON
                        -DBUILD_TESTING=OFF
                        -DHDF5_BUILD_UTILS=OFF
                        -DHDF5_BUILD_TOOLS=ON
                        -DHDF5_BUILD_EXAMPLES=OFF
+                       -DHDF5_BUILD_CPP_LIB=OFF
+                       -DHDF5_BUILD_FORTRAN=OFF
+                       -DHDF5_ENABLE_PARALLEL=OFF
         )
     endif()
 endif()
@@ -82,13 +84,15 @@ if (MFC_SILO)
 
         ExternalProject_Add(silo
             GIT_REPOSITORY "https://github.com/LLNL/Silo"
-            GIT_TAG 0cddaa6865fb49ae7b2110fa1b6f2709592abe0b
+            GIT_TAG 4.12.0
+            GIT_SHALLOW ON
             GIT_PROGRESS ON
             PATCH_COMMAND "${GIT_EXECUTABLE}" stash
                        && "${GIT_EXECUTABLE}" apply "${CMAKE_SOURCE_DIR}/Silo.patch"
             CMAKE_ARGS -DSILO_ENABLE_SHARED=OFF
                        -DSILO_ENABLE_SILOCK=OFF
                        -DSILO_ENABLE_BROWSER=OFF
+                       -DSILO_ENABLE_ZFP=OFF
                        -DFIND_LIBRARY_USE_LIB64_PATHS=ON
                        "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}"
                        "-DCMAKE_FIND_ROOT_PATH=${CMAKE_FIND_ROOT_PATH}"
diff --git a/toolchain/mfc/test/cases.py b/toolchain/mfc/test/cases.py
index 3666b930c3..fcaa8eb1c4 100644
--- a/toolchain/mfc/test/cases.py
+++ b/toolchain/mfc/test/cases.py
@@ -1542,6 +1542,7 @@ def foreach_example():
             "1D_multispecies_diffusion",
             "2D_ibm_stl_MFCCharacter",
             "1D_qbmm",  # formatted I/O field overflow on gfortran 12
+            "3D_rayleigh_taylor_muscl",  # segfaults with nvfortran+MPI in Docker (seccomp/mprotect)
         ]
         if path in casesToSkip:
             continue