Fix 8 HPC-sensitive bugs: GPU kernels, MPI broadcast, domain decomposition (#1242)

sbryngelson · web-flow · commit 1412eb2a9362 · 2026-02-26T16:44:28.000-05:00
diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp
@@ -50,7 +50,7 @@ contains
                         if (z == iz_s%beg) then
                             divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z))
                         else if (z == iz_s%end) then
-                            divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2))
+                            divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(3)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2))
                         else
                             divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1))
                         end if
diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp
@@ -936,9 +936,9 @@ contains
                                         (j + buff_size*((k + 1) + (n + 1)*l))
                                     q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp)
 #if defined(__INTEL_COMPILER)
-                                    if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then
+                                    if (ieee_is_nan(q_comm(i)%sf(j + unpack_offset, k, l))) then
                                         print *, "Error", j, k, l, i
-                                        error stop "NaN(s) in recv"
+                                        call s_mpi_abort("NaN(s) in recv")
                                     end if
 #endif
                                 end do
@@ -991,9 +991,9 @@ contains
                                          ((k + buff_size) + buff_size*l))
                                     q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp)
 #if defined(__INTEL_COMPILER)
-                                    if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then
+                                    if (ieee_is_nan(q_comm(i)%sf(j, k + unpack_offset, l))) then
                                         print *, "Error", j, k, l, i
-                                        error stop "NaN(s) in recv"
+                                        call s_mpi_abort("NaN(s) in recv")
                                     end if
 #endif
                                 end do
@@ -1050,9 +1050,9 @@ contains
                                           (l + buff_size)))
                                     q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp)
 #if defined(__INTEL_COMPILER)
-                                    if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then
+                                    if (ieee_is_nan(q_comm(i)%sf(j, k, l + unpack_offset))) then
                                         print *, "Error", j, k, l, i
-                                        error stop "NaN(s) in recv"
+                                        call s_mpi_abort("NaN(s) in recv")
                                     end if
 #endif
                                 end do
@@ -1153,8 +1153,6 @@ contains
 
         if (igr) then
             recon_order = igr_order
-        else
-            recon_order = weno_order
         end if
 
         ! 3D Cartesian Processor Topology
diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp
@@ -852,11 +852,11 @@ contains
                     end if
 
                     if (hypoelasticity) then
+                        if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp)
                         $:GPU_LOOP(parallelism='[seq]')
                         do i = strxb, strxe
                             ! subtracting elastic contribution for pressure calculation
                             if (G_K > verysmall) then
-                                if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp)
                                 qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - &
                                                                 ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K
                                 ! Double for shear stresses
@@ -1123,11 +1123,10 @@ contains
                     end if
 
                     if (hypoelasticity) then
+                        if (cont_damage) G = G*max((1._wp - q_prim_vf(damage_idx)%sf(j, k, l)), 0._wp)
                         do i = strxb, strxe
                             ! adding elastic contribution
                             if (G > verysmall) then
-                                if (cont_damage) G = G*max((1._wp - q_prim_vf(damage_idx)%sf(j, k, l)), 0._wp)
-
                                 q_cons_vf(E_idx)%sf(j, k, l) = q_cons_vf(E_idx)%sf(j, k, l) + &
                                                                (q_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G)
                                 ! Double for shear stresses
diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp
@@ -483,6 +483,8 @@ contains
         integer :: m_glb_ds, n_glb_ds, p_glb_ds
         integer :: m_glb_save, n_glb_save, p_glb_save ! Size of array being saved
 
+        loc_violations = 0._wp
+
         if (down_sample) then
             if ((mod(m + 1, 3) > 0) .or. (mod(n + 1, 3) > 0) .or. (mod(p + 1, 3) > 0)) then
                 loc_violations = 1._wp
diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp
@@ -58,7 +58,7 @@ contains
             & 'igr', 'down_sample', 'simplex_perturb','fft_wrt', 'hyper_cleaning' ]
             call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         #:endfor
-        call MPI_BCAST(fluid_rho(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+        call MPI_BCAST(fluid_rho(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr)
 
         #:for VAR in [ 'x_domain%beg', 'x_domain%end', 'y_domain%beg',         &
             & 'y_domain%end', 'z_domain%beg', 'z_domain%end', 'a_x', 'a_y',    &
diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp
@@ -930,7 +930,7 @@ contains
                             if (bc_${XYZ}$%grcbc_in) then
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do i = 2, momxb
-                                    L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$)
+                                    L(i) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$)
                                 end do
                                 if (n > 0) then
                                     L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$)
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
@@ -145,7 +145,7 @@ contains
 
         #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', &
             & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac',    &
-            & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', &
+            & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve3', &
             & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', &
             & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', &
             & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', &
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
@@ -965,7 +965,7 @@ contains
                     do j = 0, m
                         if (ieee_is_nan(real(q_cons_ts(stor)%vf(i)%sf(j, k, l), kind=wp))) then
                             print *, "NaN(s) in timestep output.", j, k, l, i, proc_rank, t_step, m, n, p
-                            error stop "NaN(s) in timestep output."
+                            call s_mpi_abort("NaN(s) in timestep output.")
                         end if
                     end do
                 end do