diff --git a/docs/module_categories.json b/docs/module_categories.json index d1da0ed5ed..6d64a0e42f 100644 --- a/docs/module_categories.json +++ b/docs/module_categories.json @@ -19,6 +19,8 @@ "m_bubbles_EE", "m_bubbles_EL", "m_bubbles_EL_kernels", + "m_particles_EL", + "m_particles_EL_kernels", "m_qbmm", "m_hyperelastic", "m_hypoelastic", diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 2a33ecfc60..b26838e580 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -25,6 +25,9 @@ module m_boundary_common type(scalar_field), dimension(:, :), allocatable :: bc_buffers $:GPU_DECLARE(create='[bc_buffers]') + type(int_bounds_info), dimension(3) :: beta_bc_bounds + $:GPU_DECLARE(create='[beta_bc_bounds]') + #ifdef MFC_MPI integer, dimension(1:3, 1:2) :: MPI_BC_TYPE_TYPE integer, dimension(1:3, 1:2) :: MPI_BC_BUFFER_TYPE @@ -32,6 +35,7 @@ module m_boundary_common private; public :: s_initialize_boundary_common_module, & s_populate_variables_buffers, & + s_populate_beta_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & s_populate_F_igr_buffers, & @@ -78,8 +82,23 @@ contains @:ACC_SETUP_SFs(bc_buffers(i,j)) end do end do + end if + if (bubbles_lagrange .or. particles_lagrange) then + beta_bc_bounds(1)%beg = -mapcells - 1 + beta_bc_bounds(1)%end = m + mapcells + 1 + ! 
n > 0 always for bubbles_lagrange + beta_bc_bounds(2)%beg = -mapcells - 1 + beta_bc_bounds(2)%end = n + mapcells + 1 + if (p == 0) then + beta_bc_bounds(3)%beg = 0 + beta_bc_bounds(3)%end = 0 + else + beta_bc_bounds(3)%beg = -mapcells - 1 + beta_bc_bounds(3)%end = p + mapcells + 1 + end if end if + $:GPU_UPDATE(device='[beta_bc_bounds]') end subroutine s_initialize_boundary_common_module @@ -1183,6 +1202,332 @@ contains end subroutine s_qbmm_extrapolation + impure subroutine s_populate_beta_buffers(q_beta, bc_type, nvar) + + type(scalar_field), dimension(:), intent(inout) :: q_beta + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type + integer, intent(in) :: nvar + + integer :: k, l + + !< x-direction + if (bc_x%beg >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 1, -1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + select case (bc_x%beg) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 1, -1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 1, -1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bc_x%end >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 1, 1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + select case (bc_x%end) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 1, 1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 1, 1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + !< y-direction + if (bc_y%beg >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 2, -1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = 
beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_y%beg) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 2, -1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 2, -1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bc_y%end >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 2, 1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_y%end) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 2, 1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 2, 1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (num_dims == 2) return + + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + !< z-direction + if (bc_z%beg >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 3, -1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 3, -1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 3, -1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bc_z%end >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 3, 1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_type(3, 2)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 3, 1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 3, 1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + #:endif + + end subroutine 
s_populate_beta_buffers + + subroutine s_beta_periodic(q_beta, bc_dir, bc_loc, k, l, nvar) + $:GPU_ROUTINE(function_name='s_beta_periodic', & + & parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: q_beta + integer, intent(in) :: bc_dir, bc_loc + integer, intent(in) :: k, l + integer, intent(in) :: nvar + + integer :: j, i + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !bc_x%beg + do i = 1, nvar + do j = -mapCells - 1, mapCells + q_beta(beta_vars(i))%sf(j, k, l) = & + q_beta(beta_vars(i))%sf(j, k, l) + q_beta(beta_vars(i))%sf(m + j + 1, k, l) + end do + end do + else !< bc_x%end + do i = 1, nvar + do j = -mapcells, mapcells + 1 + q_beta(beta_vars(i))%sf(m + j, k, l) = q_beta(beta_vars(i))%sf(j - 1, k, l) + end do + end do + end if + elseif (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_y%beg + do i = 1, nvar + do j = -mapcells - 1, mapcells + q_beta(beta_vars(i))%sf(k, j, l) = & + q_beta(beta_vars(i))%sf(k, j, l) + q_beta(beta_vars(i))%sf(k, n + j + 1, l) + end do + end do + else !< bc_y%end + do i = 1, nvar + do j = -mapcells, mapcells + 1 + q_beta(beta_vars(i))%sf(k, n + j, l) = q_beta(beta_vars(i))%sf(k, j - 1, l) + end do + end do + end if + elseif (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = -mapcells - 1, mapcells + q_beta(beta_vars(i))%sf(k, l, j) = & + q_beta(beta_vars(i))%sf(k, l, j) + q_beta(beta_vars(i))%sf(k, l, p + j + 1) + end do + end do + else !< bc_z%end + do i = 1, nvar + do j = -mapcells, mapcells + 1 + q_beta(beta_vars(i))%sf(k, l, p + j) = q_beta(beta_vars(i))%sf(k, l, j - 1) + end do + end do + end if + end if + + end subroutine s_beta_periodic + + subroutine s_beta_extrapolation(q_beta, bc_dir, bc_loc, k, l, nvar) + $:GPU_ROUTINE(function_name='s_beta_extrapolation', & + & parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: q_beta + integer, intent(in) :: 
bc_dir, bc_loc + integer, intent(in) :: k, l + integer, intent(in) :: nvar + + integer :: j, i + + ! Set beta in buffer regions equal to zero + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !bc_x%beg + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(-j, k, l) = 0._wp + end do + end do + else !< bc_x%end + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(m + j, k, l) = 0._wp + end do + end do + end if + elseif (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_y%beg + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, -j, l) = 0._wp + end do + end do + else !< bc_y%end + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, n + j, l) = 0._wp + end do + end do + end if + elseif (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, l, -j) = 0._wp + end do + end do + else !< bc_z%end + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, l, p + j) = 0._wp + end do + end do + end if + end if + + end subroutine s_beta_extrapolation + + subroutine s_beta_reflective(q_beta, bc_dir, bc_loc, k, l, nvar) + $:GPU_ROUTINE(function_name='s_beta_reflective', & + & parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: q_beta + integer, intent(in) :: bc_dir, bc_loc + integer, intent(in) :: k, l + integer, intent(in) :: nvar + + integer :: j, i + + ! Reflective BC for void fraction: + ! 1) Fold ghost-cell contributions back onto their mirror interior cells + ! 
2) Set ghost cells = mirror of (now-folded) interior values + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !< bc_x%beg + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(j - 1, k, l) = & + q_beta(beta_vars(i))%sf(j - 1, k, l) + q_beta(beta_vars(i))%sf(-j, k, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(-j, k, l) = q_beta(beta_vars(i))%sf(j - 1, k, l) + end do + end do + else !< bc_x%end + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(m - (j - 1), k, l) = & + q_beta(beta_vars(i))%sf(m - (j - 1), k, l) + q_beta(beta_vars(i))%sf(m + j, k, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(m + j, k, l) = q_beta(beta_vars(i))%sf(m - (j - 1), k, l) + end do + end do + end if + elseif (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_y%beg + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, j - 1, l) = & + q_beta(beta_vars(i))%sf(k, j - 1, l) + q_beta(beta_vars(i))%sf(k, -j, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, -j, l) = q_beta(beta_vars(i))%sf(k, j - 1, l) + end do + end do + else !< bc_y%end + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, n - (j - 1), l) = & + q_beta(beta_vars(i))%sf(k, n - (j - 1), l) + q_beta(beta_vars(i))%sf(k, n + j, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, n + j, l) = q_beta(beta_vars(i))%sf(k, n - (j - 1), l) + end do + end do + end if + elseif (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, j - 1) = & + q_beta(beta_vars(i))%sf(k, l, j - 1) + q_beta(beta_vars(i))%sf(k, l, -j) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, -j) = q_beta(beta_vars(i))%sf(k, l, j - 1) + end do + end do + else !< bc_z%end + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, p - (j - 1)) = & + q_beta(beta_vars(i))%sf(k, l, p - (j - 1)) + 
q_beta(beta_vars(i))%sf(k, l, p + j) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, p + j) = q_beta(beta_vars(i))%sf(k, l, p - (j - 1)) + end do + end do + end if + end if + + end subroutine s_beta_reflective + !> @brief Populates ghost cell buffers for the color function and its divergence used in capillary surface tension. impure subroutine s_populate_capillary_buffers(c_divs, bc_type) @@ -1317,6 +1662,7 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:endif + end subroutine s_populate_capillary_buffers !> @brief Applies periodic boundary conditions to the color function and its divergence fields. @@ -2027,6 +2373,31 @@ contains offset_x%beg = buff_size; offset_x%end = buff_size offset_y%beg = buff_size; offset_y%end = buff_size offset_z%beg = buff_size; offset_z%end = buff_size + +#ifdef MFC_MPI + ! Populate global domain boundaries with stretched grids + call s_mpi_allreduce_min(x_cb(-1), glb_bounds(1)%beg) + call s_mpi_allreduce_max(x_cb(m), glb_bounds(1)%end) + + if (n > 0) then + call s_mpi_allreduce_min(y_cb(-1), glb_bounds(2)%beg) + call s_mpi_allreduce_max(y_cb(n), glb_bounds(2)%end) + if (p > 0) then + call s_mpi_allreduce_min(z_cb(-1), glb_bounds(3)%beg) + call s_mpi_allreduce_max(z_cb(p), glb_bounds(3)%end) + end if + end if +#else + glb_bounds(1)%beg = x_cb(-1); glb_bounds(1)%end = x_cb(m) + if (n > 0) then + glb_bounds(2)%beg = y_cb(-1); glb_bounds(2)%end = y_cb(n) + if (p > 0) then + glb_bounds(3)%beg = z_cb(-1); glb_bounds(3)%end = z_cb(p) + end if + end if +#endif + $:GPU_UPDATE(device='[glb_bounds]') + #endif #ifndef MFC_PRE_PROCESS diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 168d6df96f..6d65d1dbe3 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -57,9 +57,9 @@ module m_constants real(wp), parameter :: initial_distance_buffer = 1.e12_wp !< Initialized levelset distance for the shortest path pair algorithm ! 
Lagrange bubbles constants - integer, parameter :: mapCells = 3 !< Number of cells around the bubble where the smoothening function will have effect + integer, parameter :: mapCells = 3 !< Number of cells around the bubble/particle where the smoothening function will have effect real(wp), parameter :: R_uni = 8314._wp !< Universal gas constant - J/kmol/K - integer, parameter :: lag_io_vars = 21 ! Number of variables per particle for MPI_IO + integer, parameter :: lag_io_vars = 21 !< Number of variables per particle for MPI_IO ! Strang Splitting constants real(wp), parameter :: dflt_adap_dt_tol = 1.e-4_wp !< Default tolerance for adaptive step size diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 8559be36d3..2298cad219 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -393,6 +393,12 @@ module m_derived_types real(wp) :: R_g !< gas constant of gas (bubble) end type subgrid_bubble_physical_parameters + !> Derived type annexing the physical parameters required for sub-grid particle models + type subgrid_particle_physical_parameters + real(wp) :: rho0ref_particle !< reference density + real(wp) :: cp_particle ! !! bubbles_euler + polytropic !! 
bubbles_euler + non-polytropic @@ -160,6 +168,7 @@ contains R_v = bub_pp%R_v; R_g = bub_pp%R_g Tw = bub_pp%T0ref end if + if (bubbles_lagrange) then cp_v = bub_pp%cp_v; cp_g = bub_pp%cp_g k_vl = bub_pp%k_v; k_gl = bub_pp%k_g diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 0b430cb4d5..d28acdbf6e 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -114,13 +114,14 @@ contains subroutine s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, & igr_order, buff_size, idwint, idwbuff, & - viscous, bubbles_lagrange, m, n, p, num_dims, igr, ib) + viscous, bubbles_lagrange, particles_lagrange, & + m, n, p, num_dims, igr, ib, fd_number) integer, intent(in) :: recon_type, weno_polyn, muscl_polyn - integer, intent(in) :: m, n, p, num_dims, igr_order + integer, intent(in) :: m, n, p, num_dims, igr_order, fd_number integer, intent(inout) :: buff_size type(int_bounds_info), dimension(3), intent(inout) :: idwint, idwbuff - logical, intent(in) :: viscous, bubbles_lagrange + logical, intent(in) :: viscous, bubbles_lagrange, particles_lagrange logical, intent(in) :: igr logical, intent(in) :: ib @@ -142,7 +143,12 @@ contains ! Correction for smearing function in the lagrangian subgrid bubble model if (bubbles_lagrange) then - buff_size = max(buff_size, 6) + buff_size = max(buff_size + fd_number, mapCells + 1 + fd_number) + end if + + ! Correction for smearing function in the lagrangian subgrid particle model + if (particles_lagrange) then + buff_size = max(buff_size + fd_number, mapCells + 1 + fd_number) end if if (ib) then diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 8768298306..1aa249b344 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -38,6 +38,11 @@ module m_mpi_common !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. 
+ type(int_bounds_info) :: comm_coords(3) + integer :: comm_size(3) + $:GPU_DECLARE(create='[comm_coords, comm_size]') + !! Variables for EL bubbles communication + #ifndef __NVCOMPILER_GPU_UNIFIED_MEM $:GPU_DECLARE(create='[buff_send, buff_recv]') #endif @@ -52,6 +57,8 @@ contains !! other procedures that are necessary to setup the module. impure subroutine s_initialize_mpi_common_module + integer :: beta_v_size, beta_comm_size_1, beta_comm_size_2, beta_comm_size_3, beta_halo_size + #ifdef MFC_MPI ! Allocating buff_send/recv and. Please note that for the sake of ! simplicity, both variables are provided sufficient storage to hold @@ -78,6 +85,28 @@ contains halo_size = -1 + buff_size*(v_size) end if + if (bubbles_lagrange .or. particles_lagrange) then + beta_v_size = size(beta_vars) + beta_comm_size_1 = m + 2*mapCells + 3 + beta_comm_size_2 = merge(n + 2*mapCells + 3, 1, n > 0) + beta_comm_size_3 = merge(p + 2*mapCells + 3, 1, p > 0) + if (n > 0) then + if (p > 0) then + beta_halo_size = 2*(mapCells + 1)*beta_v_size*max( & + beta_comm_size_2*beta_comm_size_3, & + beta_comm_size_1*beta_comm_size_3, & + beta_comm_size_1*beta_comm_size_2) - 1 + else + beta_halo_size = 2*(mapCells + 1)*beta_v_size*max( & + beta_comm_size_2, & + beta_comm_size_1) - 1 + end if + else + beta_halo_size = 2*(mapCells + 1)*beta_v_size - 1 + end if + halo_size = max(halo_size, beta_halo_size) + end if + $:GPU_UPDATE(device='[halo_size, v_size]') #ifndef __NVCOMPILER_GPU_UNIFIED_MEM @@ -343,17 +372,21 @@ contains impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & + bubs_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb) + Rc_min_glb, & + bubs_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc real(wp), intent(in) :: Rc_min_loc + integer, intent(in) :: bubs_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb real(wp), intent(out) :: Rc_min_glb + integer, intent(out) :: bubs_glb #ifdef 
MFC_SIMULATION #ifdef MFC_MPI @@ -374,6 +407,11 @@ contains MPI_COMM_WORLD, ierr) end if + if (bubbles_lagrange) then + call MPI_REDUCE(bubs_loc, bubs_glb, 1, & + MPI_INTEGER, MPI_SUM, 0, & + MPI_COMM_WORLD, ierr) + end if #else icfl_max_glb = icfl_max_loc @@ -383,11 +421,34 @@ contains Rc_min_glb = Rc_min_loc end if + if (bubbles_lagrange) bubs_glb = bubs_loc #endif #endif end subroutine s_mpi_reduce_stability_criteria_extrema + !> The following subroutine takes the inputted variable and + !! determines its sum on the entire computational domain. + !! @param var_loc holds the local value to be reduced among + !! all the processors in communicator. On output, the variable holds + !! the sum, reduced amongst all of the local values. + subroutine s_mpi_reduce_int_sum(var_loc, sum) + + integer, intent(in) :: var_loc + integer, intent(out) :: sum + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, sum, 1, MPI_INTEGER, & + MPI_SUM, 0, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_reduce_int_sum + !> The following subroutine takes the input local variable !! from all processors and reduces to the sum of all !! values. The reduced variable is recorded back onto the @@ -1106,6 +1167,273 @@ contains end subroutine s_mpi_sendrecv_variables_buffers + !> The goal of this procedure is to populate the buffers of + !! the cell-average conservative variables by communicating + !! with the neighboring processors. + !! @param q_cons_vf Cell-average conservative variables + !! @param mpi_dir MPI communication coordinate direction + !! 
@param pbc_loc Processor boundary condition (PBC) location + subroutine s_mpi_reduce_beta_variables_buffers(q_comm, & + mpi_dir, & + pbc_loc, & + nVar) + + type(scalar_field), dimension(1:), intent(inout) :: q_comm + integer, intent(in) :: mpi_dir, pbc_loc, nVar + + integer :: i, j, k, l, r, q !< Generic loop iterators + integer :: lb_size + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0, qbmm_comm, replace_buff + + integer :: pack_offset, unpack_offset + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + call nvtxStartRange("BETA-COMM-PACKBUF") + + ! Set bounds for each dimension + ! Always include the full buffer range for each existing dimension. + ! The Gaussian smearing kernel writes to buffer cells even at physical + ! boundaries, and these contributions must be communicated to neighbors + ! in other directions via ADD operations. + comm_coords(1)%beg = -mapcells - 1 + comm_coords(1)%end = m + mapcells + 1 + comm_coords(2)%beg = merge(-mapcells - 1, 0, n > 0) + comm_coords(2)%end = merge(n + mapcells + 1, n, n > 0) + comm_coords(3)%beg = merge(-mapcells - 1, 0, p > 0) + comm_coords(3)%end = merge(p + mapcells + 1, p, p > 0) + + ! Compute sizes + comm_size(1) = comm_coords(1)%end - comm_coords(1)%beg + 1 + comm_size(2) = comm_coords(2)%end - comm_coords(2)%beg + 1 + comm_size(3) = comm_coords(3)%end - comm_coords(3)%beg + 1 + + ! Buffer counts using the conditional sizes + v_size = nVar + lb_size = 2*(mapcells + 1) ! Size of the buffer region for beta variables (-mapcells - 1, mapcells) + buffer_counts = (/ & + lb_size*v_size*comm_size(2)*comm_size(3), & ! mpi_dir=1 + lb_size*v_size*comm_size(1)*comm_size(3), & ! mpi_dir=2 + lb_size*v_size*comm_size(1)*comm_size(2) & ! 
mpi_dir=3 + /) + + $:GPU_UPDATE(device='[v_size, comm_coords, comm_size]') + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + 1 + end if + + replace_buff = .false. + if (pbc_loc == 1 .and. beg_end_geq_0) replace_buff = .true. + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = -mapcells - 1, mapcells + do i = 1, v_size + r = (i - 1) + v_size*( & + (j + mapcells + 1) + lb_size*( & + (k - comm_coords(2)%beg) + comm_size(2)* & + (l - comm_coords(3)%beg))) + buff_send(r) = real(q_comm(beta_vars(i))%sf(j + pack_offset, k, l), kind=wp) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, v_size + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = -mapcells - 1, mapcells + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*( & + (j - comm_coords(1)%beg) + comm_size(1)*( & + (k + mapcells + 1) + lb_size* & + (l - comm_coords(3)%beg))) + buff_send(r) = real(q_comm(beta_vars(i))%sf(j, k + pack_offset, l), kind=wp) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, v_size + do l = -mapcells - 1, mapcells + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*( & + (j - comm_coords(1)%beg) + comm_size(1)*( & + (k - comm_coords(2)%beg) + comm_size(2)* & + (l + mapcells + 1))) + buff_send(r) = real(q_comm(beta_vars(i))%sf(j, k, l + pack_offset), kind=wp) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + ! Send/Recv +#ifdef MFC_SIMULATION + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + #:call GPU_HOST_DATA(use_device_addr='[buff_send, buff_recv]') + call nvtxStartRange("BETA-COMM-SENDRECV-RDMA") + + call MPI_SENDRECV( & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! BETA-MPI-SENDRECV-(NO)-RDMA + + #:endcall GPU_HOST_DATA + $:GPU_WAIT() + #:else + call nvtxStartRange("BETA-COMM-DEV2HOST") + $:GPU_UPDATE(host='[buff_send]') + call nvtxEndRange + call nvtxStartRange("BETA-COMM-SENDRECV-NO-RMDA") + + call MPI_SENDRECV( & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! BETA-MPI-SENDRECV-(NO)-RDMA + + call nvtxStartRange("BETA-COMM-HOST2DEV") + $:GPU_UPDATE(device='[buff_recv]') + call nvtxEndRange + #:endif + end if + #:endfor +#else + call MPI_SENDRECV( & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif + + ! 
Unpack Received Buffer + call nvtxStartRange("BETA-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]',copyin='[replace_buff]') + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = -mapcells - 1, mapcells + do i = 1, v_size + r = (i - 1) + v_size*( & + (j + mapcells + 1) + lb_size*( & + (k - comm_coords(2)%beg) + comm_size(2)* & + (l - comm_coords(3)%beg))) + if (replace_buff) then + q_comm(beta_vars(i))%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + else + q_comm(beta_vars(i))%sf(j + unpack_offset, k, l) = & + q_comm(beta_vars(i))%sf(j + unpack_offset, k, l) + real(buff_recv(r), kind=stp) + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]',copyin='[replace_buff]') + do i = 1, v_size + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = -mapcells - 1, mapcells + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*( & + (j - comm_coords(1)%beg) + comm_size(1)*( & + (k + mapcells + 1) + lb_size* & + (l - comm_coords(3)%beg))) + if (replace_buff) then + q_comm(beta_vars(i))%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + else + q_comm(beta_vars(i))%sf(j, k + unpack_offset, l) = & + q_comm(beta_vars(i))%sf(j, k + unpack_offset, l) + real(buff_recv(r), kind=stp) + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]',copyin='[replace_buff]') + do i = 1, v_size + do l = -mapcells - 1, mapcells + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*( & + (j - comm_coords(1)%beg) + comm_size(1)*( & + (k - comm_coords(2)%beg) + comm_size(2)* & + (l + mapcells + 1))) + if (replace_buff) then + q_comm(beta_vars(i))%sf(j, k, l + unpack_offset) = 
real(buff_recv(r), kind=stp) + else + q_comm(beta_vars(i))%sf(j, k, l + unpack_offset) = & + q_comm(beta_vars(i))%sf(j, k, l + unpack_offset) + real(buff_recv(r), kind=stp) + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_reduce_beta_variables_buffers + !> The purpose of this procedure is to optimally decompose !! the computational domain among the available processors. !! This is performed by attempting to award each processor, @@ -1135,9 +1463,17 @@ contains integer :: recon_order !< !! WENO or MUSCL reconstruction order - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators integer :: ierr !< Generic flag used to identify and report MPI errors + ! temp array to store neighbor rank coordinates + integer, dimension(1:num_dims) :: neighbor_coords + + ! Zeroing out communication needs for moving EL bubbles/particles + nidx(1)%beg = 0; nidx(1)%end = 0 + nidx(2)%beg = 0; nidx(2)%end = 0 + nidx(3)%beg = 0; nidx(3)%end = 0 + if (recon_type == WENO_TYPE) then recon_order = weno_order else @@ -1356,6 +1692,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%beg, ierr) proc_coords(3) = proc_coords(3) + 1 + nidx(3)%beg = -1 end if ! Boundary condition at the end @@ -1364,6 +1701,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%end, ierr) proc_coords(3) = proc_coords(3) - 1 + nidx(3)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1495,6 +1833,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%beg, ierr) proc_coords(2) = proc_coords(2) + 1 + nidx(2)%beg = -1 end if ! 
Boundary condition at the end @@ -1503,6 +1842,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%end, ierr) proc_coords(2) = proc_coords(2) - 1 + nidx(2)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1588,6 +1928,7 @@ contains proc_coords(1) = proc_coords(1) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) proc_coords(1) = proc_coords(1) + 1 + nidx(1)%beg = -1 end if ! Boundary condition at the end @@ -1595,6 +1936,7 @@ contains proc_coords(1) = proc_coords(1) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) proc_coords(1) = proc_coords(1) - 1 + nidx(1)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1640,6 +1982,23 @@ contains end if #endif end if + + @:ALLOCATE(neighbor_ranks(nidx(1)%beg:nidx(1)%end, & + nidx(2)%beg:nidx(2)%end, & + nidx(3)%beg:nidx(3)%end)) + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + neighbor_coords(1) = proc_coords(1) + i + if (num_dims > 1) neighbor_coords(2) = proc_coords(2) + j + if (num_dims > 2) neighbor_coords(3) = proc_coords(3) + k + call MPI_CART_RANK(MPI_COMM_CART, neighbor_coords, & + neighbor_ranks(i, j, k), ierr) + end if + end do + end do + end do #endif end subroutine s_mpi_decompose_computational_domain diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 219667a158..a3f4e2cea9 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -76,6 +76,8 @@ module m_global_parameters !! region, this region is used to store information outside the computational !! domain based on the boundary conditions. 
+ integer, allocatable :: beta_vars(:) !< Indices of variables to communicate for bubble/particle coupling + integer :: t_step_start !< First time-step directory integer :: t_step_stop !< Last time-step directory integer :: t_step_save !< Interval between consecutive time-step directory @@ -177,6 +179,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + + integer, allocatable, dimension(:, :, :) :: neighbor_ranks + !! Neighbor processor ranks + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid @@ -206,6 +213,9 @@ module m_global_parameters ! Subgrid Bubble Parameters type(subgrid_bubble_physical_parameters) :: bub_pp + ! Subgrid Particle Parameters + type(subgrid_particle_physical_parameters) :: particle_pp + real(wp), allocatable, dimension(:) :: adv !< Advection variables ! Formatted Database File(s) Structure Parameters @@ -328,6 +338,11 @@ module m_global_parameters integer :: nmom !> @} + !> @name Particle modeling variables and parameters + !> @{ + real(wp) :: cp_particle, rho0ref_particle + !> @} + !> @name surface tension coefficient !> @{ @@ -350,6 +365,7 @@ module m_global_parameters !> @name Lagrangian bubbles !> @{ logical :: bubbles_lagrange + logical :: particles_lagrange !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) @@ -458,6 +474,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real + ! Subgrid particle parameters + particle_pp%rho0ref_particle = dflt_real + particle_pp%cp_particle = dflt_real + ! Formatted database file(s) structure parameters format = dflt_int @@ -536,6 +556,7 @@ contains ! Lagrangian bubbles modeling bubbles_lagrange = .false. + particles_lagrange = .false. ! 
IBM num_ibs = dflt_int @@ -673,10 +694,15 @@ contains end if end if - if (bubbles_lagrange) then - beta_idx = sys_size + 1 - sys_size = beta_idx - end if + ! if (bubbles_lagrange) then + ! beta_idx = sys_size + 1 + ! sys_size = beta_idx + ! end if + + ! if (particles_lagrange) then + ! beta_idx = sys_size + 1 + ! sys_size = beta_idx + ! end if if (mhd) then B_idx%beg = sys_size + 1 @@ -806,6 +832,16 @@ contains sys_size = c_idx end if + if (bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + + if (particles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + if (cont_damage) then damage_idx = sys_size + 1 sys_size = damage_idx @@ -822,6 +858,14 @@ contains end if + if (bubbles_lagrange) then + allocate (beta_vars(1:3)) + beta_vars(1:3) = [1, 2, 5] + elseif (particles_lagrange) then + allocate (beta_vars(1:8)) + beta_vars(1:8) = [1, 2, 3, 4, 5, 6, 7, 8] + end if + if (chemistry) then species_idx%beg = sys_size + 1 species_idx%end = sys_size + num_species @@ -1049,6 +1093,8 @@ contains if (ib) MPI_IO_IB_DATA%var%sf => null() #endif + if (allocated(neighbor_ranks)) deallocate (neighbor_ranks) + end subroutine s_finalize_global_parameters_module end module m_global_parameters diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 29e65942c6..5e7308d325 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -101,11 +101,22 @@ contains & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & - & 'down_sample','fft_wrt', 'hyper_cleaning' ] + & 'down_sample','fft_wrt', 'hyper_cleaning', & + & 'particles_lagrange' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor if (bubbles_lagrange) then + #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & + & 'lag_pos_wrt', 'lag_pos_prev_wrt', 
'lag_vel_wrt', 'lag_rad_wrt', & + & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & + & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & + & 'lag_betaT_wrt', 'lag_betaC_wrt'] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + + if (particles_lagrange) then #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & @@ -141,6 +152,13 @@ contains #:endfor end if + ! Subgrid particle parameters + if (particles_lagrange) then + #:for VAR in ['rho0ref_particle','cp_particle'] + call MPI_BCAST(particle_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + #:for VAR in [ 'pref', 'rhoref', 'R0ref', 'poly_sigma', 'Web', 'Ca', & & 'Re_inv', 'Bx0', 'sigma', 't_save', 't_stop', & & 'x_output%beg', 'x_output%end', 'y_output%beg', & diff --git a/src/post_process/m_start_up.fpp b/src/post_process/m_start_up.fpp index 74a15d04db..48f4d1da4b 100644 --- a/src/post_process/m_start_up.fpp +++ b/src/post_process/m_start_up.fpp @@ -116,7 +116,7 @@ contains lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, & lag_rmin_wrt, lag_dphidt_wrt, lag_pres_wrt, lag_mv_wrt, & lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt, & - alpha_rho_e_wrt + alpha_rho_e_wrt, particles_lagrange, particle_pp ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -868,7 +868,7 @@ contains end if ! Adding the lagrangian subgrid variables to the formatted database file - if (bubbles_lagrange) then + if (bubbles_lagrange .or. particles_lagrange) then !! Void fraction field q_sf(:, :, :) = 1._wp - q_cons_vf(beta_idx)%sf( & -offset_x%beg:m + offset_x%end, & @@ -984,6 +984,9 @@ contains if (bubbles_euler .or. 
bubbles_lagrange) then call s_initialize_bubbles_model() end if + if (particles_lagrange) then + call s_initialize_particles_model() + end if if (num_procs > 1) then call s_initialize_mpi_proxy_module() call s_initialize_mpi_common_module() diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 11fa95e124..99fe8b1e9a 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -131,6 +131,21 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: fd_order !< + !! The order of the finite-difference (fd) approximations of the first-order + !! derivatives that need to be evaluated when the CoM or flow probe data + !! files are to be written at each time step + + integer :: fd_number !< + !! The finite-difference number is given by MAX(1, fd_order/2). Essentially, + !! it is a measure of the half-size of the finite-difference stencil for the + !! selected order of accuracy. + + !> @name lagrangian subgrid bubble parameters + !> @{! + type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters + !> @} + type(int_bounds_info) :: bc_x, bc_y, bc_z !< !! Boundary conditions in the x-, y- and z-coordinate directions @@ -162,6 +177,7 @@ module m_global_parameters logical :: viscous logical :: bubbles_lagrange + logical :: particles_lagrange ! Perturb density of surrounding air so as to break symmetry of grid logical :: perturb_flow @@ -177,6 +193,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + + integer, allocatable, dimension(:, :, :) :: neighbor_ranks + !! Neighbor ranks + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid @@ -214,6 +235,8 @@ module m_global_parameters ! 
Subgrid Bubble Parameters type(subgrid_bubble_physical_parameters) :: bub_pp + type(subgrid_particle_physical_parameters) :: particle_pp + real(wp) :: rhoref, pref !< Reference parameters for Tait EOS type(chemistry_parameters) :: chem_params @@ -263,6 +286,9 @@ module m_global_parameters real(wp) :: R0ref, p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, & gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g + !Solid particle physical parameters + real(wp) :: cp_particle, rho0ref_particle + !> @} !> @name Surface Tension Modeling @@ -295,9 +321,15 @@ module m_global_parameters !! conditions data to march the solution in the physical computational domain !! to the next time-step. + integer, allocatable :: beta_vars(:) !< Indices of variables to communicate for bubble/particle coupling + logical :: fft_wrt logical :: dummy !< AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional is false + ! Variables for hardcoded initial conditions that are read from input files + character(LEN=2*path_len) :: interface_file + real(wp) :: normFac, normMag, g0_ic, p0_ic + contains !> Assigns default values to user inputs prior to reading @@ -406,6 +438,8 @@ contains elliptic_smoothing_iters = dflt_int elliptic_smoothing = .false. + particles_lagrange = .false. + fft_wrt = .false. dummy = .false. @@ -422,6 +456,27 @@ contains ! Initial condition parameters num_patches = dflt_int + fd_order = dflt_int + lag_params%cluster_type = dflt_int + lag_params%pressure_corrector = .false. + lag_params%smooth_type = dflt_int + lag_params%heatTransfer_model = .false. + lag_params%massTransfer_model = .false. + lag_params%write_bubbles = .false. + lag_params%write_bubbles_stats = .false. 
+ lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int + lag_params%epsilonb = 1._wp + lag_params%charwidth = dflt_real + lag_params%nParticles_glb = dflt_int + lag_params%qs_drag_model = dflt_int + lag_params%stokes_drag = dflt_int + lag_params%added_mass_model = dflt_int + lag_params%interpolation_order = dflt_int + lag_params%charNz = dflt_int + lag_params%valmaxvoid = dflt_real + do i = 1, num_patches_max patch_icpp(i)%geometry = dflt_int patch_icpp(i)%model_scale(:) = 1._wp @@ -618,6 +673,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real + ! Subgrid particle parameters + particle_pp%rho0ref_particle = dflt_real + particle_pp%cp_particle = dflt_real + end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or @@ -892,6 +951,14 @@ contains end if + if (bubbles_lagrange) then + allocate (beta_vars(1:3)) + beta_vars(1:3) = [1, 2, 5] + elseif (particles_lagrange) then + allocate (beta_vars(1:8)) + beta_vars(1:8) = [1, 2, 3, 4, 5, 6, 7, 8] + end if + if (chemistry) then species_idx%beg = sys_size + 1 species_idx%end = sys_size + num_species @@ -915,11 +982,13 @@ contains chemxb = species_idx%beg chemxe = species_idx%end + if (bubbles_lagrange .or. 
particles_lagrange) fd_number = max(1, fd_order/2) + call s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, & igr_order, buff_size, & idwint, idwbuff, viscous, & - bubbles_lagrange, m, n, p, & - num_dims, igr, ib) + bubbles_lagrange, particles_lagrange, & + m, n, p, num_dims, igr, ib, fd_number) #ifdef MFC_MPI @@ -1038,6 +1107,8 @@ contains #endif + if (allocated(neighbor_ranks)) deallocate (neighbor_ranks) + end subroutine s_finalize_global_parameters_module end module m_global_parameters diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index cbfac0571b..06592e592f 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -43,7 +43,7 @@ contains & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& & 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & & 'num_bc_patches', 'mixlayer_perturb_nk', 'recon_type', & - & 'muscl_order', 'igr_order' ] + & 'muscl_order', 'igr_order', 'fd_order'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -55,7 +55,8 @@ contains & 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'hyperelasticity', 'pre_stress', 'elliptic_smoothing', 'viscous',& & 'bubbles_lagrange', 'bc_io', 'mhd', 'relativity', 'cont_damage', & - & 'igr', 'down_sample', 'simplex_perturb','fft_wrt', 'hyper_cleaning' ] + & 'igr', 'down_sample', 'simplex_perturb','fft_wrt', 'hyper_cleaning',& + & 'particles_lagrange' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor call MPI_BCAST(fluid_rho(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -137,6 +138,13 @@ contains #:endfor end do + ! 
Variables from input files for hardcoded patches + call MPI_BCAST(interface_file, len(interface_file), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(normFac, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(normMag, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(g0_ic, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(p0_ic, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + ! Simplex noise and fluid physical parameters do i = 1, num_fluids_max #:for VAR in [ 'gamma','pi_inf', 'G', 'cv', 'qv', 'qvp' ] @@ -152,15 +160,6 @@ contains end do end do - ! Subgrid bubble parameters - if (bubbles_euler .or. bubbles_lagrange) then - #:for VAR in [ 'R0ref','p0ref','rho0ref','T0ref', & - 'ss','pv','vd','mu_l','mu_v','mu_g','gam_v','gam_g', & - 'M_v','M_g','k_v','k_g','cp_v','cp_g','R_v','R_g'] - call MPI_BCAST(bub_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - do i = 1, 3 call MPI_BCAST(simplex_params%perturb_vel(i), 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(simplex_params%perturb_vel_freq(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -170,7 +169,6 @@ contains call MPI_BCAST(simplex_params%perturb_vel_offset(i, j), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) end do end do - #endif end subroutine s_mpi_bcast_user_inputs diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 3b45547c29..4a48e392b2 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -145,8 +145,11 @@ contains elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, num_bc_patches, & patch_bc, Bx0, relativity, cont_damage, igr, igr_order, & - down_sample, recon_type, muscl_order, hyper_cleaning, & - simplex_perturb, simplex_params, fft_wrt + down_sample, recon_type, muscl_order, fft_wrt, & + fd_order, lag_params, simplex_perturb, simplex_params, & + interface_file, normFac, normMag, & + g0_ic, p0_ic, hyper_cleaning, & + particles_lagrange, particle_pp ! 
Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' @@ -714,6 +717,9 @@ contains if (bubbles_euler .or. bubbles_lagrange) then call s_initialize_bubbles_model() end if + if (particles_lagrange) then + call s_initialize_particles_model() + end if call s_initialize_mpi_common_module() call s_initialize_data_output_module() call s_initialize_variables_conversion_module() diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 0f17bd60c3..204a824b56 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -17,6 +17,8 @@ module m_bubbles use m_helper_basic !< Functions to compare floating point numbers + use m_bubbles_EL_kernels + implicit none real(wp) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -40,7 +42,7 @@ contains !! @param f_bub_adv_src Source for bubble volume fraction !! @param f_divu Divergence of velocity !! @param fCson Speed of sound from fP (EL) - elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) + function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -84,7 +86,7 @@ contains !! @param fR Current bubble radius !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure - elemental function f_cpbw(fR0, fR, fV, fpb) + function f_cpbw(fR0, fR, fV, fpb) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb @@ -103,7 +105,7 @@ contains !! @param fCpinf Driving bubble pressure !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter - elemental function f_H(fCpbw, fCpinf, fntait, fBtait) + function f_H(fCpbw, fCpinf, fntait, fBtait) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait @@ -123,7 +125,7 @@ contains !! 
@param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter !! @param fH Bubble enthalpy - elemental function f_cgas(fCpinf, fntait, fBtait, fH) + function f_cgas(fCpinf, fntait, fBtait, fH) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpinf, fntait, fBtait, fH @@ -146,7 +148,7 @@ contains !! @param fBtait Tait EOS parameter !! @param advsrc Advection equation source term !! @param divu Divergence of velocity - elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) + function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu @@ -176,7 +178,7 @@ contains !! @param fV Current bubble velocity !! @param fR0 Equilibrium bubble radius !! @param fpbdot Time derivative of the internal bubble pressure - elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) + function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait real(wp), intent(in) :: fR, fV, fR0, fpbdot @@ -211,7 +213,7 @@ contains !! @param fR Current bubble radius !! @param fV Current bubble velocity !! @param fCpbw Boundary wall pressure - elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) + function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCp, fRho, fR, fV, fCpbw @@ -234,7 +236,7 @@ contains !! @param fcgas Current gas sound speed !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter - elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) + function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot real(wp), intent(in) :: fcgas, fntait, fBtait @@ -257,7 +259,7 @@ contains !! @param fR Current bubble radius !! 
@param fV Current bubble velocity !! @param fpb Internal bubble pressure - elemental function f_cpbw_KM(fR0, fR, fV, fpb) + function f_cpbw_KM(fR0, fR, fV, fpb) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw_KM @@ -284,7 +286,7 @@ contains !! @param fV Current bubble velocity !! @param fR0 Equilibrium bubble radius !! @param fC Current sound speed - elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) + function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fpbdot, fCp, fCpbw real(wp), intent(in) :: fRho, fR, fV, fR0, fC @@ -318,7 +320,7 @@ contains !> Subroutine that computes bubble wall properties for vapor bubbles !! @param pb_in Internal bubble pressure !! @param iR0 Current bubble size index - elemental subroutine s_bwproperty(pb_in, iR0, chi_vw_out, k_mw_out, rho_mw_out) + subroutine s_bwproperty(pb_in, iR0, chi_vw_out, k_mw_out, rho_mw_out) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pb_in integer, intent(in) :: iR0 @@ -349,7 +351,7 @@ contains !! @param fbeta_c Mass transfer coefficient (EL) !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) - elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_g, fbeta_c, fR_m, fgamma_m) + subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_g, fbeta_c, fR_m, fgamma_m) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR real(wp), intent(in) :: fV @@ -407,7 +409,8 @@ contains !! @param fbeta_t Mass transfer coefficient (EL) !! @param fR_m Mixture gas constant (EL) !! 
@param fgamma_m Mixture gamma (EL) - elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) + function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) + !$DIR INLINENEVER f_bpres_dot $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR @@ -463,27 +466,32 @@ contains !! @param fbeta_t Heat transfer coefficient (EL) !! @param fCson Speed of sound (EL) !! @param adap_dt_stop Fail-safe exit if max iteration count reached - subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & - fntait, fBtait, f_bub_adv_src, f_divu, & - bub_id, fmass_v, fmass_g, fbeta_c, & - fbeta_t, fCson, adap_dt_stop) - $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', & - & cray_inline=True) + function f_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & + fntait, fBtait, f_bub_adv_src, f_divu, & + bub_id, fmass_v, fmass_g, fbeta_c, & + fbeta_t, fCson, fRe, fPos, & + fVel, cell, q_prim_vf) result(adap_dt_stop) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(inout) :: fR, fV, fpb, fmass_v real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu integer, intent(in) :: bub_id real(wp), intent(in) :: fmass_g, fbeta_c, fbeta_t, fCson - integer, intent(inout) :: adap_dt_stop + real(wp), intent(inout), dimension(3), optional :: fPos, fVel + real(wp), intent(in), optional :: fRe + integer, intent(in), dimension(3), optional :: cell + type(scalar_field), intent(in), dimension(sys_size), optional :: q_prim_vf real(wp), dimension(5) :: err !< Error estimates for adaptive time stepping real(wp) :: t_new !< Updated time step size real(wp) :: h0, h !< Time step size real(wp), dimension(4) :: myR_tmp1, myV_tmp1, myR_tmp2, myV_tmp2 !< Bubble radius, radial velocity, and radial acceleration for the inner loop real(wp), dimension(4) :: myPb_tmp1, myMv_tmp1, myPb_tmp2, myMv_tmp2 !< Gas pressure and vapor mass for the inner 
loop (EL) - real(wp) :: fR2, fV2, fpb2, fmass_v2 - integer :: iter_count + real(wp) :: fR2, fV2, fpb2, fmass_v2, f_bTemp + real(wp), dimension(3) :: vTemp, aTemp + integer :: adap_dt_stop + integer :: l, iter_count call s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, fCson, h0) @@ -565,6 +573,42 @@ contains ! Update pb and mass_v fpb = myPb_tmp1(4) fmass_v = myMv_tmp1(4) + + select case (lag_vel_model) + case (1) + do l = 1, num_dims + vTemp(l) = f_interpolate_velocity(fR, cell, l, q_prim_vf) + end do + do l = 1, num_dims + fVel(l) = vTemp(l) + fPos(l) = fPos(l) + h*vTemp(l) + end do + case (2) + do l = 1, num_dims + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel(l), fmass_g, fmass_v, & + fRe, fRho, cell, l, q_prim_vf) + aTemp(l) = f_bTemp/(fmass_g + fmass_v) + end do + do l = 1, num_dims + fVel(l) = fVel(l) + h*aTemp(l) + fPos(l) = fPos(l) + h*fVel(l) + end do + case (3) + do l = 1, num_dims + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel(l), fmass_g, fmass_v, & + fRe, fRho, cell, l, q_prim_vf) + aTemp(l) = 2._wp*f_bTemp/(fmass_g + fmass_v) - 3*fV*fVel(l)/fR + end do + do l = 1, num_dims + fVel(l) = fVel(l) + h*aTemp(l) + fPos(l) = fPos(l) + h*fVel(l) + end do + case default + do l = 1, num_dims + fVel(l) = fVel(l) + fPos(l) = fPos(l) + end do + end select end if ! Update step size for the next sub-step @@ -588,7 +632,7 @@ contains if (iter_count >= adap_dt_max_iters) adap_dt_stop = 1 - end subroutine s_advance_step + end function f_advance_step !> Choose the initial time step size for the adaptive time stepping routine !! (See Heirer, E. Hairer S.P.Nørsett G. 
Wanner, Solving Ordinary @@ -613,10 +657,10 @@ contains $:GPU_ROUTINE(function_name='s_initial_substep_h',parallelism='[seq]', & & cray_inline=True) - real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf - real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu - real(wp), intent(IN) :: fCson - real(wp), intent(OUT) :: h + real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf + real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu + real(wp), intent(in) :: fCson + real(wp), intent(out) :: h real(wp), dimension(2) :: h_size !< Time step size (h0, h1) real(wp), dimension(3) :: d_norms !< norms (d_0, d_1, d_2) @@ -697,12 +741,12 @@ contains $:GPU_ROUTINE(function_name='s_advance_substep',parallelism='[seq]', & & cray_inline=True) - real(wp), intent(OUT) :: err - real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf - real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu, h - integer, intent(IN) :: bub_id - real(wp), intent(IN) :: fmass_v, fmass_g, fbeta_c, fbeta_t, fCson - real(wp), dimension(4), intent(OUT) :: myR_tmp, myV_tmp, myPb_tmp, myMv_tmp + real(wp), intent(out) :: err + real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf + real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu, h + integer, intent(in) :: bub_id + real(wp), intent(in) :: fmass_v, fmass_g, fbeta_c, fbeta_t, fCson + real(wp), dimension(4), intent(out) :: myR_tmp, myV_tmp, myPb_tmp, myMv_tmp real(wp), dimension(4) :: myA_tmp, mydPbdt_tmp, mydMvdt_tmp real(wp) :: err_R, err_V @@ -802,14 +846,15 @@ contains !! @param fbeta_c Mass transfer coefficient !! @param fbeta_t Heat transfer coefficient !! @param fdPbdt_tmp Rate of change of the internal bubble pressure + !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble !! 
@param advance_EL Rate of change of the mass of vapor in the bubble - elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & - fmass_g, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) + subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & + fmass_g, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp - real(wp), intent(IN) :: fmass_g, fbeta_c, fbeta_t - integer, intent(IN) :: bub_id - real(wp), intent(INOUT) :: fdPbdt_tmp + real(wp), intent(in) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp + real(wp), intent(in) :: fmass_g, fbeta_c, fbeta_t + integer, intent(in) :: bub_id + real(wp), intent(inout) :: fdPbdt_tmp real(wp), intent(out) :: advance_EL real(wp) :: fVapFlux, myR_m, mygamma_m diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index dad03d3f87..4674a36338 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -183,7 +183,7 @@ contains integer :: i, j, k, l, q, ii !< Loop variables - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached + integer :: adap_dt_stop_sum, adap_dt_stop !< Fail-safe exit if max iteration count reached integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson @@ -205,10 +205,9 @@ contains end do $:END_GPU_PARALLEL_LOOP() - adap_dt_stop_max = 0 + adap_dt_stop_sum = 0 $:GPU_PARALLEL_LOOP(private='[j,k,l,Rtmp, Vtmp, myalpha_rho, myalpha, myR, myV, alf, myP, myRho, R2Vav, R3, nbub, pb_local, mv_local, vflux, pbdot, rddot, n_tait, B_tait, my_divu]', collapse=3, & - & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & - & copy='[adap_dt_stop_max]') + & copy='[adap_dt_stop_sum]') do l = 0, p do k = 0, n do j = 0, m @@ -298,21 +297,20 @@ contains pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp end if + adap_dt_stop = 0 + ! 
Adaptive time stepping if (adap_dt) then - adap_dt_stop = 0 - call s_advance_step(myRho, myP, myR, myV, R0(q), & - pb_local, pbdot, alf, n_tait, B_tait, & - bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & - dmBeta_t, dmCson, adap_dt_stop) + adap_dt_stop = f_advance_step(myRho, myP, myR, myV, R0(q), & + pb_local, pbdot, alf, n_tait, B_tait, & + bub_adv_src(j, k, l), divu_in%sf(j, k, l), & + dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & + dmBeta_t, dmCson) q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) - else rddot = f_rddot(myRho, myP, myR, myV, R0(q), & pb_local, pbdot, alf, n_tait, B_tait, & @@ -321,6 +319,9 @@ contains bub_v_src(j, k, l, q) = nbub*rddot bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) end if + + $:GPU_ATOMIC(atomic='update') + adap_dt_stop_sum = adap_dt_stop_sum + adap_dt_stop end if end do end do @@ -328,7 +329,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") + if (adap_dt .and. adap_dt_stop_sum > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. 
adap_dt) then $:GPU_PARALLEL_LOOP(private='[i,k,l,q]', collapse=3) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 4ae590a4b8..b8f94fc074 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -27,6 +27,12 @@ module m_bubbles_EL use m_helper + use m_mpi_common + + use m_ibm + + use m_finite_differences + implicit none !(nBub) @@ -67,21 +73,31 @@ module m_bubbles_EL $:GPU_DECLARE(create='[lag_num_ts]') - integer :: nBubs !< Number of bubbles in the local domain - real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain + real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubble size in the local domain !< Projection of the lagrangian particles in the Eulerian framework type(scalar_field), dimension(:), allocatable :: q_beta - integer :: q_beta_idx !< Size of the q_beta vector field + type(scalar_field), dimension(:), allocatable :: kahan_comp !< Kahan compensation for q_beta accumulation + integer :: q_beta_idx !< Size of the q_beta vector field - $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') + $:GPU_DECLARE(create='[Rmax_glb,Rmin_glb,q_beta,kahan_comp,q_beta_idx]') + + integer, parameter :: LAG_EVOL_ID = 11 ! File id for lag_bubbles_evol_*.dat + integer, parameter :: LAG_STATS_ID = 12 ! File id for stats_lag_bubbles_*.dat + integer, parameter :: LAG_VOID_ID = 13 ! File id for voidfraction.dat + + integer, allocatable, dimension(:) :: keep_bubble + integer, allocatable, dimension(:, :) :: wrap_bubble_loc, wrap_bubble_dir + $:GPU_DECLARE(create='[keep_bubble]') + $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') contains !> Initializes the lagrangian subgrid bubble solver !! 
@param q_cons_vf Initial conservative variables - impure subroutine s_initialize_bubbles_EL_module(q_cons_vf) + impure subroutine s_initialize_bubbles_EL_module(q_cons_vf, bc_type) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type integer :: nBubs_glb, i @@ -103,24 +119,39 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if + pcomm_coords(1)%beg = x_cb(-1) + pcomm_coords(1)%end = x_cb(m) + $:GPU_UPDATE(device='[pcomm_coords(1)]') + if (n > 0) then + pcomm_coords(2)%beg = y_cb(-1) + pcomm_coords(2)%end = y_cb(n) + $:GPU_UPDATE(device='[pcomm_coords(2)]') + if (p > 0) then + pcomm_coords(3)%beg = z_cb(-1) + pcomm_coords(3)%end = z_cb(p) + $:GPU_UPDATE(device='[pcomm_coords(3)]') + end if + end if + $:GPU_UPDATE(device='[lag_num_ts, q_beta_idx]') @:ALLOCATE(q_beta(1:q_beta_idx)) + @:ALLOCATE(kahan_comp(1:q_beta_idx)) do i = 1, q_beta_idx @:ALLOCATE(q_beta(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & idwbuff(2)%beg:idwbuff(2)%end, & idwbuff(3)%beg:idwbuff(3)%end)) - end do - - do i = 1, q_beta_idx @:ACC_SETUP_SFs(q_beta(i)) + @:ALLOCATE(kahan_comp(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(kahan_comp(i)) end do ! 
Allocating space for lagrangian variables nBubs_glb = lag_params%nBubs_glb - @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(bub_R0(1:nBubs_glb)) @:ALLOCATE(Rmax_stats(1:nBubs_glb)) @:ALLOCATE(Rmin_stats(1:nBubs_glb)) @@ -128,6 +159,7 @@ contains @:ALLOCATE(gas_betaT(1:nBubs_glb)) @:ALLOCATE(gas_betaC(1:nBubs_glb)) @:ALLOCATE(bub_dphidt(1:nBubs_glb)) + @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) @@ -143,24 +175,73 @@ contains @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(keep_bubble(1:nBubs_glb)) + @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb, 1:num_dims), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) + ! Starting bubbles - call s_read_input_bubbles(q_cons_vf) + if (lag_params%write_void_evol) call s_open_void_evol + if (lag_params%write_bubbles) call s_open_lag_bubble_evol() + if (lag_params%write_bubbles_stats) call s_open_lag_bubble_stats() + + if (lag_params%vel_model > 0) then + moving_lag_bubbles = .true. + lag_pressure_force = lag_params%pressure_force + lag_gravity_force = lag_params%gravity_force + lag_vel_model = lag_params%vel_model + lag_drag_model = lag_params%drag_model + end if + $:GPU_UPDATE(device='[moving_lag_bubbles, lag_pressure_force, & + & lag_gravity_force, lag_vel_model, lag_drag_model]') + + ! Allocate cell-centered pressure gradient arrays and FD coefficients + if (lag_params%vel_model > 0 .and. 
lag_params%pressure_force) then + @:ALLOCATE(grad_p_x(0:m, 0:n, 0:p)) + @:ALLOCATE(fd_coeff_x_pgrad(-fd_number:fd_number, 0:m)) + call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_pgrad, & + buff_size, fd_number, fd_order) + $:GPU_UPDATE(device='[fd_coeff_x_pgrad]') + if (n > 0) then + @:ALLOCATE(grad_p_y(0:m, 0:n, 0:p)) + @:ALLOCATE(fd_coeff_y_pgrad(-fd_number:fd_number, 0:n)) + call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_pgrad, & + buff_size, fd_number, fd_order) + $:GPU_UPDATE(device='[fd_coeff_y_pgrad]') + end if + if (p > 0) then + @:ALLOCATE(grad_p_z(0:m, 0:n, 0:p)) + @:ALLOCATE(fd_coeff_z_pgrad(-fd_number:fd_number, 0:p)) + call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_pgrad, & + buff_size, fd_number, fd_order) + $:GPU_UPDATE(device='[fd_coeff_z_pgrad]') + end if + end if + + ! Allocate cell list arrays for atomic-free Gaussian smearing + @:ALLOCATE(cell_list_start(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_count(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_idx(1:lag_params%nBubs_glb)) + + call s_read_input_bubbles(q_cons_vf, bc_type) end subroutine s_initialize_bubbles_EL_module !> The purpose of this procedure is to obtain the initial bubbles' information !! @param q_cons_vf Conservative variables - impure subroutine s_read_input_bubbles(q_cons_vf) + impure subroutine s_read_input_bubbles(q_cons_vf, bc_type) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type real(wp), dimension(8) :: inputBubble real(wp) :: qtime integer :: id, bub_id, save_count integer :: i, ios logical :: file_exist, indomain + integer, dimension(3) :: cell character(LEN=path_len + 2*name_len) :: path_D_dir !< @@ -179,9 +260,9 @@ contains if (save_count == 0) then if (proc_rank == 0) print *, 'Reading lagrange bubbles input file.' 
- inquire (file='input/lag_bubbles.dat', exist=file_exist) + call my_inquire(trim(lag_params%input_path), file_exist) if (file_exist) then - open (94, file='input/lag_bubbles.dat', form='formatted', iostat=ios) + open (94, file=trim(lag_params%input_path), form='formatted', iostat=ios) do while (ios == 0) read (94, *, iostat=ios) (inputBubble(i), i=1, 8) if (ios /= 0) cycle @@ -195,12 +276,12 @@ contains call s_add_bubbles(inputBubble, q_cons_vf, bub_id) lag_id(bub_id, 1) = id !global ID lag_id(bub_id, 2) = bub_id !local ID - nBubs = bub_id ! local number of bubbles + n_el_bubs_loc = bub_id ! local number of bubbles end if end do close (94) else - call s_mpi_abort("Initialize the lagrange bubbles in input/lag_bubbles.dat") + call s_mpi_abort("Initialize the lagrange bubbles in "//trim(lag_params%input_path)) end if else if (proc_rank == 0) print *, 'Restarting lagrange bubbles at save_count: ', save_count @@ -209,13 +290,23 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id + if (num_procs > 1) then + call s_mpi_reduce_int_sum(n_el_bubs_loc, n_el_bubs_glb) + else + n_el_bubs_glb = n_el_bubs_loc + end if + + if (proc_rank == 0) then + if (n_el_bubs_glb == 0) call s_mpi_abort('No bubbles in the domain. 
Check '//trim(lag_params%input_path)) + end if + $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') $:GPU_UPDATE(device='[lag_id,bub_R0,Rmax_stats,Rmin_stats,gas_mg, & & gas_betaT,gas_betaC,bub_dphidt,gas_p,gas_mv, & & intfc_rad,intfc_vel,mtn_pos,mtn_posPrev,mtn_vel, & & mtn_s,intfc_draddt,intfc_dveldt,gas_dpdt,gas_dmvdt, & - & mtn_dposdt,mtn_dveldt,nBubs]') + & mtn_dposdt,mtn_dveldt,n_el_bubs_loc]') Rmax_glb = min(dflt_real, -dflt_real) Rmin_glb = max(dflt_real, -dflt_real) @@ -225,19 +316,22 @@ contains !Populate temporal variables call s_transfer_data_to_tmp() - call s_smear_voidfraction() - - if (lag_params%write_bubbles) call s_write_lag_particles(qtime) + call s_smear_voidfraction(bc_type) if (save_count == 0) then ! Create ./D directory - write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' - call my_inquire(path_D_dir, file_exist) - if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + if (proc_rank == 0) then + write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' + call my_inquire(trim(path_D_dir), file_exist) + if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + end if + call s_mpi_barrier() call s_write_restart_lag_bubbles(save_count) ! Needed for post_processing - call s_write_void_evol(qtime) + if (lag_params%write_void_evol) call s_write_void_evol(qtime) end if + if (lag_params%write_bubbles) call s_write_lag_bubble_evol(qtime) + end subroutine s_read_input_bubbles !> The purpose of this procedure is to obtain the information of the bubbles when starting fresh @@ -280,7 +374,7 @@ contains mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) end if - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) ! 
Check if the bubble is located in the ghost cell of a symmetric, or wall boundary @@ -466,7 +560,7 @@ contains call MPI_FILE_CLOSE(ifile, ierr) call MPI_TYPE_FREE(view, ierr) - nBubs = bub_id + n_el_bubs_loc = bub_id do i = 1, bub_id lag_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) @@ -491,7 +585,7 @@ contains deallocate (MPI_IO_DATA_lag_bubbles) else - nBubs = 0 + n_el_bubs_loc = 0 call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) @@ -523,9 +617,9 @@ contains !> Contains the bubble dynamics subroutines. !! @param q_prim_vf Primitive variables !! @param stage Current stage in the time-stepper algorithm - subroutine s_compute_bubble_EL_dynamics(q_prim_vf, stage) - + subroutine s_compute_bubble_EL_dynamics(q_prim_vf, bc_type, stage) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type integer, intent(in) :: stage real(wp) :: myVapFlux @@ -533,7 +627,8 @@ contains real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho - real(wp) :: gamma, pi_inf, qv + real(wp), dimension(3) :: myPos, myVel + real(wp) :: gamma, pi_inf, qv, f_b, myRe #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3) :: myalpha_rho, myalpha #:else @@ -542,18 +637,17 @@ contains real(wp), dimension(2) :: Re integer, dimension(3) :: cell - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached + integer :: adap_dt_stop_sum, adap_dt_stop !< Fail-safe exit if max iteration count reached real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines - integer :: i, k, l - - call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") + integer :: k, l ! Subgrid p_inf model based on Maeda and Colonius (2018). 
if (lag_params%pressure_corrector) then + call nvtxStartRange("LAGRANGE-BUBBLE-PINF-CORRECTION") ! Calculate velocity potentials (valid for one bubble per cell) $:GPU_PARALLEL_LOOP(private='[k,cell,paux,preterm1,term2,Romega,myR0,myR,myV,myPb,pint,term1_fac]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) myR = intfc_rad(k, 2) @@ -570,14 +664,22 @@ contains end if end do $:END_GPU_PARALLEL_LOOP() + call nvtxEndRange() end if + ! Precompute cell-centered pressure gradients for translational motion + if (moving_lag_bubbles .and. lag_pressure_force) then + call nvtxStartRange("LAGRANGE-BUBBLE-PRESSURE-GRADIENT") + call s_compute_pressure_gradients(q_prim_vf) + call nvtxEndRange() + end if + + call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") ! Radial motion model - adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,Re,cell,myVapFlux,preterm1, term2, paux, pint, Romega, term1_fac,myR_m, mygamma_m, myPb, myMass_n, myMass_v,myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot,myPinf, aux1, aux2, myCson, myRho,gamma,pi_inf,qv,dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu,adap_dt_stop]', & - & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & - & copy='[adap_dt_stop_max]',copyin='[stage]') - do k = 1, nBubs + adap_dt_stop_sum = 0 + $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell,myVapFlux,preterm1, term2, paux, pint, Romega,term1_fac,myR_m, mygamma_m, myPb, myMass_n, myMass_v,myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot,myPinf, aux1,aux2, myCson, myRho,gamma,pi_inf,qv,dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu,adap_dt_stop,myPos,myVel]', & + & copy='[adap_dt_stop_sum]',copyin='[stage]') + do k = 1, n_el_bubs_loc ! Keller-Miksis model ! Current bubble state @@ -589,6 +691,8 @@ contains myBeta_c = gas_betaC(k) myBeta_t = gas_betaT(k) myR0 = bub_R0(k) + myPos = mtn_pos(k, :, 2) + myVel = mtn_vel(k, :, 2) ! 
Vapor and heat fluxes call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) @@ -609,48 +713,77 @@ contains if (adap_dt) then - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & - dmntait, dmBtait, dm_bub_adv_src, dm_divu, & - k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) + mtn_posPrev(k, :, 1) = myPos + + myRe = Re(1) + adap_dt_stop = f_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & + dmntait, dmBtait, dm_bub_adv_src, dm_divu, & + k, myMass_v, myMass_n, myBeta_c, & + myBeta_t, myCson, myRe, & + myPos, myVel, cell, q_prim_vf) ! Update bubble state intfc_rad(k, 1) = myR intfc_vel(k, 1) = myV gas_p(k, 1) = myPb gas_mv(k, 1) = myMass_v + mtn_pos(k, :, 1) = myPos + mtn_vel(k, :, 1) = myVel else - ! Radial acceleration from bubble models intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & myPb, myPbdot, dmalf, dmntait, dmBtait, & dm_bub_adv_src, dm_divu, & myCson) + intfc_draddt(k, stage) = myV gas_dmvdt(k, stage) = myMvdot gas_dpdt(k, stage) = myPbdot + if (moving_lag_bubbles) then + do l = 1, num_dims + select case (lag_vel_model) + case (1) + mtn_dposdt(k, l, stage) = f_interpolate_velocity(myPos(l), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + case (2) + mtn_dposdt(k, l, stage) = myVel(l) + f_b = f_get_bubble_force(myPos(l), & + myR, myV, myVel(l), & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = f_b/(myMass_n + myMass_v) + case (3) + mtn_dposdt(k, l, stage) = myVel(l) + f_b = f_get_bubble_force(myPos(l), & + myR, myV, myVel(l), & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 2._wp*f_b/(myMass_n + myMass_v) - 3._wp*myV*myVel(l)/myR + case default + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end select + end do + end if end if - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + $:GPU_ATOMIC(atomic='update') + 
adap_dt_stop_sum = adap_dt_stop_sum + adap_dt_stop end do $:END_GPU_PARALLEL_LOOP() + call nvtxEndRange - if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") - - ! Bubbles remain in a fixed position - $:GPU_PARALLEL_LOOP(collapse=2, private='[k,l]', copyin='[stage]') - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end do - end do - $:END_GPU_PARALLEL_LOOP() + if (adap_dt .and. adap_dt_stop_sum > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") - call nvtxEndRange + if (adap_dt) then + call s_transfer_data_to_tmp() + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + end if end subroutine s_compute_bubble_EL_dynamics @@ -667,96 +800,91 @@ contains integer :: i, j, k, l - if (.not. adap_dt) call s_smear_voidfraction() - - if (lag_params%solver_approach == 2) then - - ! (q / (1 - beta)) * d(beta)/dt source - if (p == 0) then - $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta(2)%sf(i, j, k) + & - q_beta(5)%sf(i, j, k)) - - end if - end do + call nvtxStartRange("LAGRANGE-BUBBLE-EL-SOURCE") + ! 
(q / (1 - beta)) * d(beta)/dt source + if (lag_params%cluster_type >= 4) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + do l = 1, E_idx + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta(2)%sf(i, j, k) + & + q_beta(5)%sf(i, j, k)) + end if end do end do end do - $:END_GPU_PARALLEL_LOOP() - else - $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)/q_beta(1)%sf(i, j, k)* & - q_beta(2)%sf(i, j, k) - end if - end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + do l = 1, E_idx + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + (q_cons_vf(l)%sf(i, j, k)/q_beta(1)%sf(i, j, k))* & + q_beta(2)%sf(i, j, k) + end if end do end do end do - $:END_GPU_PARALLEL_LOOP() - end if + end do + $:END_GPU_PARALLEL_LOOP() + end if - do l = 1, num_dims + do l = 1, num_dims - call s_gradient_dir(q_prim_vf(E_idx)%sf, q_beta(3)%sf, l) + call s_gradient_dir(q_prim_vf(E_idx)%sf, q_beta(3)%sf, l) - ! (q / (1 - beta)) * d(beta)/dt source - $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta(1)%sf(i, j, k))/ & - q_beta(1)%sf(i, j, k)* & - q_beta(3)%sf(i, j, k) - end if - end do + ! 
(q / (1 - beta)) * d(beta)/dt source + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta(1)%sf(i, j, k))/ & + q_beta(1)%sf(i, j, k)* & + q_beta(3)%sf(i, j, k) + end if end do end do - $:END_GPU_PARALLEL_LOOP() + end do + $:END_GPU_PARALLEL_LOOP() - !source in energy - $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) - end do + !source in energy + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) end do end do - $:END_GPU_PARALLEL_LOOP() + end do + $:END_GPU_PARALLEL_LOOP() - call s_gradient_dir(q_beta(3)%sf, q_beta(4)%sf, l) + call s_gradient_dir(q_beta(3)%sf, q_beta(4)%sf, l) - ! (beta / (1 - beta)) * d(Pu)/dl source - $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta(4)%sf(i, j, k)*(1._wp - q_beta(1)%sf(i, j, k))/ & - q_beta(1)%sf(i, j, k) - end if - end do + ! 
(beta / (1 - beta)) * d(Pu)/dl source + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta(4)%sf(i, j, k)*(1._wp - q_beta(1)%sf(i, j, k))/ & + q_beta(1)%sf(i, j, k) + end if end do end do - $:END_GPU_PARALLEL_LOOP() end do - - end if + $:END_GPU_PARALLEL_LOOP() + end do + call nvtxEndRange ! LAGRANGE-BUBBLE-EL-SOURCE end subroutine s_compute_bubbles_EL_source @@ -797,26 +925,38 @@ contains end subroutine s_compute_cson_from_pinf !> The purpose of this subroutine is to smear the effect of the bubbles in the Eulerian framework - subroutine s_smear_voidfraction() + subroutine s_smear_voidfraction(bc_type) + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type integer :: i, j, k, l - call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - + call nvtxStartRange("BUBBLES-LAGRANGE-SMEARING") $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end q_beta(i)%sf(j, k, l) = 0._wp + kahan_comp(i)%sf(j, k, l) = 0._wp end do end do end do end do $:END_GPU_PARALLEL_LOOP() - call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & - mtn_s, mtn_pos, q_beta) + ! 
Build cell list before smearing (CPU-side counting sort) + call s_build_cell_list(n_el_bubs_loc, mtn_s) + + call s_smoothfunction(n_el_bubs_loc, intfc_rad, intfc_vel, & + mtn_s, mtn_pos, q_beta, kahan_comp) + + call nvtxStartRange("BUBBLES-LAGRANGE-BETA-COMM") + if (lag_params%cluster_type >= 4) then + call s_populate_beta_buffers(q_beta, bc_type, 3) + else + call s_populate_beta_buffers(q_beta, bc_type, 2) + end if + call nvtxEndRange !Store 1-beta $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) @@ -831,8 +971,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - - call nvtxEndRange + call nvtxEndRange ! BUBBLES-LAGRANGE-SMEARING end subroutine s_smear_voidfraction @@ -855,92 +994,176 @@ contains integer, dimension(3), intent(out) :: cell real(wp), intent(out), optional :: preterm1, term2, Romega - real(wp), dimension(3) :: scoord, psi + real(wp), dimension(3) :: scoord, psi_pos, psi_x, psi_y, psi_z + real(wp) :: xi, eta, zeta real(wp) :: dc, vol, aux real(wp) :: volgas, term1, Rbeq, denom real(wp) :: charvol, charpres, charvol2, charpres2 integer, dimension(3) :: cellaux integer :: i, j, k integer :: smearGrid, smearGridz - logical :: celloutside - scoord = mtn_s(bub_id, 1:3, 2) f_pinfl = 0._wp - !< Find current bubble cell - cell(:) = int(scoord(:)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do + if (moving_lag_bubbles) then + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) + scoord = mtn_s(bub_id, 1:3, 2) + else + scoord = mtn_s(bub_id, 1:3, 2) + cell(:) = int(scoord(:)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 + end do + end if if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation - !< Getting the cell volulme as Omega - if (p > 0) then - vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - else - if (cyl_coord) then - vol = 
dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + if (fd_order == 2) then ! Bilinear interpolation + + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) else - vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if end if - end if - !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. - psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - if (cell(1) == (m + buff_size)) then - cell(1) = cell(1) - 1 - psi(1) = 1._wp - else if (cell(1) == (-buff_size)) then - psi(1) = 0._wp - else - if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - end if + !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = abs((psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - if (cell(2) == (n + buff_size)) then - cell(2) = cell(2) - 1 - psi(2) = 1._wp - else if (cell(2) == (-buff_size)) then - psi(2) = 0._wp - else - if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - end if + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = abs((psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - if (p > 0) then - psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - if (cell(3) == (p + buff_size)) then - cell(3) = cell(3) - 1 - psi(3) = 1._wp - else if (cell(3) == (-buff_size)) then - psi(3) = 0._wp + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = abs((psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - 
z_cc(cell(3)))) else - if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) + psi_pos(3) = 0._wp + end if + + ! Calculate bilinear basis functions for each direction + ! For normalized coordinate xi in [0, 1], the two basis functions are: + ! phi_0(xi) = 1 - xi, phi_1(xi) = xi + + ! X-direction basis functions + psi_x(1) = 1._wp - psi_pos(1) ! Left basis function + psi_x(2) = psi_pos(1) ! Right basis function + + ! Y-direction basis functions + psi_y(1) = 1._wp - psi_pos(2) ! Left basis function + psi_y(2) = psi_pos(2) ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + psi_z(1) = 1._wp - psi_pos(3) ! Left basis function + psi_z(2) = psi_pos(3) ! Right basis function + else + psi_z(1) = 1._wp + psi_z(2) = 0._wp + end if + + !< Perform bilinear interpolation + f_pinfl = 0._wp + + if (p == 0) then !2D - 4 point interpolation (2x2) + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3))* & + psi_x(i)*psi_y(j) + end do + end do + else !3D - 8 point interpolation (2x2x2) + do k = 1, 2 + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1)* & + psi_x(i)*psi_y(j)*psi_z(k) + end do + end do + end do + end if + + elseif (fd_order == 4) then ! 
Biquadratic interpolation + + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + else + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if end if - else - psi(3) = 0._wp - end if - !< Perform bilinear interpolation - if (p == 0) then !2D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) - else !3D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) + !< Obtain biquadratic interpolation coefficients, based on the current location of the bubble. + ! 
For biquadratic interpolation, we need coefficients for 3 points in each direction + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = (psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1))) + + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = (psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2))) + + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = (psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3))) + else + psi_pos(3) = 0._wp + end if + + ! Calculate biquadratic basis functions for each direction + ! For normalized coordinate xi in [-1, 1], the three basis functions are: + ! phi_0(xi) = xi*(xi-1)/2, phi_1(xi) = (1-xi)*(1+xi), phi_2(xi) = xi*(xi+1)/2 + + ! X-direction basis functions + xi = 2._wp*psi_pos(1) - 1._wp ! Convert to [-1, 1] range + psi_x(1) = xi*(xi - 1._wp)/2._wp ! Left basis function + psi_x(2) = (1._wp - xi)*(1._wp + xi) ! Center basis function + psi_x(3) = xi*(xi + 1._wp)/2._wp ! Right basis function + + ! Y-direction basis functions + eta = 2._wp*psi_pos(2) - 1._wp ! Convert to [-1, 1] range + psi_y(1) = eta*(eta - 1._wp)/2._wp ! Left basis function + psi_y(2) = (1._wp - eta)*(1._wp + eta) ! Center basis function + psi_y(3) = eta*(eta + 1._wp)/2._wp ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + zeta = 2._wp*psi_pos(3) - 1._wp ! Convert to [-1, 1] range + psi_z(1) = zeta*(zeta - 1._wp)/2._wp ! Left basis function + psi_z(2) = (1._wp - zeta)*(1._wp + zeta) ! Center basis function + psi_z(3) = zeta*(zeta + 1._wp)/2._wp ! 
Right basis function + else + psi_z(1) = 0._wp + psi_z(2) = 1._wp + psi_z(3) = 0._wp + end if + + !< Perform biquadratic interpolation + f_pinfl = 0._wp + + if (p == 0) then !2D - 9 point interpolation (3x3) + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3))* & + psi_x(i)*psi_y(j) + end do + end do + else !3D - 27 point interpolation (3x3x3) + do k = 1, 3 + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2)* & + psi_x(i)*psi_y(j)*psi_z(k) + end do + end do + end do + end if end if !R_Omega @@ -971,56 +1194,25 @@ contains cellaux(3) = cell(3) + k - (mapCells + 1) if (p == 0) cellaux(3) = 0 - !< check if the current cell is outside the computational domain or not (including ghost cells) - celloutside = .false. - if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then - celloutside = .true. - end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if + !< Obtaining the cell volume + if (p > 0) then + vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if - end if - if (.not. celloutside) then - if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then - celloutside = .true. - end if - end if - - if (.not. 
celloutside) then - !< Obtaining the cell volulme - if (p > 0) then - vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) + if (cyl_coord) then + vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi else - if (cyl_coord) then - vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi - else - vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth - end if + vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth end if - !< Update values - charvol = charvol + vol - charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol - charvol2 = charvol2 + vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) - charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3)) & - *vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) end if - + !< Update values + charvol = charvol + vol + charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol + charvol2 = charvol2 + vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) + charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3)) & + *vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) end do end do end do - f_pinfl = charpres2/charvol2 vol = charvol dc = (3._wp*abs(vol)/(4._wp*pi))**(1._wp/3._wp) @@ -1056,118 +1248,157 @@ contains !> This subroutine updates the Lagrange variables using the tvd RK time steppers. !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. !! @param stage Current tvd RK stage - impure subroutine s_update_lagrange_tdv_rk(stage) + impure subroutine s_update_lagrange_tdv_rk(q_prim_vf, bc_type, stage) + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type integer, intent(in) :: stage integer :: k if (time_stepper == 1) then ! 
1st order TVD RK + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + if (moving_lag_bubbles) then + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + end if end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if elseif (time_stepper == 2) then ! 
2nd order TVD RK if (stage == 1) then + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + if (moving_lag_bubbles) then + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + end if end do $:END_GPU_PARALLEL_LOOP() + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + elseif (stage == 2) then + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + if (moving_lag_bubbles) then + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp + end if end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) + if (moving_lag_bubbles) call 
s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if end if elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + if (moving_lag_bubbles) then + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + end if end do $:END_GPU_PARALLEL_LOOP() + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + elseif (stage == 2) then + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + if 
(moving_lag_bubbles) then + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp + end if end do $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + elseif (stage == 3) then + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + if (moving_lag_bubbles) then + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) + end if end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + if 
(lag_params%write_void_evol) call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p,gas_mv,gas_mg,intfc_rad,intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if end if @@ -1176,11 +1407,199 @@ contains end subroutine s_update_lagrange_tdv_rk + !> This subroutine enforces reflective and wall boundary conditions for EL bubbles + !! @param dest Destination for the bubble position update + impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + integer :: k, i, q + integer :: patch_id, newBubs, new_idx + real(wp) :: offset + integer, dimension(3) :: cell + + call nvtxStartRange("LAG-BC") + call nvtxStartRange("LAG-BC-DEV2HOST") + $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, n_el_bubs_loc, & + & wrap_bubble_dir, wrap_bubble_loc]') + call nvtxEndRange + + ! 
Handle MPI transfer of bubbles going to another processor's local domain + if (num_procs > 1) then + call nvtxStartRange("LAG-BC-TRANSFER-LIST") + call s_add_particles_to_transfer_list(n_el_bubs_loc, mtn_pos(:, :, 2), mtn_posPrev(:, :, 2)) + call nvtxEndRange + + call nvtxStartRange("LAG-BC-SENDRECV") + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, n_el_bubs_loc, & + 2) + call nvtxEndRange + end if + + call nvtxStartRange("LAG-BC-HOST2DEV") + $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, n_el_bubs_loc]') + call nvtxEndRange + + $:GPU_PARALLEL_LOOP(private='[k, cell]') + do k = 1, n_el_bubs_loc + keep_bubble(k) = 1 + wrap_bubble_loc(k, :) = 0 + wrap_bubble_dir(k, :) = 0 + + ! Relocate bubbles at solid boundaries and delete bubbles that leave + ! buffer regions + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k, 1, 2) < x_cb(-1) + intfc_rad(k, 2)) then + mtn_pos(k, 1, 2) = x_cb(-1) + intfc_rad(k, 2) + elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k, 1, 2) > x_cb(m) - intfc_rad(k, 2)) then + mtn_pos(k, 1, 2) = x_cb(m) - intfc_rad(k, 2) + elseif (bc_x%beg == BC_PERIODIC .and. mtn_pos(k, 1, 2) < pcomm_coords(1)%beg .and. & + mtn_posPrev(k, 1, 2) >= pcomm_coords(1)%beg) then + wrap_bubble_dir(k, 1) = 1 + wrap_bubble_loc(k, 1) = -1 + elseif (bc_x%end == BC_PERIODIC .and. mtn_pos(k, 1, 2) > pcomm_coords(1)%end .and. 
& + mtn_posPrev(k, 1, 2) <= pcomm_coords(1)%end) then + wrap_bubble_dir(k, 1) = 1 + wrap_bubble_loc(k, 1) = 1 + elseif (mtn_pos(k, 1, 2) >= x_cb(m)) then + keep_bubble(k) = 0 + elseif (mtn_pos(k, 1, 2) < x_cb(-1)) then + keep_bubble(k) = 0 + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k, 2, 2) < y_cb(-1) + intfc_rad(k, 2)) then + mtn_pos(k, 2, 2) = y_cb(-1) + intfc_rad(k, 2) + else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k, 2, 2) > y_cb(n) - intfc_rad(k, 2)) then + mtn_pos(k, 2, 2) = y_cb(n) - intfc_rad(k, 2) + elseif (bc_y%beg == BC_PERIODIC .and. mtn_pos(k, 2, 2) < pcomm_coords(2)%beg .and. & + mtn_posPrev(k, 2, 2) >= pcomm_coords(2)%beg) then + wrap_bubble_dir(k, 2) = 1 + wrap_bubble_loc(k, 2) = -1 + elseif (bc_y%end == BC_PERIODIC .and. mtn_pos(k, 2, 2) > pcomm_coords(2)%end .and. & + mtn_posPrev(k, 2, 2) <= pcomm_coords(2)%end) then + wrap_bubble_dir(k, 2) = 1 + wrap_bubble_loc(k, 2) = 1 + elseif (mtn_pos(k, 2, 2) >= y_cb(n)) then + keep_bubble(k) = 0 + elseif (mtn_pos(k, 2, 2) < y_cb(-1)) then + keep_bubble(k) = 0 + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k, 3, 2) < z_cb(-1) + intfc_rad(k, 2)) then + mtn_pos(k, 3, 2) = z_cb(-1) + intfc_rad(k, 2) + else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k, 3, 2) > z_cb(p) - intfc_rad(k, 2)) then + mtn_pos(k, 3, 2) = z_cb(p) - intfc_rad(k, 2) + elseif (bc_z%beg == BC_PERIODIC .and. mtn_pos(k, 3, 2) < pcomm_coords(3)%beg .and. & + mtn_posPrev(k, 3, 2) >= pcomm_coords(3)%beg) then + wrap_bubble_dir(k, 3) = 1 + wrap_bubble_loc(k, 3) = -1 + elseif (bc_z%end == BC_PERIODIC .and. mtn_pos(k, 3, 2) > pcomm_coords(3)%end .and. 
& + mtn_posPrev(k, 3, 2) <= pcomm_coords(3)%end) then + wrap_bubble_dir(k, 3) = 1 + wrap_bubble_loc(k, 3) = 1 + elseif (mtn_pos(k, 3, 2) >= z_cb(p)) then + keep_bubble(k) = 0 + elseif (mtn_pos(k, 3, 2) < z_cb(-1)) then + keep_bubble(k) = 0 + end if + end if + + if (keep_bubble(k) == 1) then + ! Remove bubbles that are no longer in a liquid + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k, 1:3, 2), cell, mtn_s(k, 1:3, 2)) + + if (q_prim_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + keep_bubble(k) = 0 + end if + end if + end do + $:END_GPU_PARALLEL_LOOP() + + if (n_el_bubs_loc > 0) then + call nvtxStartRange("LAG-BC-DEV2HOST") + $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, n_el_bubs_loc, & + & wrap_bubble_dir, wrap_bubble_loc]') + call nvtxEndRange + + newBubs = 0 + do k = 1, n_el_bubs_loc + if (keep_bubble(k) == 1) then + newBubs = newBubs + 1 + if (newBubs /= k) then + call s_copy_lag_bubble(newBubs, k) + wrap_bubble_dir(newBubs, :) = wrap_bubble_dir(k, :) + wrap_bubble_loc(newBubs, :) = wrap_bubble_loc(k, :) + end if + end if + end do + n_el_bubs_loc = newBubs + + ! 
Handle periodic wrapping of bubbles on same processor + do k = 1, n_el_bubs_loc + if (any(wrap_bubble_dir(k, :) == 1)) then + do i = 1, num_dims + if (wrap_bubble_dir(k, i) == 1) then + offset = glb_bounds(i)%end - glb_bounds(i)%beg + if (wrap_bubble_loc(k, i) == 1) then + do q = 1, 2 + mtn_pos(k, i, q) = mtn_pos(k, i, q) - offset + mtn_posPrev(k, i, q) = mtn_posPrev(k, i, q) - offset + end do + else if (wrap_bubble_loc(k, i) == -1) then + do q = 1, 2 + mtn_pos(k, i, q) = mtn_pos(k, i, q) + offset + mtn_posPrev(k, i, q) = mtn_posPrev(k, i, q) + offset + end do + end if + end if + end do + end if + end do + call nvtxStartRange("LAG-BC-HOST2DEV") + $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, n_el_bubs_loc]') + call nvtxEndRange + + end if + + $:GPU_PARALLEL_LOOP(private='[cell]') + do k = 1, n_el_bubs_loc + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k, 1:3, 2), cell, mtn_s(k, 1:3, 2)) + end do + + call nvtxEndRange ! LAG-BC + + end subroutine s_enforce_EL_bubbles_boundary_conditions + !> This subroutine returns the computational coordinate of the cell for the given position. !! @param pos Input coordinates !! @param cell Computational coordinate of the cell !! 
@param scoord Calculated particle coordinates subroutine s_locate_cell(pos, cell, scoord) + $:GPU_ROUTINE(function_name='s_locate_cell',parallelism='[seq]', & + & cray_inline=True) real(wp), dimension(3), intent(in) :: pos real(wp), dimension(3), intent(out) :: scoord @@ -1192,7 +1611,7 @@ contains cell(1) = cell(1) - 1 end do - do while (pos(1) > x_cb(cell(1))) + do while (pos(1) >= x_cb(cell(1))) cell(1) = cell(1) + 1 end do @@ -1200,7 +1619,7 @@ contains cell(2) = cell(2) - 1 end do - do while (pos(2) > y_cb(cell(2))) + do while (pos(2) >= y_cb(cell(2))) cell(2) = cell(2) + 1 end do @@ -1208,7 +1627,7 @@ contains do while (pos(3) < z_cb(cell(3) - 1)) cell(3) = cell(3) - 1 end do - do while (pos(3) > z_cb(cell(3))) + do while (pos(3) >= z_cb(cell(3))) cell(3) = cell(3) + 1 end do end if @@ -1237,7 +1656,7 @@ contains integer :: k $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) intfc_rad(k, 2) = intfc_rad(k, 1) @@ -1263,20 +1682,27 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & + (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. & + (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) > -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. 
& - (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (abs(pos_part(2)) < y_cb(n + buff_size - fd_number)) .and. & + (abs(pos_part(2)) >= max(y_cb(fd_number - buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & + (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. & + (pos_part(3) < z_cb(p + buff_size - fd_number)) .and. & + (pos_part(3) >= z_cb(fd_number - buff_size - 1))) end if ! For symmetric and wall boundary condition @@ -1390,12 +1816,39 @@ contains character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist + character(LEN=25) :: FMT + + write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + call my_inquire(trim(file_loc), file_exist) + + if (precision == 1) then + FMT = "(A16,A14,8A16)" + else + FMT = "(A24,A14,8A24)" + end if + + if (.not. 
file_exist) then + open (LAG_EVOL_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (LAG_EVOL_ID, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', & + 'coreVaporMass', 'coreVaporConcentration', 'radius', 'interfaceVelocity', & + 'corePressure' + else + open (LAG_EVOL_ID, FILE=trim(file_loc), FORM='formatted', position='append') + end if + + end subroutine s_write_lag_particles + + !> @Brief Subroutine that opens the file to write the evolution of the lagrangian bubbles on each time step. + impure subroutine s_open_lag_bubble_evol() + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist character(LEN=25) :: FMT write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' file_loc = trim(case_dir)//'/D/'//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + call my_inquire(trim(file_loc), file_exist) if (precision == 1) then FMT = "(A16,A14,8A16)" @@ -1404,14 +1857,27 @@ contains end if if (.not. file_exist) then - open (11, FILE=trim(file_loc), FORM='formatted', position='rewind') - write (11, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', & + open (LAG_EVOL_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (LAG_EVOL_ID, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', & 'coreVaporMass', 'coreVaporConcentration', 'radius', 'interfaceVelocity', & 'corePressure' else - open (11, FILE=trim(file_loc), FORM='formatted', position='append') + open (LAG_EVOL_ID, FILE=trim(file_loc), FORM='formatted', position='append') end if + end subroutine s_open_lag_bubble_evol + + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !! @param q_time Current time + impure subroutine s_write_lag_bubble_evol(qtime) + + real(wp), intent(in) :: qtime + integer :: k, ios + character(LEN=25) :: FMT + + character(LEN=path_len + 2*name_len) :: file_loc, path + logical :: file_exist + if (precision == 1) then FMT = "(F16.8,I14,8F16.8)" else @@ -1419,8 +1885,8 @@ contains end if ! 
Cycle through list - do k = 1, nBubs - write (11, FMT) & + do k = 1, n_el_bubs_loc + write (LAG_EVOL_ID, FMT) & qtime, & lag_id(k, 1), & mtn_pos(k, 1, 1), & @@ -1433,22 +1899,15 @@ contains gas_p(k, 1) end do - close (11) + end subroutine s_write_lag_bubble_evol - end subroutine s_write_lag_particles + impure subroutine s_close_lag_bubble_evol - !> Subroutine that writes some useful statistics related to the volume fraction - !! of the particles (void fraction) in the computatioational domain - !! on each time step. - !! @param qtime Current time - impure subroutine s_write_void_evol(qtime) + close (LAG_EVOL_ID) - real(wp), intent(in) :: qtime - real(wp) :: volcell, voltot - real(wp) :: lag_void_max, lag_void_avg, lag_vol - real(wp) :: void_max_glb, void_avg_glb, vol_glb + end subroutine s_close_lag_bubble_evol - integer :: i, j, k + subroutine s_open_void_evol character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist @@ -1456,19 +1915,37 @@ contains if (proc_rank == 0) then write (file_loc, '(A)') 'voidfraction.dat' file_loc = trim(case_dir)//'/D/'//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + call my_inquire(trim(file_loc), file_exist) if (.not. file_exist) then - open (12, FILE=trim(file_loc), FORM='formatted', position='rewind') + open (LAG_VOID_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') !write (12, *) 'currentTime, averageVoidFraction, ', & ! 'maximumVoidFraction, totalParticlesVolume' !write (12, *) 'The averageVoidFraction value does ', & ! 'not reflect the real void fraction in the cloud since the ', & ! 'cells which do not have bubbles are not accounted' else - open (12, FILE=trim(file_loc), FORM='formatted', position='append') + open (LAG_VOID_ID, FILE=trim(file_loc), FORM='formatted', position='append') end if end if + end subroutine s_open_void_evol + + !> Subroutine that writes some useful statistics related to the volume fraction + !! 
of the particles (void fraction) in the computational domain + !! on each time step. + !! @param qtime Current time + impure subroutine s_write_void_evol(qtime) + + real(wp), intent(in) :: qtime + real(wp) :: volcell, voltot + real(wp) :: lag_void_max, lag_void_avg, lag_vol + real(wp) :: void_max_glb, void_avg_glb, vol_glb + + integer :: i, j, k + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp @@ -1503,16 +1980,21 @@ contains if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol if (proc_rank == 0) then - write (12, '(6X,4e24.8)') & + write (LAG_VOID_ID, '(6X,4e24.8)') & qtime, & lag_void_avg, & lag_void_max, & voltot - close (12) end if end subroutine s_write_void_evol + subroutine s_close_void_evol + + if (proc_rank == 0) close (LAG_VOID_ID) + + end subroutine s_close_void_evol + !> Subroutine that writes the restarting files for the particles in the lagrangian solver. !! @param t_step Current time step impure subroutine s_write_restart_lag_bubbles(t_step) @@ -1532,13 +2014,14 @@ contains integer(KIND=MPI_OFFSET_KIND) :: disp integer :: view integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer, allocatable :: proc_bubble_counts(:) + integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_bubble_counts real(wp), dimension(1:1, 1:lag_io_vars) :: dummy dummy = 0._wp bub_id = 0._wp - if (nBubs /= 0) then - do k = 1, nBubs + if (n_el_bubs_loc /= 0) then + do k = 1, n_el_bubs_loc if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then bub_id = bub_id + 1 end if @@ -1547,8 +2030,6 @@ contains if (.not. 
parallel_io) return - allocate (proc_bubble_counts(num_procs)) - lsizes(1) = bub_id lsizes(2) = lag_io_vars @@ -1600,26 +2081,22 @@ contains if (bub_id > 0) then allocate (MPI_IO_DATA_lag_bubbles(max(1, bub_id), 1:lag_io_vars)) - i = 1 - do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) - MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 8:10) = mtn_vel(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 11) = intfc_rad(k, 1) - MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) - MPI_IO_DATA_lag_bubbles(i, 13) = bub_R0(k) - MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats(k) - MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats(k) - MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) - MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) - MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) - MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) - MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) - MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - i = i + 1 - end if + do k = 1, n_el_bubs_loc + MPI_IO_DATA_lag_bubbles(k, 1) = real(lag_id(k, 1)) + MPI_IO_DATA_lag_bubbles(k, 2:4) = mtn_pos(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(k, 5:7) = mtn_posPrev(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(k, 8:10) = mtn_vel(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(k, 11) = intfc_rad(k, 1) + MPI_IO_DATA_lag_bubbles(k, 12) = intfc_vel(k, 1) + MPI_IO_DATA_lag_bubbles(k, 13) = bub_R0(k) + MPI_IO_DATA_lag_bubbles(k, 14) = Rmax_stats(k) + MPI_IO_DATA_lag_bubbles(k, 15) = Rmin_stats(k) + MPI_IO_DATA_lag_bubbles(k, 16) = bub_dphidt(k) + MPI_IO_DATA_lag_bubbles(k, 17) = gas_p(k, 1) + MPI_IO_DATA_lag_bubbles(k, 18) = gas_mv(k, 1) + MPI_IO_DATA_lag_bubbles(k, 19) = gas_mg(k) + MPI_IO_DATA_lag_bubbles(k, 20) = gas_betaT(k) + MPI_IO_DATA_lag_bubbles(k, 21) = gas_betaC(k) end do call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & @@ -1660,8 +2137,6 @@ contains call MPI_FILE_CLOSE(ifile, 
ierr) end if - deallocate (proc_bubble_counts) - #endif end subroutine s_write_restart_lag_bubbles @@ -1673,7 +2148,7 @@ contains $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', & & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) @@ -1683,18 +2158,15 @@ contains end subroutine s_calculate_lag_bubble_stats - !> Subroutine that writes the maximum and minimum radius of each bubble. - impure subroutine s_write_lag_bubble_stats() + impure subroutine s_open_lag_bubble_stats() - integer :: k character(LEN=path_len + 2*name_len) :: file_loc - - character(len=20) :: FMT + character(LEN=20) :: FMT + logical :: file_exist write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' file_loc = trim(case_dir)//'/D/'//trim(file_loc) - - $:GPU_UPDATE(host='[Rmax_glb,Rmin_glb]') + call my_inquire(trim(file_loc), file_exist) if (precision == 1) then FMT = "(A10,A14,5A16)" @@ -1702,8 +2174,23 @@ contains FMT = "(A10,A14,5A24)" end if - open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') - write (13, FMT) 'proc_rank', 'particleID', 'x', 'y', 'z', 'Rmax_glb', 'Rmin_glb' + if (.not. file_exist) then + open (LAG_STATS_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (LAG_STATS_ID, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' + else + open (LAG_STATS_ID, FILE=trim(file_loc), FORM='formatted', position='append') + end if + + end subroutine s_open_lag_bubble_stats + + !> Subroutine that writes the maximum and minimum radius of each bubble. 
+ impure subroutine s_write_lag_bubble_stats() + + integer :: k + character(LEN=path_len + 2*name_len) :: file_loc + character(LEN=20) :: FMT + + $:GPU_UPDATE(host='[Rmax_glb,Rmin_glb]') if (precision == 1) then FMT = "(I10,I14,5F16.8)" @@ -1711,8 +2198,8 @@ contains FMT = "(I10,I14,5F24.16)" end if - do k = 1, nBubs - write (13, FMT) & + do k = 1, n_el_bubs_loc + write (LAG_STATS_ID, FMT) & proc_rank, & lag_id(k, 1), & mtn_pos(k, 1, 1), & @@ -1722,56 +2209,60 @@ contains Rmin_stats(k) end do - close (13) - end subroutine s_write_lag_bubble_stats - !> The purpose of this subroutine is to remove one specific particle if dt is too small. - !! @param bub_id Particle id - impure subroutine s_remove_lag_bubble(bub_id) + subroutine s_close_lag_bubble_stats - integer, intent(in) :: bub_id + close (LAG_STATS_ID) - integer :: i + end subroutine s_close_lag_bubble_stats - $:GPU_LOOP(parallelism='[seq]') - do i = bub_id, nBubs - 1 - lag_id(i, 1) = lag_id(i + 1, 1) - bub_R0(i) = bub_R0(i + 1) - Rmax_stats(i) = Rmax_stats(i + 1) - Rmin_stats(i) = Rmin_stats(i + 1) - gas_mg(i) = gas_mg(i + 1) - gas_betaT(i) = gas_betaT(i + 1) - gas_betaC(i) = gas_betaC(i + 1) - bub_dphidt(i) = bub_dphidt(i + 1) - gas_p(i, 1:2) = gas_p(i + 1, 1:2) - gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) - intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) - intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) - mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) - mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) - mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) - mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) - intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) - intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) - gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) - gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - end do - - nBubs = nBubs - 1 - $:GPU_UPDATE(device='[nBubs]') - - end subroutine s_remove_lag_bubble + !> The purpose of this subroutine is to remove one specific particle 
by overwriting its slot: all per-bubble state of bubble src is copied onto index dest (used to compact the bubble arrays after deletions). + !! @param dest Destination particle index + !! @param src Source particle index + impure subroutine s_copy_lag_bubble(dest, src) + + integer, intent(in) :: src, dest + + bub_R0(dest) = bub_R0(src) + Rmax_stats(dest) = Rmax_stats(src) + Rmin_stats(dest) = Rmin_stats(src) + gas_mg(dest) = gas_mg(src) + gas_betaT(dest) = gas_betaT(src) + gas_betaC(dest) = gas_betaC(src) + bub_dphidt(dest) = bub_dphidt(src) + lag_id(dest, 1) = lag_id(src, 1) + gas_p(dest, 1:2) = gas_p(src, 1:2) + gas_mv(dest, 1:2) = gas_mv(src, 1:2) + intfc_rad(dest, 1:2) = intfc_rad(src, 1:2) + intfc_vel(dest, 1:2) = intfc_vel(src, 1:2) + mtn_vel(dest, 1:3, 1:2) = mtn_vel(src, 1:3, 1:2) + mtn_s(dest, 1:3, 1:2) = mtn_s(src, 1:3, 1:2) + mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) + mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) + intfc_draddt(dest, 1:lag_num_ts) = intfc_draddt(src, 1:lag_num_ts) + intfc_dveldt(dest, 1:lag_num_ts) = intfc_dveldt(src, 1:lag_num_ts) + gas_dpdt(dest, 1:lag_num_ts) = gas_dpdt(src, 1:lag_num_ts) + gas_dmvdt(dest, 1:lag_num_ts) = gas_dmvdt(src, 1:lag_num_ts) + mtn_dposdt(dest, 1:3, 1:lag_num_ts) = mtn_dposdt(src, 1:3, 1:lag_num_ts) + mtn_dveldt(dest, 1:3, 1:lag_num_ts) = mtn_dveldt(src, 1:3, 1:lag_num_ts) + + end subroutine s_copy_lag_bubble !> The purpose of this subroutine is to deallocate variables impure subroutine s_finalize_lagrangian_solver() integer :: i + if (lag_params%write_void_evol) call s_close_void_evol + if (lag_params%write_bubbles) call s_close_lag_bubble_evol() + if (lag_params%write_bubbles_stats) call s_close_lag_bubble_stats() + do i = 1, q_beta_idx @:DEALLOCATE(q_beta(i)%sf) + @:DEALLOCATE(kahan_comp(i)%sf) end do @:DEALLOCATE(q_beta) + @:DEALLOCATE(kahan_comp) !Deallocating space @:DEALLOCATE(lag_id) @@ -1797,6 +2288,28 @@ contains @:DEALLOCATE(mtn_dposdt) @:DEALLOCATE(mtn_dveldt) + @:DEALLOCATE(keep_bubble) + @:DEALLOCATE(wrap_bubble_loc, wrap_bubble_dir) + + ! Deallocate pressure gradient arrays and FD coefficients + if (lag_params%vel_model > 0 .and.
lag_params%pressure_force) then + @:DEALLOCATE(grad_p_x) + @:DEALLOCATE(fd_coeff_x_pgrad) + if (n > 0) then + @:DEALLOCATE(grad_p_y) + @:DEALLOCATE(fd_coeff_y_pgrad) + if (p > 0) then + @:DEALLOCATE(grad_p_z) + @:DEALLOCATE(fd_coeff_z_pgrad) + end if + end if + end if + + ! Deallocate cell list arrays + @:DEALLOCATE(cell_list_start) + @:DEALLOCATE(cell_list_count) + @:DEALLOCATE(cell_list_idx) + end subroutine s_finalize_lagrangian_solver end module m_bubbles_EL diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 784abc5adb..f298a883e4 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -11,6 +11,22 @@ module m_bubbles_EL_kernels implicit none + ! Cell-centered pressure gradients (precomputed for translational motion) + real(wp), allocatable, dimension(:, :, :) :: grad_p_x, grad_p_y, grad_p_z + $:GPU_DECLARE(create='[grad_p_x, grad_p_y, grad_p_z]') + + ! Finite-difference coefficients for pressure gradient computation + real(wp), allocatable, dimension(:, :) :: fd_coeff_x_pgrad + real(wp), allocatable, dimension(:, :) :: fd_coeff_y_pgrad + real(wp), allocatable, dimension(:, :) :: fd_coeff_z_pgrad + $:GPU_DECLARE(create='[fd_coeff_x_pgrad, fd_coeff_y_pgrad, fd_coeff_z_pgrad]') + + ! Cell list for bubble-to-cell mapping (rebuilt each RK stage before smearing) + integer, allocatable, dimension(:, :, :) :: cell_list_start ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:, :, :) :: cell_list_count ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:) :: cell_list_idx ! (1:nBubs_glb) sorted bubble indices + $:GPU_DECLARE(create='[cell_list_start, cell_list_count, cell_list_idx]') + contains !> The purpose of this subroutine is to smear the strength of the lagrangian @@ -21,177 +37,259 @@ contains !! @param lbk_s Computational coordinates of the bubbles !! @param lbk_pos Spatial coordinates of the bubbles !! 
@param updatedvar Eulerian variable to be updated - subroutine s_smoothfunction(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + subroutine s_smoothfunction(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s, lbk_pos real(wp), dimension(1:lag_params%nBubs_glb, 1:2), intent(in) :: lbk_rad, lbk_vel type(scalar_field), dimension(:), intent(inout) :: updatedvar + type(scalar_field), dimension(:), intent(inout) :: kcomp smoothfunc:select case(lag_params%smooth_type) case (1) - call s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + call s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp) case (2) - call s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar) + call s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar, kcomp) end select smoothfunc end subroutine s_smoothfunction - !> The purpose of this procedure contains the algorithm to use the delta kernel function to map the effect of the bubbles. - !! The effect of the bubbles only affects the cell where the bubble is located. - subroutine s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar) + !> Builds a sorted cell list mapping each interior cell (0:m,0:n,0:p) to its + !! resident bubbles. Uses a counting-sort on the host (O(nBubs + N_cells)). + !! Must be called before s_gaussian each RK stage. + !! @param nBubs Number of lagrangian bubbles in the current domain + !! 
@param lbk_s Computational coordinates of the bubbles + subroutine s_build_cell_list(nBubs, lbk_s) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s - real(wp), dimension(1:lag_params%nBubs_glb, 1:2), intent(in) :: lbk_rad, lbk_vel - type(scalar_field), dimension(:), intent(inout) :: updatedvar - integer, dimension(3) :: cell - real(wp) :: strength_vel, strength_vol - - real(wp) :: addFun1, addFun2, addFun3 - real(wp) :: volpart, Vol + integer :: l, ci, cj, ck, idx real(wp), dimension(3) :: s_coord - integer :: l - $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') + ! Bring current bubble positions to host + $:GPU_UPDATE(host='[lbk_s]') + + ! Pass 1: zero counts and count bubbles per cell + cell_list_count = 0 do l = 1, nBubs + s_coord(1:3) = lbk_s(l, 1:3, 2) + ci = int(s_coord(1)) + cj = int(s_coord(2)) + ck = int(s_coord(3)) + ! Clamp to interior (bubbles should already be in [0:m,0:n,0:p]) + ci = max(0, min(ci, m)) + cj = max(0, min(cj, n)) + ck = max(0, min(ck, p)) + cell_list_count(ci, cj, ck) = cell_list_count(ci, cj, ck) + 1 + end do + + ! Prefix sum to compute start indices (1-based into cell_list_idx) + idx = 1 + do ck = 0, p + do cj = 0, n + do ci = 0, m + cell_list_start(ci, cj, ck) = idx + idx = idx + cell_list_count(ci, cj, ck) + end do + end do + end do - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + ! Pass 2: place bubble indices into cell_list_idx + ! Temporarily reuse cell_list_count as a running offset + cell_list_count = 0 + do l = 1, nBubs s_coord(1:3) = lbk_s(l, 1:3, 2) - call s_get_cell(s_coord, cell) + ci = int(s_coord(1)) + cj = int(s_coord(2)) + ck = int(s_coord(3)) + ci = max(0, min(ci, m)) + cj = max(0, min(cj, n)) + ck = max(0, min(ck, p)) + cell_list_idx(cell_list_start(ci, cj, ck) + cell_list_count(ci, cj, ck)) = l + cell_list_count(ci, cj, ck) = cell_list_count(ci, cj, ck) + 1 + end do - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + ! 
Send cell list arrays to GPU + $:GPU_UPDATE(device='[cell_list_start, cell_list_count, cell_list_idx]') - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if + end subroutine s_build_cell_list - !Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cell(1), cell(2), cell(3)) = updatedvar(1)%sf(cell(1), cell(2), cell(3)) + real(addFun1, kind=stp) - - !Update time derivative of void fraction - addFun2 = strength_vel/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cell(1), cell(2), cell(3)) = updatedvar(2)%sf(cell(1), cell(2), cell(3)) + real(addFun2, kind=stp) - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cell(1), cell(2), cell(3)) = updatedvar(5)%sf(cell(1), cell(2), cell(3)) + real(addFun3, kind=stp) - end if + !> Cell-centric delta-function smearing using the cell list (no GPU atomics). + !! Each bubble only affects the cell it resides in. The outer GPU loop + !! iterates over interior cells and sums contributions from resident bubbles. 
+ subroutine s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar, kcomp) + + integer, intent(in) :: nBubs + real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s + real(wp), dimension(1:lag_params%nBubs_glb, 1:2), intent(in) :: lbk_rad, lbk_vel + type(scalar_field), dimension(:), intent(inout) :: updatedvar + type(scalar_field), dimension(:), intent(inout) :: kcomp + + real(wp) :: strength_vel, strength_vol + real(wp) :: volpart, Vol + real(wp) :: y_kahan, t_kahan + integer :: i, j, k, lb, bub_idx + + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,lb,bub_idx,volpart,Vol,strength_vel,strength_vol,y_kahan,t_kahan]') + do k = 0, p + do j = 0, n + do i = 0, m + + ! Cell volume + if (num_dims == 2) then + Vol = dx(i)*dy(j)*lag_params%charwidth + if (cyl_coord) Vol = dx(i)*dy(j)*y_cc(j)*2._wp*pi + else + Vol = dx(i)*dy(j)*dz(k) + end if + + ! Loop over bubbles in this cell + $:GPU_LOOP(parallelism='[seq]') + do lb = cell_list_start(i, j, k), & + cell_list_start(i, j, k) + cell_list_count(i, j, k) - 1 + + bub_idx = cell_list_idx(lb) + + volpart = 4._wp/3._wp*pi*lbk_rad(bub_idx, 2)**3._wp + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(bub_idx, 2)**2._wp*lbk_vel(bub_idx, 2) + + ! Kahan summation for void fraction + y_kahan = real(strength_vol/Vol, kind=wp) - kcomp(1)%sf(i, j, k) + t_kahan = updatedvar(1)%sf(i, j, k) + y_kahan + kcomp(1)%sf(i, j, k) = (t_kahan - updatedvar(1)%sf(i, j, k)) - y_kahan + updatedvar(1)%sf(i, j, k) = t_kahan + + ! Kahan summation for time derivative of void fraction + y_kahan = real(strength_vel/Vol, kind=wp) - kcomp(2)%sf(i, j, k) + t_kahan = updatedvar(2)%sf(i, j, k) + y_kahan + kcomp(2)%sf(i, j, k) = (t_kahan - updatedvar(2)%sf(i, j, k)) - y_kahan + updatedvar(2)%sf(i, j, k) = t_kahan + + ! 
Product of two smeared functions + if (lag_params%cluster_type >= 4) then + y_kahan = real((strength_vol*strength_vel)/Vol, kind=wp) - kcomp(5)%sf(i, j, k) + t_kahan = updatedvar(5)%sf(i, j, k) + y_kahan + kcomp(5)%sf(i, j, k) = (t_kahan - updatedvar(5)%sf(i, j, k)) - y_kahan + updatedvar(5)%sf(i, j, k) = t_kahan + end if + end do + + end do + end do end do $:END_GPU_PARALLEL_LOOP() end subroutine s_deltafunc - !> The purpose of this procedure contains the algorithm to use the gaussian kernel function to map the effect of the bubbles. - !! The effect of the bubbles affects the 3X3x3 cells that surround the bubble. - subroutine s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + !> Cell-centric gaussian smearing using the cell list (no GPU atomics). + !! Each grid cell accumulates contributions from nearby bubbles looked up + !! via cell_list_start/count/idx. + subroutine s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s, lbk_pos real(wp), dimension(1:lag_params%nBubs_glb, 1:2), intent(in) :: lbk_rad, lbk_vel type(scalar_field), dimension(:), intent(inout) :: updatedvar + type(scalar_field), dimension(:), intent(inout) :: kcomp - real(wp), dimension(3) :: center - integer, dimension(3) :: cell - real(wp) :: stddsv + real(wp), dimension(3) :: center, nodecoord, s_coord + integer, dimension(3) :: cell, cellijk + real(wp) :: stddsv, volpart real(wp) :: strength_vel, strength_vol - - real(wp), dimension(3) :: nodecoord - real(wp) :: addFun1, addFun2, addFun3 - real(wp) :: func, func2, volpart - integer, dimension(3) :: cellaux - real(wp), dimension(3) :: s_coord - integer :: l, i, j, k - logical :: celloutside - integer :: smearGrid, smearGridz - - smearGrid = mapCells - (-mapCells) + 1 ! 
Include the cell that contains the bubble (3+1+3) - smearGridz = smearGrid - if (p == 0) smearGridz = 1 - - $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - center(1:2) = lbk_pos(l, 1:2, 2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !Check if the cells intended to smear the bubbles in are in the computational domain - !and redefine the cells for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! 
Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) - if (p == 0) cellaux(3) = 0 - end if - - !Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + real(addFun1, kind=stp) - - !Update time derivative of void fraction - addFun2 = func*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + real(addFun2, kind=stp) - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = func2*strength_vol*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + real(addFun3, kind=stp) - end if + real(wp) :: func, func2 + real(wp) :: y_kahan, t_kahan + integer :: i, j, k, di, dj, dk, lb, bub_idx + integer :: di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end + integer :: smear_x_beg, smear_x_end + integer :: smear_y_beg, smear_y_end + integer :: smear_z_beg, smear_z_end + + ! 
Extended grid range for smearing (includes buffer cells for MPI communication) + smear_x_beg = -mapCells - 1 + smear_x_end = m + mapCells + 1 + smear_y_beg = merge(-mapCells - 1, 0, n > 0) + smear_y_end = merge(n + mapCells + 1, n, n > 0) + smear_z_beg = merge(-mapCells - 1, 0, p > 0) + smear_z_end = merge(p + mapCells + 1, p, p > 0) + + $:GPU_PARALLEL_LOOP(collapse=3, & + & private='[i,j,k,di,dj,dk,lb,bub_idx,center,nodecoord,s_coord,cell,cellijk,stddsv,volpart,strength_vel,strength_vol,func,func2,y_kahan,t_kahan,di_beg,di_end,dj_beg,dj_end,dk_beg,dk_end]', & + & copyin='[smear_x_beg,smear_x_end,smear_y_beg,smear_y_end,smear_z_beg,smear_z_end]') + do k = smear_z_beg, smear_z_end + do j = smear_y_beg, smear_y_end + do i = smear_x_beg, smear_x_end + + cellijk(1) = i + cellijk(2) = j + cellijk(3) = k + + nodecoord(1) = x_cc(i) + nodecoord(2) = y_cc(j) + nodecoord(3) = 0._wp + if (p > 0) nodecoord(3) = z_cc(k) + + ! Neighbor cell range clamped to interior [0:m, 0:n, 0:p] + di_beg = max(i - mapCells, 0) + di_end = min(i + mapCells, m) + dj_beg = max(j - mapCells, 0) + dj_end = min(j + mapCells, n) + dk_beg = max(k - mapCells, 0) + dk_end = min(k + mapCells, p) + + $:GPU_LOOP(parallelism='[seq]') + do dk = dk_beg, dk_end + $:GPU_LOOP(parallelism='[seq]') + do dj = dj_beg, dj_end + $:GPU_LOOP(parallelism='[seq]') + do di = di_beg, di_end + $:GPU_LOOP(parallelism='[seq]') + do lb = cell_list_start(di, dj, dk), & + cell_list_start(di, dj, dk) + cell_list_count(di, dj, dk) - 1 + + bub_idx = cell_list_idx(lb) + + ! 
Bubble properties + volpart = 4._wp/3._wp*pi*lbk_rad(bub_idx, 2)**3._wp + s_coord(1:3) = lbk_s(bub_idx, 1:3, 2) + call s_get_cell(s_coord, cell) + call s_compute_stddsv(cell, volpart, stddsv) + + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(bub_idx, 2)**2._wp*lbk_vel(bub_idx, 2) + + center(1:2) = lbk_pos(bub_idx, 1:2, 2) + center(3) = 0._wp + if (p > 0) center(3) = lbk_pos(bub_idx, 3, 2) + + call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp, func) + + ! Kahan summation for void fraction + y_kahan = real(func*strength_vol, kind=wp) - kcomp(1)%sf(i, j, k) + t_kahan = updatedvar(1)%sf(i, j, k) + y_kahan + kcomp(1)%sf(i, j, k) = (t_kahan - updatedvar(1)%sf(i, j, k)) - y_kahan + updatedvar(1)%sf(i, j, k) = t_kahan + + ! Kahan summation for time derivative of void fraction + y_kahan = real(func*strength_vel, kind=wp) - kcomp(2)%sf(i, j, k) + t_kahan = updatedvar(2)%sf(i, j, k) + y_kahan + kcomp(2)%sf(i, j, k) = (t_kahan - updatedvar(2)%sf(i, j, k)) - y_kahan + updatedvar(2)%sf(i, j, k) = t_kahan + + if (lag_params%cluster_type >= 4) then + call s_applygaussian(center, cellijk, nodecoord, stddsv, 1._wp, func2) + y_kahan = real(func2*strength_vol*strength_vel, kind=wp) - kcomp(5)%sf(i, j, k) + t_kahan = updatedvar(5)%sf(i, j, k) + y_kahan + kcomp(5)%sf(i, j, k) = (t_kahan - updatedvar(5)%sf(i, j, k)) - y_kahan + updatedvar(5)%sf(i, j, k) = t_kahan + end if + + end do + end do + end do end do + end do end do end do @@ -210,9 +308,10 @@ contains real(wp), intent(in) :: stddsv real(wp), intent(in) :: strength_idx real(wp), intent(out) :: func + integer :: i real(wp) :: distance - real(wp) :: theta, dtheta, L2, dzp, Lz2 + real(wp) :: theta, dtheta, L2, dzp, Lz2, zc real(wp) :: Nr, Nr_count distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + (center(3) - nodecoord(3))**2._wp) @@ -243,22 +342,16 @@ contains dtheta/2._wp/pi*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**(3._wp*(strength_idx + 1._wp)) end 
do else - - !< 2D cartesian function: - ! We smear particles considering a virtual depth (lag_params%charwidth) - theta = 0._wp - Nr = ceiling(lag_params%charwidth/(y_cb(cellaux(2)) - y_cb(cellaux(2) - 1))) - Nr_count = 1._wp - mapCells*1._wp - dzp = y_cb(cellaux(2) + 1) - y_cb(cellaux(2)) - Lz2 = (center(3) - (dzp*(0.5_wp + Nr_count) - lag_params%charwidth/2._wp))**2._wp - distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) - func = dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp - do while (Nr_count < Nr - 1._wp + ((mapCells - 1)*1._wp)) - Nr_count = Nr_count + 1._wp - Lz2 = (center(3) - (dzp*(0.5_wp + Nr_count) - lag_params%charwidth/2._wp))**2._wp + !< 2D cartesian function: Equation (48) from Madea and Colonius 2018 + ! We smear particles considering a virtual depth (lag_params%charwidth) with lag_params%charNz cells + dzp = (lag_params%charwidth/(lag_params%charNz + 1._wp)) + + func = 0._wp + do i = 0, lag_params%charNz + zc = (-lag_params%charwidth/2._wp + dzp*(0.5_wp + i)) ! Center of virtual cell i in z-direction + Lz2 = (center(3) - zc)**2._wp distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) - func = func + & - dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**(3._wp*(strength_idx + 1._wp)) + func = func + dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp end do end if end if @@ -368,7 +461,7 @@ contains end if !< Compute Standard deviaton - if (((volpart/charvol) > 0.5_wp*lag_params%valmaxvoid) .or. (lag_params%smooth_type == 1)) then + if ((volpart/charvol) > 0.5_wp*lag_params%valmaxvoid .or. 
(lag_params%smooth_type == 1)) then rad = (3._wp*volpart/(4._wp*pi))**(1._wp/3._wp) stddsv = 1._wp*lag_params%epsilonb*max(chardist, rad) else @@ -420,4 +513,227 @@ contains end subroutine s_get_cell + !> Precomputes cell-centered pressure gradients (dp/dx, dp/dy, dp/dz) at all cell centers + !! using finite-difference coefficients of the specified order. This avoids + !! scattered memory accesses to the pressure field when computing translational + !! bubble forces. + !! @param q_prim_vf Primitive variables (pressure is at index E_idx) + subroutine s_compute_pressure_gradients(q_prim_vf) + + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + + integer :: i, j, k, r + + ! dp/dx at all cell centers + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + grad_p_x(i, j, k) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + grad_p_x(i, j, k) = grad_p_x(i, j, k) + & + q_prim_vf(E_idx)%sf(i + r, j, k)*fd_coeff_x_pgrad(r, i) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + ! dp/dy at all cell centers + if (n > 0) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + grad_p_y(i, j, k) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + grad_p_y(i, j, k) = grad_p_y(i, j, k) + & + q_prim_vf(E_idx)%sf(i, j + r, k)*fd_coeff_y_pgrad(r, j) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + ! dp/dz at all cell centers + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + grad_p_z(i, j, k) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + grad_p_z(i, j, k) = grad_p_z(i, j, k) + & + q_prim_vf(E_idx)%sf(i, j, k + r)*fd_coeff_z_pgrad(r, k) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + end subroutine s_compute_pressure_gradients + + !! 
This function interpolates the velocity of Eulerian field at the position + !! of the bubble. + !! @param pos Position of the bubble in directiion i + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return v Interpolated velocity at the position of the bubble + function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: pos + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + + real(wp) :: v + real(wp), dimension(fd_order + 1) :: xi, eta, L + + if (fd_order == 2) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 1) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(2) = x_cc(cell(1)) + eta(2) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) + xi(3) = x_cc(cell(1) + 1) + eta(3) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 1) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(2) = y_cc(cell(2)) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) + xi(3) = y_cc(cell(2) + 1) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 1) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(2) = z_cc(cell(3)) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) + xi(3) = z_cc(cell(3) + 1) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + end if + + L(1) = ((pos - xi(2))*(pos - xi(3)))/((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3)))/((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2)))/((xi(3) - xi(1))*(xi(3) - xi(2))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + elseif (fd_order == 4) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 2) + eta(1) = 
q_prim_vf(momxb)%sf(cell(1) - 2, cell(2), cell(3)) + xi(2) = x_cc(cell(1) - 1) + eta(2) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(3) = x_cc(cell(1)) + eta(3) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) + xi(4) = x_cc(cell(1) + 1) + eta(4) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + xi(5) = x_cc(cell(1) + 2) + eta(5) = q_prim_vf(momxb)%sf(cell(1) + 2, cell(2), cell(3)) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 2) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 2, cell(3)) + xi(2) = y_cc(cell(2) - 1) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(3) = y_cc(cell(2)) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) + xi(4) = y_cc(cell(2) + 1) + eta(4) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + xi(5) = y_cc(cell(2) + 2) + eta(5) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 2, cell(3)) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 2) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 2) + xi(2) = z_cc(cell(3) - 1) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(3) = z_cc(cell(3)) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) + xi(4) = z_cc(cell(3) + 1) + eta(4) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + xi(5) = z_cc(cell(3) + 2) + eta(5) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 2) + end if + + L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/ & + ((xi(1) - xi(2))*(xi(1) - xi(3))*(xi(1) - xi(4))*(xi(2) - xi(5))) + L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/ & + ((xi(2) - xi(1))*(xi(2) - xi(3))*(xi(2) - xi(4))*(xi(2) - xi(5))) + L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5)))/ & + ((xi(3) - xi(1))*(xi(3) - xi(2))*(xi(3) - xi(4))*(xi(3) - xi(5))) + L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/ & + ((xi(4) - xi(1))*(xi(4) - xi(2))*(xi(4) - xi(3))*(xi(4) - xi(5))) + L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/ & + ((xi(5) - 
xi(1))*(xi(5) - xi(2))*(xi(5) - xi(3))*(xi(5) - xi(4))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + L(4)*eta(4) + L(5)*eta(5) + end if + + end function f_interpolate_velocity + + !! This function calculates the force on a bubble + !! based on the pressure gradient, velocity, and drag model. + !! @param pos Position of the bubble in direction i + !! @param rad Radius of the bubble + !! @param rdot Radial velocity of the bubble + !! @param vel Velocity of the bubble + !! @param mg Mass of the gas in the bubble + !! @param mv Mass of the liquid in the bubble + !! @param Re Reynolds number + !! @param rho Density of the fluid + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return a Acceleration of the bubble in direction i + function f_get_bubble_force(pos, rad, rdot, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(force) + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: pos, rad, rdot, mg, mv, Re, rho, vel + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + + real(wp) :: dp, vol, force + real(wp) :: v_rel + + if (fd_order > 1) then + v_rel = vel - f_interpolate_velocity(pos, cell, i, q_prim_vf) + else + v_rel = vel - q_prim_vf(momxb + i - 1)%sf(cell(1), cell(2), cell(3)) + end if + + force = 0._wp + + if (lag_params%drag_model == 1) then ! Free slip Stokes drag + force = force - (4._wp*pi*rad*v_rel)/Re + else if (lag_params%drag_model == 2) then ! No slip Stokes drag + force = force - (6._wp*pi*rad*v_rel)/Re + else if (lag_params%drag_model == 3) then ! Levich drag + force = force - (12._wp*pi*rad*v_rel)/Re + end if + + if (lag_pressure_force) then + ! 
Use precomputed cell-centered pressure gradients + if (i == 1) then + dp = grad_p_x(cell(1), cell(2), cell(3)) + elseif (i == 2) then + dp = grad_p_y(cell(1), cell(2), cell(3)) + elseif (i == 3) then + dp = grad_p_z(cell(1), cell(2), cell(3)) + end if + + vol = (4._wp/3._wp)*pi*(rad**3._wp) + force = force - vol*dp + end if + + if (lag_params%gravity_force) then + force = force + (mg + mv)*accel_bf(i) + end if + + end function f_get_bubble_force + end module m_bubbles_EL_kernels diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 0663ae73ca..f5d07f9e58 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -102,8 +102,8 @@ contains radius = patch_ib(ib_patch_id)%radius - dist_vec(1) = x_cc(i) - patch_ib(ib_patch_id)%x_centroid - real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - dist_vec(2) = y_cc(j) - patch_ib(ib_patch_id)%y_centroid - real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + dist_vec(1) = x_cc(i) - patch_ib(ib_patch_id)%x_centroid - real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + dist_vec(2) = y_cc(j) - patch_ib(ib_patch_id)%y_centroid - real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) dist_vec(3) = 0._wp dist = sqrt(sum(dist_vec**2)) @@ -137,8 +137,8 @@ contains i = gp%loc(1) j = gp%loc(2) - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) offset(:) = patch_ib(ib_patch_id)%centroid_offset(:) 
@@ -223,9 +223,9 @@ contains j = gp%loc(2) l = gp%loc(3) - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) lz = patch_ib(ib_patch_id)%length_z inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) @@ -328,8 +328,8 @@ contains length_x = patch_ib(ib_patch_id)%length_x length_y = patch_ib(ib_patch_id)%length_y - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) @@ -399,8 +399,8 @@ contains length_x = patch_ib(ib_patch_id)%length_x length_y = patch_ib(ib_patch_id)%length_y - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, 
wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) @@ -453,9 +453,9 @@ contains length_y = patch_ib(ib_patch_id)%length_y length_z = patch_ib(ib_patch_id)%length_z - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) @@ -535,9 +535,9 @@ contains k = gp%loc(3) radius = patch_ib(ib_patch_id)%radius - periodicity(1) = real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - periodicity(2) = real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - periodicity(3) = real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + periodicity(1) = real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + periodicity(2) = real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + periodicity(3) = real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) center(1) = patch_ib(ib_patch_id)%x_centroid center(2) = patch_ib(ib_patch_id)%y_centroid center(3) = patch_ib(ib_patch_id)%z_centroid @@ -579,9 +579,9 @@ contains k = gp%loc(3) radius = patch_ib(ib_patch_id)%radius - 
center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) length(1) = patch_ib(ib_patch_id)%length_x length(2) = patch_ib(ib_patch_id)%length_y length(3) = patch_ib(ib_patch_id)%length_z @@ -658,10 +658,10 @@ contains total_vertices = gpu_total_vertices(patch_id) center = 0._wp - if (.not. f_is_default(patch_ib(patch_id)%x_centroid)) center(1) = patch_ib(patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - if (.not. f_is_default(patch_ib(patch_id)%y_centroid)) center(2) = patch_ib(patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + if (.not. f_is_default(patch_ib(patch_id)%x_centroid)) center(1) = patch_ib(patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + if (.not. f_is_default(patch_ib(patch_id)%y_centroid)) center(2) = patch_ib(patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) if (p > 0) then - if (.not. f_is_default(patch_ib(patch_id)%z_centroid)) center(3) = patch_ib(patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + if (.not. 
f_is_default(patch_ib(patch_id)%z_centroid)) center(3) = patch_ib(patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) end if inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 068fcb38b1..9db92e1637 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -49,19 +49,8 @@ module m_data_output s_finalize_data_output_module, & s_write_ib_data_file - real(wp), allocatable, dimension(:, :, :) :: icfl_sf !< ICFL stability criterion - real(wp), allocatable, dimension(:, :, :) :: vcfl_sf !< VCFL stability criterion - real(wp), allocatable, dimension(:, :, :) :: ccfl_sf !< CCFL stability criterion - real(wp), allocatable, dimension(:, :, :) :: Rc_sf !< Rc stability criterion real(wp), public, allocatable, dimension(:, :) :: c_mass - $:GPU_DECLARE(create='[icfl_sf,vcfl_sf,ccfl_sf,Rc_sf,c_mass]') - - real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids - real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids - real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids - real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids - $:GPU_DECLARE(create='[icfl_max_loc,icfl_max_glb,vcfl_max_loc,vcfl_max_glb]') - $:GPU_DECLARE(create='[ccfl_max_loc,ccfl_max_glb,Rc_min_loc,Rc_min_glb]') + $:GPU_DECLARE(create='[c_mass]') !> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps !> @{ @@ -165,6 +154,10 @@ contains trim('VCFL Max'), trim('Rc Min') end if + if (bubbles_lagrange) then + write (3, '(13X,A10)', advance="no") trim('N Bubbles') + end if + write (3, *) ! new line end subroutine s_open_run_time_information_file @@ -283,9 +276,18 @@ contains real(wp) :: H !< Cell-avg. enthalpy real(wp), dimension(2) :: Re !< Cell-avg. 
Reynolds numbers integer :: j, k, l - + real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids + real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids + real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids + real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids + real(wp) :: icfl, vcfl, Rc + + icfl_max_loc = 0._wp + vcfl_max_loc = 0._wp + Rc_min_loc = huge(1.0_wp) ! Computing Stability Criteria at Current Time-step - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv, icfl, vcfl, Rc]', & + & reduction='[[icfl_max_loc,vcfl_max_loc],[Rc_min_loc]]', reductionOp='[max,min]') do l = 0, p do k = 0, n do j = 0, m @@ -294,57 +296,35 @@ contains call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c, qv) if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl, vcfl, Rc) else - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl) end if + icfl_max_loc = max(icfl_max_loc, icfl) + vcfl_max_loc = max(vcfl_max_loc, merge(vcfl, 0.0_wp, viscous)) + Rc_min_loc = min(Rc_min_loc, merge(Rc, huge(1.0_wp), viscous)) end do end do end do $:END_GPU_PARALLEL_LOOP() - ! end: Computing Stability Criteria at Current Time-step - ! 
Determining local stability criteria extrema at current time-step - -#ifdef _CRAYFTN - $:GPU_UPDATE(host='[icfl_sf]') - - if (viscous) then - $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]') - end if - - icfl_max_loc = maxval(icfl_sf) - - if (viscous) then - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) - end if -#else - #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') - icfl_max_loc = maxval(icfl_sf) - #:endcall GPU_PARALLEL - if (viscous .or. dummy) then - #:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) - #:endcall GPU_PARALLEL - end if -#endif - ! Determining global stability criteria extrema at current time-step if (num_procs > 1) then call s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & + n_el_bubs_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb) + Rc_min_glb, & + n_el_bubs_glb) else icfl_max_glb = icfl_max_loc if (viscous) vcfl_max_glb = vcfl_max_loc if (viscous) Rc_min_glb = Rc_min_loc + if (bubbles_lagrange) n_el_bubs_glb = n_el_bubs_loc end if ! Determining the stability criteria extrema over all the time-steps @@ -366,6 +346,10 @@ contains Rc_min_glb end if + if (bubbles_lagrange) then + write (3, '(13X,I10)', advance="no") n_el_bubs_glb + end if + write (3, *) ! new line if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then @@ -383,6 +367,12 @@ contains call s_mpi_abort('VCFL is greater than 1.0. Exiting.') end if end if + + if (bubbles_lagrange) then + if (n_el_bubs_glb == 0) then + call s_mpi_abort('No Lagrangian bubbles remain in the domain. 
Exiting.') + end if + end if end if call s_mpi_barrier() @@ -1875,7 +1865,7 @@ contains write (3, '(A,F9.6)') 'ICFL Max: ', icfl_max if (viscous) write (3, '(A,F9.6)') 'VCFL Max: ', vcfl_max - if (viscous) write (3, '(A,F10.6)') 'Rc Min: ', Rc_min + if (viscous) write (3, '(A,ES16.6)') 'Rc Min: ', Rc_min call cpu_time(run_time) @@ -1916,15 +1906,10 @@ contains ! Allocating/initializing ICFL, VCFL, CCFL and Rc stability criteria if (run_time_info) then - @:ALLOCATE(icfl_sf(0:m, 0:n, 0:p)) icfl_max = 0._wp - if (viscous) then - @:ALLOCATE(vcfl_sf(0:m, 0:n, 0:p)) - @:ALLOCATE(Rc_sf (0:m, 0:n, 0:p)) - vcfl_max = 0._wp - Rc_min = 1.e3_wp + Rc_min = 1.e12_wp end if end if @@ -1954,14 +1939,6 @@ contains @:DEALLOCATE(c_mass) end if - if (run_time_info) then - ! Deallocating the ICFL, VCFL, CCFL, and Rc stability criteria - @:DEALLOCATE(icfl_sf) - if (viscous) then - @:DEALLOCATE(vcfl_sf, Rc_sf) - end if - end if - if (down_sample) then do i = 1, sys_size deallocate (q_cons_temp_ds(i)%sf) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index c1be537ecd..6aab245102 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -54,26 +54,23 @@ module m_global_parameters !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively !> @{ - real(wp), target, allocatable, dimension(:) :: x_cb, y_cb, z_cb + type(bounds_info), dimension(3) :: glb_bounds !< !> @} !> @name Cell-center (CC) locations in the x-, y- and z-directions, respectively !> @{ - real(wp), target, allocatable, dimension(:) :: x_cc, y_cc, z_cc !> @} - !type(bounds_info) :: x_domain, y_domain, z_domain !< - !! 
Locations of the domain bounds in the x-, y- and z-coordinate directions + !> @name Cell-width distributions in the x-, y- and z-directions, respectively !> @{ - real(wp), target, allocatable, dimension(:) :: dx, dy, dz !> @} real(wp) :: dt !< Size of the time-step - $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') + $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p,glb_bounds]') !> @name Starting time-step iteration, stopping time-step iteration and the number !! of time-step iterations between successive solution backups, respectively @@ -224,6 +221,7 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io + logical, dimension(3) :: periodic_bc !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -235,10 +233,6 @@ module m_global_parameters #elif defined(MFC_OpenMP) $:GPU_DECLARE(create='[bc_x, bc_y, bc_z]') #endif - type(bounds_info) :: x_domain, y_domain, z_domain - $:GPU_DECLARE(create='[x_domain, y_domain, z_domain]') - real(wp) :: x_a, y_a, z_a - real(wp) :: x_b, y_b, z_b logical :: parallel_io !< Format of the data files logical :: file_per_process !< shared file or not when using parallel io @@ -249,6 +243,19 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(bounds_info), allocatable, dimension(:) :: pcomm_coords + $:GPU_DECLARE(create='[pcomm_coords]') + !! Coordinates for EL particle transfer + + type(bounds_info), allocatable, dimension(:) :: pcomm_coords_ghost + $:GPU_DECLARE(create='[pcomm_coords_ghost]') + !! Coordinates for EL particle transfer + + type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors + + integer, allocatable, dimension(:, :, :) :: neighbor_ranks + !! Neighbor ranks + integer, allocatable, dimension(:) :: start_idx !< !! 
Starting cell-center index of local processor in global grid @@ -345,6 +352,10 @@ module m_global_parameters $:GPU_DECLARE(create='[buff_size]') + integer, allocatable :: beta_vars(:) !< Indices of variables to communicate for bubble/particle coupling + + $:GPU_DECLARE(create='[beta_vars]') + integer :: shear_num !! Number of shear stress components integer, dimension(3) :: shear_indices !< !! Indices of the stress components that represent shear stress @@ -368,6 +379,9 @@ module m_global_parameters ! Subgrid Bubble Parameters type(subgrid_bubble_physical_parameters) :: bub_pp + ! Subgrid Particle Parameters + type(subgrid_particle_physical_parameters) :: particle_pp + integer :: fd_order !< !! The order of the finite-difference (fd) approximations of the first-order !! derivatives that need to be evaluated when the CoM or flow probe data @@ -484,6 +498,13 @@ module m_global_parameters gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g $:GPU_DECLARE(create='[R0ref, p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, & gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g]') + + !> @} + + !> @name Solid particle physical parameters + !> @{ + real(wp) :: cp_particle, rho0ref_particle + $:GPU_DECLARE(create='[rho0ref_particle, cp_particle]') !> @} !> @name Acoustic acoustic_source parameters @@ -532,8 +553,20 @@ module m_global_parameters !> @name lagrangian subgrid bubble parameters !> @{! 
logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch + logical :: particles_lagrange !< Lagrangian subgrid particle model switch type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters - $:GPU_DECLARE(create='[bubbles_lagrange,lag_params]') + integer :: n_el_bubs_loc, n_el_bubs_glb !< Number of Lagrangian bubbles (local and global) + integer :: n_el_particles_loc, n_el_particles_glb !< Number of Lagrangian particles (local and global) + logical :: moving_lag_bubbles + logical :: moving_lag_particles + logical :: lag_pressure_force + logical :: lag_gravity_force + integer :: lag_vel_model, lag_drag_model + $:GPU_DECLARE(create='[bubbles_lagrange,lag_params,n_el_bubs_loc,n_el_bubs_glb]') + $:GPU_DECLARE(create='[particles_lagrange,n_el_particles_loc,n_el_particles_glb]') + $:GPU_DECLARE(create='[moving_lag_particles]') + $:GPU_DECLARE(create='[moving_lag_bubbles, lag_vel_model, lag_drag_model]') + $:GPU_DECLARE(create='[lag_pressure_force,lag_gravity_force]') !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) @@ -659,6 +692,7 @@ contains num_bc_patches = 0 bc_io = .false. + periodic_bc = .false. bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int @@ -671,9 +705,9 @@ contains #:endfor #:endfor - x_domain%beg = dflt_real; x_domain%end = dflt_real - y_domain%beg = dflt_real; y_domain%end = dflt_real - z_domain%beg = dflt_real; z_domain%end = dflt_real + glb_bounds(1)%beg = dflt_real; glb_bounds(1)%end = dflt_real + glb_bounds(2)%beg = dflt_real; glb_bounds(2)%end = dflt_real + glb_bounds(3)%beg = dflt_real; glb_bounds(3)%end = dflt_real ! Fluids physical parameters do i = 1, num_fluids_max @@ -708,6 +742,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real + ! Subgrid particle parameters + particle_pp%rho0ref_particle = dflt_real + particle_pp%cp_particle = dflt_real + ! 
Tait EOS rhoref = dflt_real pref = dflt_real @@ -836,10 +874,29 @@ contains lag_params%massTransfer_model = .false. lag_params%write_bubbles = .false. lag_params%write_bubbles_stats = .false. + lag_params%write_void_evol = .false. lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int + lag_params%pressure_force = .true. + lag_params%gravity_force = .false. lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real + lag_params%charNz = dflt_int lag_params%valmaxvoid = dflt_real + lag_params%input_path = 'input/lag_bubbles.dat' + lag_params%nParticles_glb = dflt_int + lag_params%qs_drag_model = dflt_int + lag_params%stokes_drag = dflt_int + lag_params%added_mass_model = dflt_int + lag_params%interpolation_order = dflt_int + lag_params%collision_force = .false. + + moving_lag_bubbles = .false. + lag_vel_model = dflt_int + + particles_lagrange = .false. + moving_lag_particles = .false. ! Continuum damage model tau_star = dflt_real @@ -1209,6 +1266,16 @@ contains ! 
END: Volume Fraction Model + if (bubbles_lagrange) then + @:ALLOCATE(beta_vars(1:3)) + beta_vars(1:3) = [1, 2, 5] + $:GPU_UPDATE(device='[beta_vars]') + elseif (particles_lagrange) then + @:ALLOCATE(beta_vars(1:8)) + beta_vars(1:8) = [1, 2, 3, 4, 5, 6, 7, 8] + $:GPU_UPDATE(device='[beta_vars]') + end if + if (chemistry) then species_idx%beg = sys_size + 1 species_idx%end = sys_size + num_species @@ -1221,6 +1288,9 @@ contains elseif (bubbles_lagrange) then allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) + elseif (particles_lagrange) then + allocate (MPI_IO_DATA%view(1:sys_size + 1)) + allocate (MPI_IO_DATA%var(1:sys_size + 1)) else allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) @@ -1242,6 +1312,11 @@ contains allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do + elseif (particles_lagrange) then + do i = 1, sys_size + 1 + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do end if ! Configuring the WENO average flag that will be used to regulate @@ -1260,23 +1335,19 @@ contains if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) Np = 0 - if (elasticity) then - fd_number = max(1, fd_order/2) - end if - + if (elasticity) fd_number = max(1, fd_order/2) if (mhd) then ! TODO merge with above; waiting for hyperelasticity PR fd_number = max(1, fd_order/2) end if - - if (probe_wrt) then - fd_number = max(1, fd_order/2) - end if + if (probe_wrt) fd_number = max(1, fd_order/2) + if (bubbles_lagrange) fd_number = max(1, fd_order/2) + if (particles_lagrange) fd_number = max(1, fd_order/2) call s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, & igr_order, buff_size, & idwint, idwbuff, viscous, & - bubbles_lagrange, m, n, p, & - num_dims, igr, ib) + bubbles_lagrange, particles_lagrange, & + m, n, p, num_dims, igr, ib, fd_number) $:GPU_UPDATE(device='[idwint, idwbuff]') ! 
Configuring Coordinate Direction Indexes @@ -1397,6 +1468,8 @@ contains #:endif allocate (proc_coords(1:num_dims)) + @:ALLOCATE(pcomm_coords(1:num_dims)) + @:ALLOCATE(pcomm_coords_ghost(1:num_dims)) if (parallel_io .neqv. .true.) return @@ -1433,6 +1506,14 @@ contains end if deallocate (proc_coords) + + @:DEALLOCATE(pcomm_coords) + @:DEALLOCATE(pcomm_coords_ghost) + + if (bubbles_lagrange .or. particles_lagrange) then + @:DEALLOCATE(beta_vars) + end if + if (parallel_io) then deallocate (start_idx) @@ -1440,6 +1521,10 @@ contains do i = 1, sys_size + 1 MPI_IO_DATA%var(i)%sf => null() end do + elseif (particles_lagrange) then + do i = 1, sys_size + 1 + MPI_IO_DATA%var(i)%sf => null() + end do else do i = 1, sys_size MPI_IO_DATA%var(i)%sf => null() @@ -1461,6 +1546,10 @@ contains if (p == 0) return; @:DEALLOCATE(z_cb, z_cc, dz) + if (allocated(neighbor_ranks)) then + @:DEALLOCATE(neighbor_ranks) + end if + end subroutine s_finalize_global_parameters_module end module m_global_parameters diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index d725a3e5f3..12fcccbf3c 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -143,8 +143,8 @@ contains ! Transferring the circular patch's radius, centroid, smearing patch ! identity and smearing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) radius = patch_ib(patch_id)%radius ! 
encode the periodicity information into the patch_id @@ -197,8 +197,8 @@ contains real(wp), dimension(1:2) :: center !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) ca_in = patch_ib(patch_id)%c pa = patch_ib(patch_id)%p ma = patch_ib(patch_id)%m @@ -360,9 +360,9 @@ contains real(wp), dimension(1:3) :: xyz_local, center, offset !< x, y, z coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) lz = patch_ib(patch_id)%length_z ca_in = patch_ib(patch_id)%c pa = patch_ib(patch_id)%p @@ -533,8 +533,8 @@ contains real(wp), dimension(1:3, 1:3) :: inverse_rotation ! 
Transferring the rectangle's centroid and length information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) length(1) = patch_ib(patch_id)%length_x length(2) = patch_ib(patch_id)%length_y inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) @@ -604,9 +604,9 @@ contains ! Transferring spherical patch's radius, centroid, smoothing patch ! identity and smoothing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) radius = patch_ib(patch_id)%radius ! encode the periodicity information into the patch_id @@ -675,9 +675,9 @@ contains real(wp) :: corner_distance ! 
Transferring the cuboid's centroid and length information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) length(1) = patch_ib(patch_id)%length_x length(2) = patch_ib(patch_id)%length_y length(3) = patch_ib(patch_id)%length_z @@ -760,9 +760,9 @@ contains real(wp) :: corner_distance ! Transferring the cylindrical patch's centroid, length, radius, - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) length(1) = patch_ib(patch_id)%length_x length(2) = patch_ib(patch_id)%length_y length(3) = patch_ib(patch_id)%length_z @@ -845,8 +845,8 @@ contains real(wp), dimension(1:3, 1:3) :: inverse_rotation ! 
Transferring the ellipse's centroid and length information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) ellipse_coeffs(1) = 0.5_wp*patch_ib(patch_id)%length_x ellipse_coeffs(2) = 0.5_wp*patch_ib(patch_id)%length_y inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) @@ -905,8 +905,8 @@ contains real(wp), dimension(1:3, 1:3) :: inverse_rotation, rotation center = 0._wp - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(patch_id)%rotation_matrix(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) @@ -987,9 +987,9 @@ contains real(wp), dimension(1:3) :: bbox_min, bbox_max, local_corner, world_corner center = 0._wp - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) inverse_rotation(:, :) = 
patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) spc = patch_ib(patch_id)%model_spc @@ -1232,29 +1232,20 @@ contains integer, intent(out), optional :: zp_lower, zp_upper ! check domain wraps in x, y - #:for X in [('x'), ('y')] - ! check for periodicity - if (bc_${X}$%beg == BC_PERIODIC) then - ${X}$p_lower = -1 - ${X}$p_upper = 1 - else - !if it is not periodic, then both elements are 0 - ${X}$p_lower = 0 - ${X}$p_upper = 0 + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + ! check for periodicity + if (bc_${X}$%beg == BC_PERIODIC) then + ${X}$p_lower = -1 + ${X}$p_upper = 1 + else + !if it is not periodic, then both elements are 0 + ${X}$p_lower = 0 + ${X}$p_upper = 0 + end if end if #:endfor - ! z only if 3D - if (present(zp_lower) .and. p /= 0) then - if (bc_z%beg == BC_PERIODIC) then - zp_lower = -1 - zp_upper = 1 - else - zp_lower = 0 - zp_upper = 0 - end if - end if - end subroutine s_get_periodicities !> Archimedes spiral function diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index b87f5a1b19..b8434b584c 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -99,10 +99,11 @@ contains $:GPU_UPDATE(device='[patch_ib(1:num_ibs)]') ! GPU routines require updated cell centers - $:GPU_UPDATE(device='[num_ibs, x_cc, y_cc, dx, dy, x_domain, y_domain]') + $:GPU_UPDATE(device='[num_ibs, x_cc, y_cc, dx, dy]') if (p /= 0) then - $:GPU_UPDATE(device='[z_cc, dz, z_domain]') + $:GPU_UPDATE(device='[z_cc, dz]') end if + $:GPU_UPDATE(device='[glb_bounds]') ! 
allocate STL models call s_instantiate_STL_models() @@ -504,7 +505,7 @@ contains print *, [x_cc(i), y_cc(j), z_cc(k)] end if print *, "We are searching in dimension ", dim, " for image point at ", ghost_points_in(q)%ip_loc(:) - print *, "Domain size: ", [x_cc(-buff_size), y_cc(-buff_size), z_cc(-buff_size)] + print *, "Domain size: " print *, "x: ", x_cc(-buff_size), " to: ", x_cc(m + buff_size - 1) print *, "y: ", y_cc(-buff_size), " to: ", y_cc(n + buff_size - 1) if (p /= 0) print *, "z: ", z_cc(-buff_size), " to: ", z_cc(p + buff_size - 1) @@ -599,7 +600,7 @@ contains gp_layers_z = gp_layers if (p == 0) gp_layers_z = 0 - $:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp,local_idx,patch_id,encoded_patch_id,xp,yp,zp]', copyin='[count,count_i, x_domain, y_domain, z_domain]', firstprivate='[gp_layers,gp_layers_z]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp,local_idx,patch_id,encoded_patch_id,xp,yp,zp]', copyin='[count,count_i,glb_bounds]', firstprivate='[gp_layers,gp_layers_z]', collapse=3) do i = 0, m do j = 0, n do k = 0, p @@ -633,26 +634,26 @@ contains ghost_points_in(local_idx)%z_periodicity = zp ghost_points_in(local_idx)%slip = patch_ib(patch_id)%slip - if ((x_cc(i) - dx(i)) < x_domain%beg) then + if ((x_cc(i) - dx(i)) < glb_bounds(1)%beg) then ghost_points_in(local_idx)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then + else if ((x_cc(i) + dx(i)) > glb_bounds(1)%end) then ghost_points_in(local_idx)%DB(1) = 1 else ghost_points_in(local_idx)%DB(1) = 0 end if - if ((y_cc(j) - dy(j)) < y_domain%beg) then + if ((y_cc(j) - dy(j)) < glb_bounds(2)%beg) then ghost_points_in(local_idx)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then + else if ((y_cc(j) + dy(j)) > glb_bounds(2)%end) then ghost_points_in(local_idx)%DB(2) = 1 else ghost_points_in(local_idx)%DB(2) = 0 end if if (p /= 0) then - if ((z_cc(k) - dz(k)) < z_domain%beg) then + if ((z_cc(k) - dz(k)) < glb_bounds(3)%beg) then ghost_points_in(local_idx)%DB(3) = -1 - 
else if ((z_cc(k) + dz(k)) > z_domain%end) then + else if ((z_cc(k) + dz(k)) > glb_bounds(3)%end) then ghost_points_in(local_idx)%DB(3) = 1 else ghost_points_in(local_idx)%DB(3) = 0 @@ -1293,33 +1294,21 @@ contains do patch_id = 1, num_ibs ! check domain wraps in x, y, - #:for X in [('x'), ('y')] - ! check for periodicity - if (bc_${X}$%beg == BC_PERIODIC) then - ! check if the boundary has left the domain, and then correct - if (patch_ib(patch_id)%${X}$_centroid < ${X}$_domain%beg) then - ! if the boundary exited "left", wrap it back around to the "right" - patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid + (${X}$_domain%end - ${X}$_domain%beg) - else if (patch_ib(patch_id)%${X}$_centroid > ${X}$_domain%end) then - ! if the boundary exited "right", wrap it back around to the "left" - patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid - (${X}$_domain%end - ${X}$_domain%beg) + #:for X, ID in [('x', 1), ('y', 2), ('z',3)] + if (num_dims >= ${ID}$) then + ! check for periodicity + if (bc_${X}$%beg == BC_PERIODIC) then + ! check if the boundary has left the domain, and then correct + if (patch_ib(patch_id)%${X}$_centroid < glb_bounds(${ID}$)%beg) then + ! if the boundary exited "left", wrap it back around to the "right" + patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid + (glb_bounds(${ID}$)%end - glb_bounds(${ID}$)%beg) + else if (patch_ib(patch_id)%${X}$_centroid > glb_bounds(${ID}$)%end) then + ! if the boundary exited "right", wrap it back around to the "left" + patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid - (glb_bounds(${ID}$)%end - glb_bounds(${ID}$)%beg) + end if end if end if #:endfor - - if (p /= 0) then - ! check for periodicity - if (bc_z%beg == BC_PERIODIC) then - ! check if the boundary has left the domain, and then correct - if (patch_ib(patch_id)%z_centroid < z_domain%beg) then - ! 
if the boundary exited "left", wrap it back around to the "right" - patch_ib(patch_id)%z_centroid = patch_ib(patch_id)%z_centroid + (z_domain%end - z_domain%beg) - else if (patch_ib(patch_id)%z_centroid > z_domain%end) then - ! if the boundary exited "right", wrap it back around to the "left" - patch_ib(patch_id)%z_centroid = patch_ib(patch_id)%z_centroid - (z_domain%end - z_domain%beg) - end if - end if - end if end do end subroutine s_wrap_periodic_ibs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index a684c79ec4..b1ae741a72 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -41,6 +41,24 @@ module m_mpi_proxy integer :: i_halo_size $:GPU_DECLARE(create='[i_halo_size]') + integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts + integer, dimension(:, :, :, :), allocatable :: p_send_ids + character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff + integer :: p_buff_size, p_var_size + !! EL Bubbles communication variables + integer, parameter :: MAX_NEIGHBORS = 27 + integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS) + integer :: recv_offsets(MAX_NEIGHBORS) + integer :: neighbor_list(MAX_NEIGHBORS, 3) + integer :: n_neighbors + $:GPU_DECLARE(create='[p_send_counts]') + integer, allocatable :: force_send_counts(:), force_recv_counts(:) + integer, allocatable :: force_send_ids(:, :) + integer, allocatable :: flat_send_ids(:) + real(wp), allocatable :: force_send_vals(:, :, :) + real(wp), allocatable :: flat_send_vals(:) + $:GPU_DECLARE(create='[force_send_counts, force_send_ids, force_send_vals]') + contains !> @brief Allocates immersed boundary communication buffers for MPI halo exchanges. @@ -70,6 +88,82 @@ contains end subroutine s_initialize_mpi_proxy_module + !! This subroutine initializes the MPI buffers and variables + !! required for the particle communication. + !! 
@param lag_num_ts Number of stages in time-stepping scheme + subroutine s_initialize_particles_mpi(lag_num_ts) + + integer :: i, j, k + integer :: real_size, int_size, nReal, lag_num_ts + integer :: ierr !< Generic flag used to identify and report MPI errors + +#ifdef MFC_MPI + call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) + call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) + nReal = 7 + 16*2 + 10*lag_num_ts + p_var_size = nReal*real_size + int_size + p_buff_size = lag_params%nBubs_glb*p_var_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) + @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) + ! First, collect all neighbor information + n_neighbors = 0 + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) /= 0) then + n_neighbors = n_neighbors + 1 + neighbor_list(n_neighbors, 1) = i + neighbor_list(n_neighbors, 2) = j + neighbor_list(n_neighbors, 3) = k + end if + end do + end do + end do +#endif + + end subroutine s_initialize_particles_mpi + + !! This subroutine initializes the MPI buffers and variables + !! required for the particle communication. + !! 
@param lag_num_ts Number of stages in time-stepping scheme + subroutine s_initialize_solid_particles_mpi(lag_num_ts) + + integer :: i, j, k + integer :: real_size, int_size, nReal, lag_num_ts + integer :: ierr !< Generic flag used to identify and report MPI errors + +#ifdef MFC_MPI + call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) + call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) + nReal = 7 + 13*2 + 7*lag_num_ts + p_var_size = (nReal*real_size + int_size) + p_buff_size = lag_params%nParticles_glb*p_var_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) + @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nParticles_glb)) + ! First, collect all neighbor information + n_neighbors = 0 + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) /= 0) then + n_neighbors = n_neighbors + 1 + neighbor_list(n_neighbors, 1) = i + neighbor_list(n_neighbors, 2) = j + neighbor_list(n_neighbors, 3) = k + end if + end do + end do + end do + @:ALLOCATE(force_send_counts(0:num_procs-1)) + @:ALLOCATE(force_recv_counts(0:num_procs-1)) + @:ALLOCATE(force_send_ids(0:num_procs-1, 1:lag_params%nParticles_glb)) + @:ALLOCATE(force_send_vals(0:num_procs-1, 1:lag_params%nParticles_glb, 1:3)) + @:ALLOCATE(flat_send_ids(1:lag_params%nParticles_glb)) + @:ALLOCATE(flat_send_vals(1:3*lag_params%nParticles_glb)) +#endif + + end subroutine s_initialize_solid_particles_mpi + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. 
Then, the purpose of @@ -113,8 +207,8 @@ contains & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'down_sample', 'int_comp','fft_wrt', & - & 'hyper_cleaning' ] + & 'hyperelasticity', 'down_sample', 'int_comp','fft_wrt', & + & 'hyper_cleaning', 'particles_lagrange' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -130,17 +224,40 @@ contains if (bubbles_lagrange) then #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] + & 'write_bubbles', 'write_bubbles_stats', 'write_void_evol', 'pressure_force', & + & 'gravity_force'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor - #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] + #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', 'vel_model', & + & 'drag_model', 'charNz'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor #:for VAR in ['epsilonb','charwidth','valmaxvoid'] call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor + + call MPI_BCAST(lag_params%input_path, len(lag_params%input_path), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + end if + + if (particles_lagrange) then + #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & + & 'write_bubbles', 'write_bubbles_stats', 'write_void_evol', 'pressure_force', & + & 'gravity_force', 'collision_force'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nParticles_glb', 'vel_model', & + & 'drag_model', 'qs_drag_model', 'stokes_drag', 'added_mass_model', 'interpolation_order'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, 
MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['epsilonb','charwidth','valmaxvoid'] + call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + call MPI_BCAST(lag_params%input_path, len(lag_params%input_path), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) end if #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & @@ -149,9 +266,7 @@ contains & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & + & 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & & 'tau_star', 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', & & 'ic_eps', 'ic_beta', 'hyper_cleaning_speed', & & 'hyper_cleaning_tau' ] @@ -200,6 +315,12 @@ contains #:endfor end if + if (particles_lagrange) then + #:for VAR in [ 'rho0ref_particle','cp_particle'] + call MPI_BCAST(particle_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + do i = 1, num_fluids_max #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in','bc_z%alpha_in'] call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -254,10 +375,1052 @@ contains call MPI_BCAST(nv_uvm_igr_temps_on_gpu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(nv_uvm_pref_gpu, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + ! Extra BC Variable + call MPI_BCAST(periodic_bc, 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #endif end subroutine s_mpi_bcast_user_inputs + !> @brief Packs, exchanges, and unpacks immersed boundary marker buffers between neighboring MPI ranks. 
+ subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) + + type(integer_field), intent(inout) :: ib_markers + + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0, qbmm_comm + + integer :: pack_offset, unpack_offset + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + call nvtxStartRange("IB-MARKER-COMM-PACKBUF") + + buffer_counts = (/ & + buff_size*(n + 1)*(p + 1), & + buff_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + r = (j + buff_size*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + #:call GPU_HOST_DATA(use_device_addr='[ib_buff_send, ib_buff_recv]') + + call nvtxStartRange("IB-MARKER-SENDRECV-RDMA") + call MPI_SENDRECV( & + ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange + + #:endcall GPU_HOST_DATA + $:GPU_WAIT() + #:else + call nvtxStartRange("IB-MARKER-DEV2HOST") + $:GPU_UPDATE(host='[ib_buff_send]') + call nvtxEndRange + + call nvtxStartRange("IB-MARKER-SENDRECV-NO-RMDA") + call MPI_SENDRECV( & + ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange + + call nvtxStartRange("IB-MARKER-HOST2DEV") + $:GPU_UPDATE(device='[ib_buff_recv]') + call nvtxEndRange + #:endif + end if + #:endfor + + ! Unpack Received Buffer + call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + r = (j + buff_size*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + ! 
Unpacking buffer from bc_z%beg + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_sendrecv_ib_buffers + + !> This subroutine adds particles to the transfer list for the MPI + !! communication. + !! @param nBub Current LOCAL number of bubbles + !! @param pos Current position of each bubble + !! @param posPrev Previous position of each bubble (optional, not used + !! for communication of initial condition) + impure subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev, include_ghost) + + real(wp), dimension(:, :) :: pos, posPrev + integer :: bubID, nbub + integer :: i, j, k + logical, optional, intent(in) :: include_ghost + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + p_send_counts(i, j, k) = 0 + end do + end do + end do + + do k = 1, nbub + if (f_crosses_boundary(k, 1, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 0, 0) + if (n > 0) then + if (f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call 
s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + end if + end if + elseif (f_crosses_boundary(k, 1, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 0, 0) + if (n > 0) then + if (f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call 
s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + end if + end if + elseif (f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 1, -1) + call 
s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + elseif (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + + end do + + contains !f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost + + logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev, include_ghost) + + integer, intent(in) :: particle_id, dir, loc + real(wp), dimension(:, :), intent(in) :: pos + real(wp), dimension(:, :), optional, intent(in) :: posPrev + logical, optional, intent(in) :: include_ghost + + if (present(include_ghost) .and. include_ghost) then + + if (loc == -1) then ! Beginning of the domain + if (nidx(dir)%beg == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = pos(particle_id, dir) < pcomm_coords_ghost(dir)%beg + elseif (loc == 1) then ! End of the domain + if (nidx(dir)%end == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = pos(particle_id, dir) > pcomm_coords_ghost(dir)%end + end if + + else + + if (loc == -1) then ! Beginning of the domain + if (nidx(dir)%beg == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = (posPrev(particle_id, dir) >= pcomm_coords(dir)%beg .and. & + pos(particle_id, dir) < pcomm_coords(dir)%beg) + elseif (loc == 1) then ! End of the domain + if (nidx(dir)%end == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = (posPrev(particle_id, dir) <= pcomm_coords(dir)%end .and. 
& + pos(particle_id, dir) > pcomm_coords(dir)%end) + end if + + end if + + end function f_crosses_boundary + + subroutine s_add_particle_to_direction(particle_id, dir_x, dir_y, dir_z) + + integer, intent(in) :: particle_id, dir_x, dir_y, dir_z + + p_send_ids(dir_x, dir_y, dir_z, p_send_counts(dir_x, dir_y, dir_z)) = particle_id + p_send_counts(dir_x, dir_y, dir_z) = p_send_counts(dir_x, dir_y, dir_z) + 1 + + end subroutine s_add_particle_to_direction + + end subroutine s_add_particles_to_transfer_list + + !> This subroutine performs the MPI communication for lagrangian particles/ + !! bubbles. + !! @param bub_R0 Initial radius of each bubble + !! @param Rmax_stats Maximum radius of each bubble + !! @param Rmin_stats Minimum radius of each bubble + !! @param gas_mg Mass of gas in each bubble + !! @param gas_betaT Heat flux model coefficient for each bubble + !! @param gas_betaC mass flux model coefficient for each bubble + !! @param bub_dphidt Subgrid velocity potential for each bubble + !! @param lag_id Global and local ID of each bubble + !! @param gas_p Pressure of the gas in each bubble + !! @param gas_mv Mass of vapor in each bubble + !! @param rad Radius of each bubble + !! @param rvel Radial velocity of each bubble + !! @param pos Position of each bubble + !! @param posPrev Previous position of each bubble + !! @param vel Velocity of each bubble + !! @param scoord Cell index in real format of each bubble + !! @param drad Radial velocity of each bubble + !! @param drvel Radial acceleration of each bubble + !! @param dgasp Time derivative of gas pressure in each bubble + !! @param dgasmv Time derivative of vapor mass in each bubble + !! @param dpos Time derivative of position of each bubble + !! @param dvel Time derivative of velocity of each bubble + !! @param lag_num_ts Number of stages in time-stepping scheme + !! 
@param nBubs Local number of bubbles + impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts, nbubs, dest) + + real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt + integer, dimension(:, :) :: lag_id + real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv + real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel + integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size, dest + + integer :: i, j, k, l, q, r + integer :: req_send, req_recv, ierr !< Generic flag used to identify and report MPI errors + integer :: send_count, send_offset, recv_count, recv_offset + +#ifdef MFC_MPI + ! Phase 1: Exchange particle counts using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives first + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + end do + + ! Post all sends + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + send_count = send_count + 1 + call MPI_Isend(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + end do + + ! 
Wait for all count exchanges to complete + if (recv_count > 0) then + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + end if + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if + + ! Phase 2: Exchange particle data using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives for particle data first + recv_offset = 1 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_buff(recv_offset), p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + recv_offsets(l) = recv_offset + recv_offset = recv_offset + p_recv_size + end if + end do + + ! Pack and send particle data + send_offset = 0 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_send_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + ! 
Pack data for sending + position = 0 + do q = 0, p_send_counts(i, j, k) - 1 + bub_id = p_send_ids(i, j, k, q) + + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Pack(gas_p(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do 
r = 1, lag_num_ts + call MPI_Pack(drad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + end do + + send_count = send_count + 1 + call MPI_Isend(p_send_buff(send_offset), position, MPI_PACKED, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + send_offset = send_offset + position + end if + end do + + ! Wait for all recvs for contiguous data to complete + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + + ! Process received data as it arrives + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_offset = recv_offsets(l) + + position = 0 + ! 
Unpack received data + do q = 0, p_recv_counts(i, j, k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_p(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mv(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rad(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rvel(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, pos(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, posPrev(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, vel(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, scoord(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, 
ierr) + end do + do r = 1, lag_num_ts + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drad(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drvel(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasp(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasmv(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dpos(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dvel(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id + end do + recv_offset = recv_offset + p_recv_size + end if + + end do + + ! Wait for all sends to complete + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if +#endif + + if (any(periodic_bc)) then + call s_wrap_particle_positions(pos, posPrev, nbubs, dest) + end if + + end subroutine s_mpi_sendrecv_particles + + !> This subroutine performs the MPI communication for lagrangian particles/ + !! particles. + !! @param particle_R0 Initial radius of each particle + !! @param Rmax_stats Maximum radius of each particle + !! @param Rmin_stats Minimum radius of each particle + !! @param particle_mass Mass of gas in each particle + !! @param f_p Force on each particle + !! @param gas_betaT Heat flux model coefficient for each particle + !! @param gas_betaC mass flux model coefficient for each particle + !! @param lag_id Global and local ID of each particle + !! @param rad Radius of each particle + !! @param pos Position of each particle + !! @param posPrev Previous position of each particle + !! @param vel Velocity of each particle + !! @param scoord Cell index in real format of each particle + !! 
@param drad Time derivative of particle's radius + !! @param dpos Time derivative of position of each particle + !! @param dvel Time derivative of velocity of each particle + !! @param lag_num_ts Number of stages in time-stepping scheme + !! @param nParticles Local number of particles + impure subroutine s_mpi_sendrecv_solid_particles(p_owner_rank, particle_R0, Rmax_stats, Rmin_stats, particle_mass, f_p, & + lag_id, rad, pos, & + posPrev, vel, scoord, drad, dpos, & + dvel, lag_num_ts, nParticles, dest) + + integer, dimension(:) :: p_owner_rank + real(wp), dimension(:) :: particle_R0, Rmax_stats, Rmin_stats, particle_mass + real(wp), dimension(:, :) :: f_p + integer, dimension(:, :) :: lag_id + real(wp), dimension(:, :) :: rad, drad + real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel + integer :: position, particle_id, lag_num_ts, tag, partner, send_tag, recv_tag, nParticles, p_recv_size, dest + + integer :: i, j, k, l, q, r + integer :: req_send, req_recv, ierr !< Generic flag used to identify and report MPI errors + integer :: send_count, send_offset, recv_count, recv_offset + +#ifdef MFC_MPI + ! Phase 1: Exchange particle counts using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives first + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + end do + + ! Post all sends + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + send_count = send_count + 1 + call MPI_Isend(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + end do + + ! 
Wait for all count exchanges to complete + if (recv_count > 0) then + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + end if + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if + + ! Phase 2: Exchange particle data using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives for particle data first + recv_offset = 1 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_buff(recv_offset), p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + recv_offsets(l) = recv_offset + recv_offset = recv_offset + p_recv_size + end if + end do + + ! Pack and send particle data + send_offset = 0 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_send_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + ! 
Pack data for sending + position = 0 + do q = 0, p_send_counts(i, j, k) - 1 + particle_id = p_send_ids(i, j, k, q) + + call MPI_Pack(lag_id(particle_id, 1), 1, MPI_INTEGER, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(particle_R0(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(particle_mass(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(f_p(particle_id, :), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(gas_betaT(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(gas_betaC(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(bub_dphidt(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + do r = 1, 2 + ! call MPI_Pack(gas_p(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(gas_mv(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! 
call MPI_Pack(rvel(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(particle_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(particle_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(particle_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(particle_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Pack(drad(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(drvel(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(dgasp(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + ! call MPI_Pack(dgasmv(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(particle_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(particle_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + end do + + send_count = send_count + 1 + call MPI_Isend(p_send_buff(send_offset), position, MPI_PACKED, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + send_offset = send_offset + position + end if + end do + + ! Wait for all recvs for contiguous data to complete + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + + ! Process received data as it arrives + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0 .and. 
abs(i) + abs(j) + abs(k) /= 0) then + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_offset = recv_offsets(l) + + position = 0 + ! Unpack received data + do q = 0, p_recv_counts(i, j, k) - 1 + nParticles = nParticles + 1 + particle_id = nParticles + + p_owner_rank(particle_id) = neighbor_ranks(i, j, k) + + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, lag_id(particle_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, particle_R0(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmax_stats(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmin_stats(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, particle_mass(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, f_p(particle_id, :), 3, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaT(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaC(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_dphidt(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do r = 1, 2 + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_p(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mv(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rad(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! 
call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rvel(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, pos(particle_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, posPrev(particle_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, vel(particle_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, scoord(particle_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drad(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drvel(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasp(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasmv(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dpos(particle_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dvel(particle_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(particle_id, 2) = particle_id + end do + recv_offset = recv_offset + p_recv_size + end if + + end do + + ! 
Wait for all sends to complete + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if +#endif + + if (any(periodic_bc)) then + call s_wrap_particle_positions(pos, posPrev, nParticles, dest) + end if + + end subroutine s_mpi_sendrecv_solid_particles + + !> This resets the collision force buffers + impure subroutine s_reset_force_buffers() + + force_send_counts = 0 + force_recv_counts = 0 + force_send_ids = 0 + force_send_vals = 0._wp + + $:GPU_UPDATE(device='[force_send_counts, force_send_ids, force_send_vals]') + + end subroutine + + !> This adds the forces to the buffer arrays for mpi transfer + impure subroutine s_add_force_to_send_buffer(dest_rank, gid, force) + $:GPU_ROUTINE(function_name='s_add_force_to_send_buffer', parallelism='[seq]') + + integer, intent(in) :: dest_rank, gid + real(wp), intent(in), dimension(3) :: force + integer :: idx + + $:GPU_ATOMIC(atomic='capture') + force_send_counts(dest_rank) = force_send_counts(dest_rank) + 1 + idx = force_send_counts(dest_rank) + $:END_GPU_ATOMIC_CAPTURE() + + force_send_ids(dest_rank, idx) = gid + force_send_vals(dest_rank, idx, 1) = force(1) + force_send_vals(dest_rank, idx, 2) = force(2) + force_send_vals(dest_rank, idx, 3) = force(3) + + end subroutine + + !> This communicates the collision forces across neighbor mpi ranks + impure subroutine s_transfer_collision_forces(total_recv, force_recv_ids, force_recv_vals) + + integer, intent(inout) :: total_recv + integer, intent(inout) :: force_recv_ids(:) + real(wp), intent(inout) :: force_recv_vals(:) + integer :: ierr !< Generic flag used to identify and report MPI errors + integer :: i, j, k, l, idx, total_send, recv_tag, send_tag, partner, recv_count, send_count + integer :: send_displs(0:num_procs - 1), recv_displs(0:num_procs - 1) + integer :: sendcounts_vals(0:num_procs - 1), recvcounts_vals(0:num_procs - 1) + integer :: senddispls_vals(0:num_procs - 1), recvdispls_vals(0:num_procs - 1) + + 
$:GPU_UPDATE(host='[force_send_counts, force_send_ids, force_send_vals]') + + ! Phase 1: Exchange force counts with neighbors only + send_count = 0 + recv_count = 0 + + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + recv_count = recv_count + 1 + call MPI_Irecv(force_recv_counts(partner), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + + send_count = send_count + 1 + call MPI_Isend(force_send_counts(partner), 1, MPI_INTEGER, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + end do + + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + + ! Compute displacements + send_displs(0) = 0 + recv_displs(0) = 0 + do i = 1, num_procs - 1 + send_displs(i) = send_displs(i - 1) + force_send_counts(i - 1) + recv_displs(i) = recv_displs(i - 1) + force_recv_counts(i - 1) + end do + + do i = 0, num_procs - 1 + sendcounts_vals(i) = 3*force_send_counts(i) + recvcounts_vals(i) = 3*force_recv_counts(i) + senddispls_vals(i) = 3*send_displs(i) + recvdispls_vals(i) = 3*recv_displs(i) + end do + + total_send = sum(force_send_counts) + total_recv = sum(force_recv_counts) + + ! Flatten send buffers + idx = 1 + do i = 0, num_procs - 1 + do j = 1, force_send_counts(i) + flat_send_ids(idx) = force_send_ids(i, j) + flat_send_vals(3*(idx - 1) + 1) = force_send_vals(i, j, 1) + flat_send_vals(3*(idx - 1) + 2) = force_send_vals(i, j, 2) + flat_send_vals(3*(idx - 1) + 3) = force_send_vals(i, j, 3) + idx = idx + 1 + end do + end do + + ! 
Phase 2: Exchange force data with neighbors only + send_count = 0 + recv_count = 0 + + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + if (force_recv_counts(partner) > 0) then + recv_count = recv_count + 1 + call MPI_Irecv(force_recv_ids(recv_displs(partner) + 1), force_recv_counts(partner), MPI_INTEGER, & + partner, recv_tag, MPI_COMM_WORLD, recv_requests(recv_count), ierr) + recv_count = recv_count + 1 + call MPI_Irecv(force_recv_vals(recvdispls_vals(partner) + 1), recvcounts_vals(partner), mpi_p, & + partner, recv_tag + 1, MPI_COMM_WORLD, recv_requests(recv_count), ierr) + end if + + if (force_send_counts(partner) > 0) then + send_count = send_count + 1 + call MPI_Isend(flat_send_ids(send_displs(partner) + 1), force_send_counts(partner), MPI_INTEGER, & + partner, send_tag, MPI_COMM_WORLD, send_requests(send_count), ierr) + send_count = send_count + 1 + call MPI_Isend(flat_send_vals(senddispls_vals(partner) + 1), sendcounts_vals(partner), mpi_p, & + partner, send_tag + 1, MPI_COMM_WORLD, send_requests(send_count), ierr) + end if + end do + + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + + end subroutine s_transfer_collision_forces + + !! This function returns a unique tag for each neighbor based on its position + !! relative to the current process. + !! @param i, j, k Indices of the neighbor in the range [-1, 1] + !! 
@return tag Unique integer tag for the neighbor + integer function neighbor_tag(i, j, k) result(tag) + + integer, intent(in) :: i, j, k + + tag = (k + 1)*9 + (j + 1)*3 + (i + 1) + + end function neighbor_tag + + subroutine s_wrap_particle_positions(pos, posPrev, nbubs, dest) + + real(wp), dimension(:, :, :) :: pos, posPrev + integer :: nbubs, dest + integer :: i, q + real(wp) :: offset + + do i = 1, nbubs + if (periodic_bc(1)) then + offset = glb_bounds(1)%end - glb_bounds(1)%beg + if (pos(i, 1, dest) > x_cb(m + buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) - offset + posPrev(i, 1, q) = posPrev(i, 1, q) - offset + end do + end if + if (pos(i, 1, dest) < x_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) + offset + posPrev(i, 1, q) = posPrev(i, 1, q) + offset + end do + end if + end if + + if (periodic_bc(2)) then + offset = glb_bounds(2)%end - glb_bounds(2)%beg + if (pos(i, 2, dest) > y_cb(n + buff_size)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) - offset + posPrev(i, 2, q) = posPrev(i, 2, q) - offset + end do + end if + if (pos(i, 2, dest) < y_cb(-buff_size - 1)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) + offset + posPrev(i, 2, q) = posPrev(i, 2, q) + offset + end do + end if + end if + + if (periodic_bc(3)) then + offset = glb_bounds(3)%end - glb_bounds(3)%beg + if (pos(i, 3, dest) > z_cb(p + buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) - offset + posPrev(i, 3, q) = posPrev(i, 3, q) - offset + end do + end if + if (pos(i, 3, dest) < z_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) + offset + posPrev(i, 3, q) = posPrev(i, 3, q) + offset + end do + end if + end if + end do + + end subroutine s_wrap_particle_positions + !> @brief Broadcasts random phase numbers from rank 0 to all MPI processes. 
impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq diff --git a/src/simulation/m_particles_EL.fpp b/src/simulation/m_particles_EL.fpp new file mode 100644 index 0000000000..031f5b6375 --- /dev/null +++ b/src/simulation/m_particles_EL.fpp @@ -0,0 +1,2787 @@ +!> +!! @file m_particles_EL.fpp +!! @brief Contains module m_particles_EL + +#:include 'macros.fpp' + +!> @brief This module is used to compute the volume-averaged particle model +module m_particles_EL + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + + use m_particles_EL_kernels !< Definitions of the kernel functions + + use m_variables_conversion !< State variables type conversion procedures + + use m_compile_specific + + use m_boundary_common + + use m_helper_basic !< Functions to compare floating point numbers + + use m_sim_helpers + + use m_helper + + use m_mpi_common + + use m_ibm + + implicit none + + real(wp), save :: next_write_time = 0._wp + + integer, allocatable, dimension(:, :) :: lag_part_id !< Global and local IDs + integer, allocatable, dimension(:) :: gid_to_local + real(wp), allocatable, dimension(:) :: particle_R0 !< Initial particle radius + real(wp), allocatable, dimension(:) :: Rmax_stats_part !< Maximum radius + real(wp), allocatable, dimension(:) :: Rmin_stats_part !< Minimum radius + $:GPU_DECLARE(create='[lag_part_id, gid_to_local, particle_R0, Rmax_stats_part, Rmin_stats_part]') + + real(wp), allocatable, dimension(:) :: particle_mass !< Particle Mass + $:GPU_DECLARE(create='[particle_mass]') + real(wp), allocatable, dimension(:) :: p_AM !< Particle Added Mass + $:GPU_DECLARE(create='[p_AM]') + + integer, allocatable, dimension(:) :: p_owner_rank !< MPI rank that owns each particle + $:GPU_DECLARE(create='[p_owner_rank]') + + integer, allocatable, dimension(:) :: linked_list !< particle cell linked list + $:GPU_DECLARE(create='[linked_list]') + + integer, allocatable, 
dimension(:, :, :) :: particle_head !< particle heads at each cell + $:GPU_DECLARE(create='[particle_head]') + + !(nPart, 1 -> actual val or 2 -> temp val) + real(wp), allocatable, dimension(:, :) :: particle_rad !< Particle radius + $:GPU_DECLARE(create='[particle_rad]') + + !(nPart, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) + real(wp), allocatable, dimension(:, :, :) :: particle_pos !< Particle's position + real(wp), allocatable, dimension(:, :, :) :: particle_posPrev !< Particle's previous position + real(wp), allocatable, dimension(:, :, :) :: particle_vel !< Particle's velocity + real(wp), allocatable, dimension(:, :, :) :: particle_s !< Particle's computational cell position in real format + $:GPU_DECLARE(create='[particle_pos, particle_posPrev, particle_vel, particle_s]') + !(nPart, 1-> x or 2->y or 3 ->z, time-stage) + real(wp), allocatable, dimension(:, :) :: particle_draddt !< Time derivative of particle's radius + real(wp), allocatable, dimension(:, :, :) :: particle_dposdt !< Time derivative of the particle's position + real(wp), allocatable, dimension(:, :, :) :: particle_dveldt !< Time derivative of the particle's velocity + $:GPU_DECLARE(create='[particle_draddt, particle_dposdt, particle_dveldt]') + + integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme + $:GPU_DECLARE(create='[lag_num_ts]') + + real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubble size in the local domain + !< Projection of the lagrangian particles in the Eulerian framework + type(scalar_field), dimension(:), allocatable :: q_particles + type(scalar_field), dimension(:), allocatable :: kahan_comp !< Kahan compensation for q_beta accumulation + integer :: q_particles_idx !< Size of the q vector field for particle cell (q)uantities + integer, parameter :: alphaf_id = 1 + integer, parameter :: alphaupx_id = 2 !< x particle momentum index + integer, parameter :: alphaupy_id = 3 !< y particle momentum index + integer, parameter :: 
alphaupz_id = 4 !< z particle momentum index + integer, parameter :: Smx_id = 5 + integer, parameter :: Smy_id = 6 + integer, parameter :: Smz_id = 7 + integer, parameter :: SE_id = 8 + + type(scalar_field), dimension(:), allocatable :: field_vars !< For cell quantities (field gradients, etc.) + integer, parameter :: dPx_id = 1 !< Spatial pressure gradient in x, y, and z + integer, parameter :: dPy_id = 2 + integer, parameter :: dPz_id = 3 + integer, parameter :: drhox_id = 4 !< Spatial density gradient in x, y, and z + integer, parameter :: drhoy_id = 5 + integer, parameter :: drhoz_id = 6 + integer, parameter :: dufx_id = 7 !< Spatial velocity gradient in x, y, and z + integer, parameter :: dufy_id = 8 + integer, parameter :: dufz_id = 9 + integer, parameter :: dalphafx_id = 10 !< Spatial fluid volume fraction gradient in x, y, and z + integer, parameter :: dalphafy_id = 11 + integer, parameter :: dalphafz_id = 12 + integer, parameter :: dalphap_upx_id = 13 !< Spatial particle momentum gradient in x, y, and z + integer, parameter :: dalphap_upy_id = 14 + integer, parameter :: dalphap_upz_id = 15 + integer, parameter :: nField_vars = 15 + + type(scalar_field), dimension(:), allocatable :: weights_x_interp !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_y_interp !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_z_interp !< For precomputing weights + integer :: nWeights_interp + + type(scalar_field), dimension(:), allocatable :: weights_x_grad !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_y_grad !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_z_grad !< For precomputing weights + integer :: nWeights_grad + + $:GPU_DECLARE(create='[Rmax_glb,Rmin_glb,q_particles,kahan_comp,q_particles_idx,field_vars]') + $:GPU_DECLARE(create='[weights_x_interp,weights_y_interp,weights_z_interp,nWeights_interp]') + 
$:GPU_DECLARE(create='[weights_x_grad,weights_y_grad,weights_z_grad,nWeights_grad]') + + !Particle Source terms for fluid coupling + real(wp), allocatable, dimension(:, :) :: f_p !< force on each particle + $:GPU_DECLARE(create='[f_p]') + + real(wp), allocatable, dimension(:) :: gSum !< gaussian sum for each particle + $:GPU_DECLARE(create='[gSum]') + + integer, allocatable :: force_recv_ids(:) !< ids of collision forces received from other ranks + real(wp), allocatable :: force_recv_vals(:) !< collision forces received from other ranks + $:GPU_DECLARE(create='[force_recv_ids, force_recv_vals]') + + integer, parameter :: LAG_EVOL_ID = 11 ! File id for lag_bubbles_evol_*.dat + integer, parameter :: LAG_STATS_ID = 12 ! File id for stats_lag_bubbles_*.dat + integer, parameter :: LAG_VOID_ID = 13 ! File id for voidfraction.dat + + integer, allocatable, dimension(:) :: keep_bubble + integer, allocatable, dimension(:, :) :: wrap_bubble_loc, wrap_bubble_dir + $:GPU_DECLARE(create='[keep_bubble]') + $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') + + integer :: error_flag !Error flag for collisions + $:GPU_DECLARE(create='[error_flag]') + + integer, parameter :: ncc = 1 !< Number of collisions cells at boundaries + real(wp) :: eps_overlap = 1.e-12 + +contains + + !> Initializes the lagrangian subgrid particle solver + !! 
@param q_cons_vf Initial conservative variables + impure subroutine s_initialize_particles_EL_module(q_cons_vf, bc_type) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type + + integer :: nParticles_glb, i, j, k, nf, l, npts + + !PRIM TO CONS VARIABLES + real(wp) :: dyn_pres, pi_inf, qv, gamma, pres, T + real(wp) :: rhou, alpharhou, rho_f, alpharho + real(wp), dimension(3) :: fluid_vel + real(wp) :: rhoYks(1:num_species) + integer :: save_count + real(wp) :: qtime + + real(wp) :: myR, func_sum + real(wp), dimension(3) :: myPos, myVel, myForce + integer, dimension(3) :: cell + logical :: only_beta = .true. + + if (cfl_dt) then + save_count = n_start + qtime = n_start*t_save + else + save_count = t_step_start + qtime = t_step_start*dt + end if + + pi_inf = 0._wp + qv = 0._wp + gamma = gammas(1) + + ! Setting number of time-stages for selected time-stepping scheme + lag_num_ts = time_stepper + + ! Allocate space for the Eulerian fields needed to map the effect of the particles + if (lag_params%solver_approach == 1) then + ! 
One-way coupling + q_particles_idx = 1 !For tracking volume fraction + elseif (lag_params%solver_approach == 2) then + !Two-way coupling + q_particles_idx = 8 !For tracking volume fraction(1), x-mom(2), y-mom(3), z-mom(4), and energy(5) sources, and alpha_p u_p (x(6),y(7),z(8)) + else + call s_mpi_abort('Please check the lag_params%solver_approach input') + end if + + nWeights_interp = lag_params%interpolation_order + 1 + nWeights_grad = fd_order + 1 + + pcomm_coords(1)%beg = x_cb(-1) + pcomm_coords(1)%end = x_cb(m) + $:GPU_UPDATE(device='[pcomm_coords(1)]') + if (n > 0) then + pcomm_coords(2)%beg = y_cb(-1) + pcomm_coords(2)%end = y_cb(n) + $:GPU_UPDATE(device='[pcomm_coords(2)]') + if (p > 0) then + pcomm_coords(3)%beg = z_cb(-1) + pcomm_coords(3)%end = z_cb(p) + $:GPU_UPDATE(device='[pcomm_coords(3)]') + end if + end if + + pcomm_coords_ghost(1)%beg = x_cb(-1 + ncc) + pcomm_coords_ghost(1)%end = x_cb(m - ncc) + $:GPU_UPDATE(device='[pcomm_coords_ghost(1)]') + if (n > 0) then + pcomm_coords_ghost(2)%beg = y_cb(-1 + ncc) + pcomm_coords_ghost(2)%end = y_cb(n - ncc) + $:GPU_UPDATE(device='[pcomm_coords_ghost(2)]') + if (p > 0) then + pcomm_coords_ghost(3)%beg = z_cb(-1 + ncc) + pcomm_coords_ghost(3)%end = z_cb(p - ncc) + $:GPU_UPDATE(device='[pcomm_coords_ghost(3)]') + end if + end if + + $:GPU_UPDATE(device='[lag_num_ts, q_particles_idx]') + + @:ALLOCATE(q_particles(1:q_particles_idx)) + @:ALLOCATE(kahan_comp(1:q_particles_idx)) + do i = 1, q_particles_idx + @:ALLOCATE(q_particles(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(q_particles(i)) + @:ALLOCATE(kahan_comp(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(kahan_comp(i)) + end do + + @:ALLOCATE(field_vars(1:nField_vars)) + do i = 1, nField_vars + @:ALLOCATE(field_vars(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + 
idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(field_vars(i)) + end do + + @:ALLOCATE(weights_x_interp(1:nWeights_interp)) + do i = 1, nWeights_interp + @:ALLOCATE(weights_x_interp(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_x_interp(i)) + end do + + @:ALLOCATE(weights_y_interp(1:nWeights_interp)) + do i = 1, nWeights_interp + @:ALLOCATE(weights_y_interp(i)%sf(idwbuff(2)%beg:idwbuff(2)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_y_interp(i)) + end do + + @:ALLOCATE(weights_z_interp(1:nWeights_interp)) + do i = 1, nWeights_interp + @:ALLOCATE(weights_z_interp(i)%sf(idwbuff(3)%beg:idwbuff(3)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_z_interp(i)) + end do + + @:ALLOCATE(weights_x_grad(1:nWeights_grad)) + do i = 1, nWeights_grad + @:ALLOCATE(weights_x_grad(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_x_grad(i)) + end do + + @:ALLOCATE(weights_y_grad(1:nWeights_grad)) + do i = 1, nWeights_grad + @:ALLOCATE(weights_y_grad(i)%sf(idwbuff(2)%beg:idwbuff(2)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_y_grad(i)) + end do + + @:ALLOCATE(weights_z_grad(1:nWeights_grad)) + do i = 1, nWeights_grad + @:ALLOCATE(weights_z_grad(i)%sf(idwbuff(3)%beg:idwbuff(3)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_z_grad(i)) + end do + + ! 
Allocating space for lagrangian variables + nParticles_glb = lag_params%nParticles_glb + + @:ALLOCATE(lag_part_id(1:nParticles_glb, 1:2)) + @:ALLOCATE(gid_to_local(1:nParticles_glb)) + @:ALLOCATE(particle_R0(1:nParticles_glb)) + @:ALLOCATE(Rmax_stats_part(1:nParticles_glb)) + @:ALLOCATE(Rmin_stats_part(1:nParticles_glb)) + @:ALLOCATE(particle_mass(1:nParticles_glb)) + @:ALLOCATE(p_AM(1:nParticles_glb)) + @:ALLOCATE(p_owner_rank(1:nParticles_glb)) + @:ALLOCATE(particle_rad(1:nParticles_glb, 1:2)) + @:ALLOCATE(particle_pos(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_posPrev(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_vel(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_s(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_draddt(1:nParticles_glb, 1:lag_num_ts)) + @:ALLOCATE(particle_dposdt(1:nParticles_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(particle_dveldt(1:nParticles_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(f_p(1:nParticles_glb, 1:3)) + @:ALLOCATE(gSum(1:nParticles_glb)) + + @:ALLOCATE(linked_list(1:nParticles_glb)) + + @:ALLOCATE(particle_head(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + + @:ALLOCATE(force_recv_ids(1:lag_params%nParticles_glb)) + @:ALLOCATE(force_recv_vals(1:3*lag_params%nParticles_glb)) + + @:ALLOCATE(keep_bubble(1:nParticles_glb)) + @:ALLOCATE(wrap_bubble_loc(1:nParticles_glb, 1:num_dims), wrap_bubble_dir(1:nParticles_glb, 1:num_dims)) + + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + + if (num_procs > 1) call s_initialize_solid_particles_mpi(lag_num_ts) + + ! Starting particles + if (lag_params%write_void_evol) call s_open_void_evol + if (lag_params%write_bubbles) call s_open_lag_bubble_evol() + if (lag_params%write_bubbles_stats) call s_open_lag_particle_stats() + + if (lag_params%vel_model > 0) then + moving_lag_particles = .true. 
+ lag_pressure_force = lag_params%pressure_force + lag_gravity_force = lag_params%gravity_force + lag_vel_model = lag_params%vel_model + lag_drag_model = lag_params%drag_model + end if + + $:GPU_UPDATE(device='[moving_lag_particles, lag_pressure_force, & + & lag_gravity_force, lag_vel_model, lag_drag_model]') + + ! Allocate cell list arrays for atomic-free Gaussian smearing + @:ALLOCATE(cell_list_start(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_count(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_idx(1:lag_params%nParticles_glb)) + + call s_read_input_particles(q_cons_vf, bc_type) + + call s_reset_cell_vars() + + $:GPU_PARALLEL_LOOP(private='[k,cell,myR,myPos,myVel,myForce,func_sum]',copyin='[only_beta]') + do k = 1, n_el_particles_loc + + cell = fd_number - buff_size + call s_locate_cell(particle_pos(k, 1:3, 1), cell, particle_s(k, 1:3, 1)) + + myR = particle_R0(k) + myPos = particle_pos(k, 1:3, 1) + myVel = particle_vel(k, 1:3, 1) + myForce = f_p(k, :) + !Compute the total gaussian contribution for each particle for normalization + call s_compute_gaussian_contribution(myR, myPos, cell, func_sum) + gSum(k) = func_sum + + call s_gaussian_atomic(myR, myVel, myPos, myForce, func_sum, cell, q_particles, only_beta) + + end do + $:END_GPU_PARALLEL_LOOP() + + call s_finalize_beta_field(bc_type, only_beta) + + npts = (nWeights_interp - 1)/2 + call s_compute_barycentric_weights(npts) !For interpolation + + npts = (nWeights_grad - 1)/2 + call s_compute_fornberg_fd_weights(npts) !For finite differences + + if (lag_params%solver_approach == 2) then + if (save_count == 0) then + + !> Correcting initial conditions so they account for particles + $:GPU_PARALLEL_LOOP(private='[i,j,k,dyn_pres,fluid_vel,rho_f,alpharho,rhou,alpharhou]', collapse=3, copyin = '[pi_inf, qv, gamma, rhoYks]') + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + !!!!!!!!! 
Mass + do l = 1, num_fluids !num_fluid is just 1 right now + rho_f = q_cons_vf(l)%sf(i, j, k) + alpharho = q_particles(alphaf_id)%sf(i, j, k)*rho_f + q_cons_vf(l)%sf(i, j, k) = alpharho + end do + + !!!!!!!!! Momentum + dyn_pres = 0._wp + do l = momxb, momxe + fluid_vel(l - momxb + 1) = q_cons_vf(l)%sf(i, j, k)/rho_f + rhou = q_cons_vf(l)%sf(i, j, k) + alpharhou = q_particles(alphaf_id)%sf(i, j, k)*rhou + q_cons_vf(l)%sf(i, j, k) = alpharhou + dyn_pres = dyn_pres + q_cons_vf(l)%sf(i, j, k)* & + fluid_vel(l - momxb + 1)/2._wp + end do + + !!!!!!!!!Energy + call s_compute_pressure(q_cons_vf(E_idx)%sf(i, j, k), & + q_cons_vf(alf_idx)%sf(i, j, k), & + dyn_pres, pi_inf, gamma, alpharho, & + qv, rhoYks, pres, T) + + q_cons_vf(E_idx)%sf(i, j, k) = & + gamma*pres + dyn_pres + pi_inf + qv !Updating energy in cons + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end if + end if + + end subroutine s_initialize_particles_EL_module + + !> The purpose of this procedure is to obtain the initial bubbles' information + !! @param q_cons_vf Conservative variables + impure subroutine s_read_input_particles(q_cons_vf, bc_type) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type + + real(wp), dimension(8) :: inputParticle + real(wp) :: qtime + integer :: id, particle_id, save_count + integer :: i, ios + logical :: file_exist, indomain + integer, dimension(3) :: cell + + character(LEN=path_len + 2*name_len) :: path_D_dir !< + + ! Initialize number of particles + particle_id = 0 + id = 0 + + ! Read the input lag_bubble file or restart point + if (cfl_dt) then + save_count = n_start + qtime = n_start*t_save + else + save_count = t_step_start + qtime = t_step_start*dt + end if + + if (save_count == 0) then + if (proc_rank == 0) print *, 'Reading lagrange particles input file.' 
+ call my_inquire(trim(lag_params%input_path), file_exist) + if (file_exist) then + open (94, file=trim(lag_params%input_path), form='formatted', iostat=ios) + do while (ios == 0) + read (94, *, iostat=ios) (inputParticle(i), i=1, 8) + if (ios /= 0) cycle + indomain = particle_in_domain_physical(inputParticle(1:3)) + id = id + 1 + if (id > lag_params%nParticles_glb .and. proc_rank == 0) then + call s_mpi_abort("Current number of particles is larger than nParticles_glb") + end if + if (indomain) then + particle_id = particle_id + 1 + call s_add_particles(inputParticle, q_cons_vf, particle_id) + lag_part_id(particle_id, 1) = id !global ID + lag_part_id(particle_id, 2) = particle_id !local ID + n_el_particles_loc = particle_id ! local number of particles + end if + end do + close (94) + else + call s_mpi_abort("Initialize the lagrange particles in "//trim(lag_params%input_path)) + end if + else + if (proc_rank == 0) print *, 'Restarting lagrange particles at save_count: ', save_count + call s_restart_bubbles(particle_id, save_count) + end if + + print *, " Lagrange parrticles running, in proc", proc_rank, "number:", particle_id, "/", id + + if (num_procs > 1) then + call s_mpi_reduce_int_sum(n_el_particles_loc, n_el_particles_glb) + else + n_el_particles_glb = n_el_particles_loc + end if + + if (proc_rank == 0) then + if (n_el_particles_glb == 0) call s_mpi_abort('No particles in the domain. 
Check '//trim(lag_params%input_path)) + end if + + $:GPU_UPDATE(device='[particles_lagrange, lag_params]') + + $:GPU_UPDATE(device='[lag_part_id,particle_R0,Rmax_stats_part,Rmin_stats_part,particle_mass, & + & f_p,p_AM,p_owner_rank,gid_to_local, & + & particle_rad,particle_pos,particle_posPrev,particle_vel, & + & particle_s,particle_draddt, & + & particle_dposdt,particle_dveldt,n_el_particles_loc]') + + Rmax_glb = min(dflt_real, -dflt_real) + Rmin_glb = max(dflt_real, -dflt_real) + $:GPU_UPDATE(device='[Rmax_glb, Rmin_glb]') + + $:GPU_UPDATE(device='[dx,dy,dz,x_cb,x_cc,y_cb,y_cc,z_cb,z_cc]') + + !Populate temporal variables + call s_transfer_data_to_tmp_particles() + + if (save_count == 0) then + ! Create ./D directory + if (proc_rank == 0) then + write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' + call my_inquire(trim(path_D_dir), file_exist) + if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + end if + call s_mpi_barrier() + call s_write_restart_lag_particles(save_count) ! Needed for post_processing + if (lag_params%write_void_evol) call s_write_void_evol_particles(qtime) + end if + + if (lag_params%write_bubbles) call s_write_lag_particle_evol(qtime) + + end subroutine s_read_input_particles + + !> The purpose of this procedure is to obtain the information of the particles when starting fresh + !! @param inputPart Particle information + !! @param q_cons_vf Conservative variables + !! 
    !! @param part_id Local id of the particle
    impure subroutine s_add_particles(inputPart, q_cons_vf, part_id)

        type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
        real(wp), dimension(8), intent(in) :: inputPart
        integer, intent(in) :: part_id
        integer :: i

        real(wp) :: pliq, volparticle, concvap, totalmass, kparticle, cpparticle
        real(wp) :: omegaN_local, PeG, PeT, rhol, pcrit, qv, gamma, pi_inf, dynP
        integer, dimension(3) :: cell
        real(wp), dimension(2) :: Re
        real(wp) :: massflag, heatflag, Re_trans, Im_trans, myR, func_sum
        real(wp), dimension(3) :: myPos, myVel

        ! Flags become 1 when the corresponding transfer model is active
        massflag = 0._wp
        heatflag = 0._wp
        if (lag_params%massTransfer_model) massflag = 1._wp
        if (lag_params%heatTransfer_model) heatflag = 1._wp

        ! Input record layout: (1:3) position, (4:6) velocity, (7) initial radius
        particle_R0(part_id) = inputPart(7)
        ! dflt_real is a large-magnitude sentinel; min/max with its negation
        ! initializes the per-particle radius stats to "no data yet"
        Rmax_stats_part(part_id) = min(dflt_real, -dflt_real)
        Rmin_stats_part(part_id) = max(dflt_real, -dflt_real)
        particle_rad(part_id, 1) = inputPart(7)
        particle_pos(part_id, 1:3, 1) = inputPart(1:3)
        particle_posPrev(part_id, 1:3, 1) = particle_pos(part_id, 1:3, 1)
        particle_vel(part_id, 1:3, 1) = inputPart(4:6)

        !Initialize Particle Sources
        f_p(part_id, 1:3) = 0._wp
        p_AM(part_id) = 0._wp
        p_owner_rank(part_id) = proc_rank
        gid_to_local(part_id) = -1

        ! 2D axisymmetric: collapse the (y,z) input into a radial coordinate
        if (cyl_coord .and. p == 0) then
            particle_pos(part_id, 2, 1) = sqrt(particle_pos(part_id, 2, 1)**2._wp + &
                                               particle_pos(part_id, 3, 1)**2._wp)
            !Storing azimuthal angle (-Pi to Pi)) into the third coordinate variable
            particle_pos(part_id, 3, 1) = atan2(inputPart(3), inputPart(2))
            particle_posPrev(part_id, 1:3, 1) = particle_pos(part_id, 1:3, 1)
        end if

        cell = fd_number - buff_size
        call s_locate_cell(particle_pos(part_id, 1:3, 1), cell, particle_s(part_id, 1:3, 1))

        ! Check if the particle is located in the ghost cell of a symmetric, or wall boundary
        if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. &
            (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. &
            (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. &
            (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then
            call s_mpi_abort("Lagrange particle is in the ghost cells of a symmetric or wall boundary.")
        end if

        if (p > 0) then
            if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. &
                (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then
                call s_mpi_abort("Lagrange particle is in the ghost cells of a symmetric or wall boundary.")
            end if
        end if

        ! Initial particle mass
        volparticle = 4._wp/3._wp*pi*particle_R0(part_id)**3 ! volume
        particle_mass(part_id) = volparticle*rho0ref_particle ! mass
        if (particle_mass(part_id) <= 0._wp) then
            call s_mpi_abort("The initial particle mass is negative or zero. Check the particle file.")
        end if

    end subroutine s_add_particles

    !> The purpose of this procedure is to obtain the information of the bubbles from a restart point.
    !! @param part_id Local ID of the particle
    !!
    !! @param save_count File identifier
    impure subroutine s_restart_bubbles(part_id, save_count)

        integer, intent(inout) :: part_id, save_count

        character(LEN=path_len + 2*name_len) :: file_loc
        real(wp) :: file_time, file_dt
        integer :: file_num_procs, file_tot_part, tot_part

#ifdef MFC_MPI
        real(wp), dimension(20) :: inputvals
        integer, dimension(MPI_STATUS_SIZE) :: status
        integer(kind=MPI_OFFSET_KIND) :: disp
        integer :: view

        integer, dimension(3) :: cell
        logical :: indomain, particle_file, file_exist

        integer, dimension(2) :: gsizes, lsizes, start_idx_part
        integer :: ifile, ierr, tot_data, id
        integer :: i

        integer, dimension(:), allocatable :: proc_particle_counts
        real(wp), dimension(1:1, 1:lag_io_vars) :: dummy
        dummy = 0._wp

        ! Construct file path
        write (file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat'
        file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)

        ! Check if file exists
        inquire (FILE=trim(file_loc), EXIST=file_exist)
        if (.not. file_exist) then
            call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!')
        end if

        if (.not. parallel_io) return

        ! Rank 0 reads the scalar header (total particles, time, dt, writer
        ! rank count), then broadcasts it to all ranks
        if (proc_rank == 0) then
            call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, &
                               mpi_info_int, ifile, ierr)

            call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr)
            call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr)
            call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr)
            call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr)

            call MPI_FILE_CLOSE(ifile, ierr)
        end if

        call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
        call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
        call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
        call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

        allocate (proc_particle_counts(file_num_procs))

        if (proc_rank == 0) then
            call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, &
                               mpi_info_int, ifile, ierr)

            ! Skip to processor counts position
            disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), &
                       MPI_OFFSET_KIND)
            call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr)
            call MPI_FILE_READ(ifile, proc_particle_counts, file_num_procs, MPI_INTEGER, status, ierr)

            call MPI_FILE_CLOSE(ifile, ierr)
        end if

        call MPI_BCAST(proc_particle_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

        ! Set time variables from file
        mytime = file_time
        dt = file_dt

        part_id = proc_particle_counts(proc_rank + 1)

        ! Exclusive prefix sum: this rank's offset into the global particle list
        start_idx_part(1) = 0
        do i = 1, proc_rank
            start_idx_part(1) = start_idx_part(1) + proc_particle_counts(i)
        end do

        start_idx_part(2) = 0
        lsizes(1) = part_id
        lsizes(2) = lag_io_vars

        gsizes(1) = file_tot_part
        gsizes(2) = lag_io_vars

        if (part_id > 0) then

            allocate (MPI_IO_DATA_lag_bubbles(part_id, 1:lag_io_vars))

            call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, &
                                          MPI_ORDER_FORTRAN, mpi_p, view, ierr)
            call MPI_TYPE_COMMIT(view, ierr)

            call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, &
                               mpi_info_int, ifile, ierr)

            ! Skip extended header
            disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + &
                       file_num_procs*sizeof(proc_particle_counts(1)), MPI_OFFSET_KIND)
            call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr)

            call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, &
                                   lag_io_vars*part_id, mpi_p, status, ierr)

            call MPI_FILE_CLOSE(ifile, ierr)
            call MPI_TYPE_FREE(view, ierr)

            n_el_particles_loc = part_id

            ! Unpack per-particle records; commented columns are bubble-only
            ! fields not used by solid particles
            do i = 1, part_id
                lag_part_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1))
                particle_pos(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 2:4)
                particle_posPrev(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 5:7)
                particle_vel(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 8:10)
                particle_rad(i, 1) = MPI_IO_DATA_lag_bubbles(i, 11)
                ! intfc_vel(i, 1) = MPI_IO_DATA_lag_bubbles(i, 12)
                particle_R0(i) = MPI_IO_DATA_lag_bubbles(i, 13)
                Rmax_stats_part(i) = MPI_IO_DATA_lag_bubbles(i, 14)
                Rmin_stats_part(i) = MPI_IO_DATA_lag_bubbles(i, 15)
                ! bub_dphidt(i) = MPI_IO_DATA_lag_bubbles(i, 16)
                ! gas_p(i, 1) = MPI_IO_DATA_lag_bubbles(i, 17)
                ! gas_mv(i, 1) = MPI_IO_DATA_lag_bubbles(i, 18)
                particle_mass(i) = MPI_IO_DATA_lag_bubbles(i, 19)
                ! gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20)
                ! gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21)
                ! NOTE(review): other call sites seed the cell search with
                ! fd_number - buff_size; confirm the -buff_size seed is intended
                cell = -buff_size
                call s_locate_cell(particle_pos(i, 1:3, 1), cell, particle_s(i, 1:3, 1))
            end do

            deallocate (MPI_IO_DATA_lag_bubbles)

        else
            n_el_particles_loc = 0

            ! Ranks with no particles still take part in the collective read
            ! using a zero-length datatype
            call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr)
            call MPI_TYPE_COMMIT(view, ierr)

            call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, &
                               mpi_info_int, ifile, ierr)

            ! Skip extended header
            disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + &
                       file_num_procs*sizeof(proc_particle_counts(1)), MPI_OFFSET_KIND)
            call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr)

            call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr)

            call MPI_FILE_CLOSE(ifile, ierr)
            call MPI_TYPE_FREE(view, ierr)
        end if

        if (proc_rank == 0) then
            write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count
            write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt
        end if

        deallocate (proc_particle_counts)
#endif

    end subroutine s_restart_bubbles

    !> Contains the particle dynamics subroutines.
    !! @param q_cons_vf Conservative variables
    !! @param q_prim_vf Primitive variables
    !! @param rhs_vf Calculated change of conservative variables
    !! @param t_step Current time step
    !!
@param stage Current stage in the time-stepper algorithm + subroutine s_compute_particle_EL_dynamics(q_prim_vf, bc_type, stage, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, rhs_vf) + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type + type(scalar_field), dimension(sys_size), intent(in) :: rhs_vf + integer, intent(in) :: stage + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z + + integer, dimension(3) :: cell, cellijk + real(wp) :: myMass, myR, myBeta_c, myBeta_t, myR0, myRe, mydrhodt, myVolumeFrac, myGamma, rmass_add, func_sum + real(wp), dimension(3) :: myVel, myPos, force_vec, s_cell + logical :: only_beta = .false. + + integer :: k, l, i, j + + if (lag_params%pressure_force .or. lag_params%added_mass_model > 0) then + do l = 1, num_dims + if (l == 1) then + call s_gradient_field(vL_x, vR_x, field_vars(dPx_id)%sf, l, E_idx) + elseif (l == 2) then + call s_gradient_field(vL_y, vR_y, field_vars(dPy_id)%sf, l, E_idx) + elseif (l == 3) then + call s_gradient_field(vL_z, vR_z, field_vars(dPz_id)%sf, l, E_idx) + end if + end do + end if + + if (lag_params%added_mass_model > 0) then + + do l = 1, num_dims + if (l == 1) then + call s_gradient_field(vL_x, vR_x, field_vars(drhox_id)%sf, l, 1) + ! call s_gradient_field(vL_x, vR_x, field_vars(dufx_id)%sf, l, momxb) + elseif (l == 2) then + call s_gradient_field(vL_y, vR_y, field_vars(drhoy_id)%sf, l, 1) + ! call s_gradient_field(vL_y, vR_y, field_vars(dufy_id)%sf, l, momxb+1) + elseif (l == 3) then + call s_gradient_field(vL_z, vR_z, field_vars(drhoz_id)%sf, l, 1) + ! 
call s_gradient_field(vL_z, vR_z, field_vars(dufz_id)%sf, l, momxb+2) + end if + end do + + end if + + myGamma = (1._wp/fluid_pp(1)%gamma) + 1._wp + myRe = 1.845e-5_wp !fluid_pp(1)%Re(1) !Need a viscosity model for when modeling inviscid eulerian fluid !< Dynamic viscosity + + call nvtxStartRange("LAGRANGE-PARTICLE-DYNAMICS") + + !> Compute Fluid-Particle Forces (drag/pressure/added mass) and convert to particle acceleration + $:GPU_PARALLEL_LOOP(private='[i,k,l,cell,s_cell,myMass,myR,myR0,myPos,myVel,myVolumeFrac,force_vec,rmass_add,func_sum,mydrhodt]',& + & copyin='[stage, myGamma, myRe, only_beta]') + do k = 1, n_el_particles_loc + + f_p(k, :) = 0._wp + p_owner_rank(k) = proc_rank + + s_cell = particle_s(k, 1:3, 2) + cell = int(s_cell(:)) + do i = 1, num_dims + if (s_cell(i) < 0._wp) cell(i) = cell(i) - 1 + end do + + ! Current particle state + myMass = particle_mass(k) + myR = particle_rad(k, 2) + myR0 = particle_R0(k) + myPos = particle_pos(k, :, 2) + myVel = particle_vel(k, :, 2) + myVolumeFrac = 1._wp - q_particles(alphaf_id)%sf(cell(1), cell(2), cell(3)) + mydrhodt = rhs_vf(1)%sf(cell(1), cell(2), cell(3)) + + particle_dposdt(k, :, stage) = 0._wp + particle_dveldt(k, :, stage) = 0._wp + particle_draddt(k, stage) = 0._wp + + call s_get_particle_force(myPos, myR, myVel, myMass, myRe, myGamma, myVolumeFrac, mydrhodt, cell, & + q_prim_vf, field_vars, weights_x_interp, weights_y_interp, weights_z_interp, & + force_vec, rmass_add) + + p_AM(k) = rMass_add + f_p(k, :) = f_p(k, :) + force_vec(:) + + if (.not. 
lag_params%collision_force) then + myMass = particle_mass(k) + p_AM(k) + myVel = particle_vel(k, :, 2) + do l = 1, num_dims + particle_dposdt(k, l, stage) = myVel(l) + particle_dveldt(k, l, stage) = f_p(k, l)/myMass + particle_draddt(k, stage) = 0._wp + end do + end if + + if (lag_params%solver_approach == 2) then + func_sum = gSum(k) + call s_gaussian_atomic(myR, myVel, myPos, force_vec, func_sum, cell, q_particles, only_beta) + end if + + end do + $:END_GPU_PARALLEL_LOOP() + + if (lag_params%solver_approach == 2) then + call s_finalize_beta_field(bc_type, only_beta) + end if + + call nvtxStartRange("LAGRANGE-PARTICLE-COLLISIONS") + if (lag_params%collision_force) then + !> Compute Particle-Particle collision forces + call s_compute_particle_EL_collisions(stage, bc_type) + + $:GPU_PARALLEL_LOOP(private='[k,l,myMass,myVel]') + do k = 1, n_el_particles_loc + myMass = particle_mass(k) + p_AM(k) + myVel = particle_vel(k, :, 2) + do l = 1, num_dims + particle_dposdt(k, l, stage) = myVel(l) + particle_dveldt(k, l, stage) = f_p(k, l)/myMass + particle_draddt(k, stage) = 0._wp + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end if + call nvtxEndRange + + call nvtxEndRange + + end subroutine s_compute_particle_EL_dynamics + + !> Contains the particle collision force computation. 
    subroutine s_compute_particle_EL_collisions(stage, bc_type)

        integer, intent(in) :: stage
        type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
        integer, dimension(3) :: cell
        real(wp), dimension(3) :: s_cell
        integer, dimension(3) :: cellaux
        integer :: i, k, l, q, ip, jp, kp, ii, jj, kk
        logical :: celloutside
        real(wp) :: pidtksp2, ksp, nu1, nu2, Rp1, Rp2, E1, E2, Estar, cor, rmag, Rstar, dij, eta_n, kappa_n, mp1, mp2, dt_loc
        real(wp), dimension(3) :: xp1, xp2, vp1, vp2, v_rel, rpij, nij, vnij, Fnpp_ij, force_vec
        integer :: kpz
        integer :: total_recv
        integer :: glb_id, count
        integer :: n_el_particles_loc_before_ghost

        ! Temporarily append ghost copies of remote particles so cross-rank
        ! pairs are visible to the local sweep
        if (num_procs > 1) then
            n_el_particles_loc_before_ghost = n_el_particles_loc
            call s_reset_force_buffers()
            call s_add_ghost_particles()
        end if

        ! if (lag_num_ts == 1) then
        !     dt_loc = dt
        ! elseif (lag_num_ts == 2) then
        !     if (stage == 1) then
        !         dt_loc = dt
        !     elseif (stage == 2) then
        !         dt_loc = dt/2._wp
        !     end if
        ! elseif (lag_num_ts == 3) then
        !     if (stage == 1) then
        !         dt_loc = dt
        !     elseif (stage == 2) then
        !         dt_loc = dt/4._wp
        !     elseif (stage == 3) then
        !         dt_loc = (2._wp/3._wp)*dt
        !     end if
        ! end if

        ! Neighbor stencil extends in z only for 3D runs
        kpz = 0
        if (num_dims == 3) kpz = 1

        ! Soft-sphere contact parameters (spring stiffness, Poisson ratios,
        ! Young's moduli, coefficient of restitution)
        ksp = 10._wp
        nu1 = 0.35_wp
        nu2 = 0.35_wp
        E1 = 1.e9_wp
        E2 = 1.e9_wp
        cor = 0.7_wp

        pidtksp2 = (pi**2)/((dt*ksp)**2)

        ! Effective modulus for Hertzian contact
        Estar = 1._wp/(((1._wp - nu1**2)/E1) + ((1._wp - nu2**2)/E2))
        Estar = (4._wp/3._wp)*Estar

        call s_reset_linked_list()

        call nvtxStartRange("LAGRANGE-PARTICLE-COLLISIONS")
        error_flag = 0
        $:GPU_UPDATE(device='[error_flag]')

        $:GPU_PARALLEL_LOOP(private='[i,k,cell,ip,jp,kp,Rp1,xp1,mp1,vp1,kk,jj,ii,cellaux,q,Rp2,xp2,mp2,vp2,v_rel,Rstar,rpij,rmag,nij,vnij,dij,kappa_n,eta_n,Fnpp_ij,force_vec,s_cell,celloutside,count]',&
            & copyin='[ksp,nu1,nu2,E1,E2,cor,pidtksp2,Estar,kpz]')
        do k = 1, n_el_particles_loc

            ! Ghost particles outside the physical domain do not initiate pairs
            if (.not. particle_in_domain_physical(particle_pos(k, 1:3, 2))) then
                cycle
            end if

            ! Fractional cell coordinate -> integer cell index (floor)
            s_cell = particle_s(k, 1:3, 2)
            cell = int(s_cell(:))
            do i = 1, num_dims
                if (s_cell(i) < 0._wp) cell(i) = cell(i) - 1
            end do

            ip = cell(1)
            jp = cell(2)
            kp = cell(3)

            Rp1 = particle_rad(k, 2)
            xp1 = particle_pos(k, :, 2)
            mp1 = particle_mass(k)
            vp1 = particle_vel(k, :, 2)

            ! Sweep the 3x3(x3) cell neighborhood around the particle
            do kk = kp - kpz, kp + kpz
                do jj = jp - 1, jp + 1
                    do ii = ip - 1, ip + 1

                        cellaux(1) = ii
                        cellaux(2) = jj
                        cellaux(3) = kk

                        call s_check_celloutside_wbuff(cellaux, celloutside)

                        if (.not. celloutside) then
                            q = particle_head(ii, jj, kk)
                            ! Traverse linked list in that cell

                            ! Guard against corrupted lists: a chain longer
                            ! than the particle count implies a cycle
                            count = 0
                            do while (q /= -1)

                                count = count + 1
                                if (count > n_el_particles_loc) then
                                    $:GPU_ATOMIC(atomic='write')
                                    error_flag = 1
                                    exit
                                end if

                                ! Global-id ordering ensures each pair is
                                ! processed exactly once
                                if (lag_part_id(q, 1) > lag_part_id(k, 1)) then

                                    Rp2 = particle_rad(q, 2)
                                    xp2 = particle_pos(q, :, 2)
                                    mp2 = particle_mass(q)
                                    vp2 = particle_vel(q, :, 2)
                                    v_rel = vp2 - vp1

                                    Rstar = (Rp1*Rp2)/(Rp1 + Rp2)
                                    rpij = xp2 - xp1
                                    rmag = sqrt(rpij(1)**2 + rpij(2)**2 + rpij(3)**2)
                                    rmag = max(rmag, eps_overlap)
                                    nij = rpij/rmag
                                    vnij = dot_product(v_rel, nij)*nij
                                    dij = (Rp1 + Rp2) - rmag

                                    ! dij > 0 means the spheres overlap by dij
                                    if (dij > 0._wp) then

                                        kappa_n = min((pidtksp2*mp1), (pidtksp2*mp2), (Estar*sqrt(Rstar)*sqrt(abs(dij))))

                                        eta_n = ((-2._wp*sqrt(kappa_n)*log(cor))/sqrt((log(cor))**2 + pi**2))*(1._wp/sqrt((1._wp/mp1) + (1._wp/mp2)))

                                        Fnpp_ij = -kappa_n*dij*nij - eta_n*vnij

                                        f_p(k, :) = f_p(k, :) + Fnpp_ij

                                        ! Newton's third law on the partner:
                                        ! atomics locally, buffered send remotely
                                        if (p_owner_rank(q) == proc_rank) then
                                            ! f_p(q, :) = f_p(q, :) - Fnpp_ij

                                            $:GPU_ATOMIC(atomic='update')
                                            f_p(q, 1) = f_p(q, 1) - Fnpp_ij(1)

                                            $:GPU_ATOMIC(atomic='update')
                                            f_p(q, 2) = f_p(q, 2) - Fnpp_ij(2)

                                            $:GPU_ATOMIC(atomic='update')
                                            f_p(q, 3) = f_p(q, 3) - Fnpp_ij(3)

                                        else
                                            call s_add_force_to_send_buffer(p_owner_rank(q), lag_part_id(q, 1), -Fnpp_ij)
                                        end if
                                    end if
                                end if

                                q = linked_list(q)

                            end do
                        end if

                    end do
                end do
            end do

            !>Check each local particle for wall collisions

            call s_compute_wall_collisions(xp1, vp1, Rp1, mp1, Estar, pidtksp2, cor, force_vec)
            f_p(k, :) = f_p(k, :) + force_vec

        end do
        $:END_GPU_PARALLEL_LOOP()

        call nvtxEndRange

        $:GPU_UPDATE(host='[error_flag]')
        if (error_flag == 1) then
            call s_mpi_abort("Linked list infinite loop detected")
        end if

        if (num_procs > 1) then

            ! Drop the temporary ghosts appended for this sweep
            n_el_particles_loc = n_el_particles_loc_before_ghost
            $:GPU_UPDATE(device='[n_el_particles_loc]')

            total_recv = 0
            force_recv_ids = 0
            force_recv_vals = 0.

            call s_transfer_collision_forces(total_recv, force_recv_ids, force_recv_vals)

            $:GPU_UPDATE(device = '[force_recv_ids,force_recv_vals]')

            ! Accumulate reaction forces received from other ranks
            $:GPU_PARALLEL_LOOP(private='[i,k]',copyin = '[total_recv]')
            do i = 1, total_recv
                k = gid_to_local(force_recv_ids(i))
                if (k > 0) then
                    $:GPU_ATOMIC(atomic='update')
                    f_p(k, 1) = f_p(k, 1) + force_recv_vals(3*(i - 1) + 1)

                    $:GPU_ATOMIC(atomic='update')
                    f_p(k, 2) = f_p(k, 2) + force_recv_vals(3*(i - 1) + 2)

                    $:GPU_ATOMIC(atomic='update')
                    f_p(k, 3) = f_p(k, 3) + force_recv_vals(3*(i - 1) + 3)

                end if
            end do
            $:END_GPU_PARALLEL_LOOP()

        end if

    end subroutine s_compute_particle_EL_collisions

    !> This subroutine checks for particles at solid walls to compute a collision force
    subroutine s_compute_wall_collisions(pos, vel, rad, mass, Es, pidtksp, core, wcol_force)
        $:GPU_ROUTINE(function_name='s_compute_wall_collisions',parallelism='[seq]', &
            & cray_inline=True)

        real(wp), dimension(3), intent(in) :: pos, vel
        real(wp), intent(in) :: rad, mass, Es, pidtksp, core
        real(wp), dimension(3), intent(inout) :: wcol_force

        real(wp) :: dij

        wcol_force = 0._wp

        ! Check for particles at solid boundaries
        ! dij > 0 means the particle overlaps the wall by dij
        if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
            dij = rad - (pos(1) - x_cb(-1))

            if (dij > 0._wp) then
                call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 1, 1._wp, wcol_force)
            end if
        end if

        if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then

            dij = rad - (x_cb(m) - pos(1))

            if (dij > 0._wp) then
                call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 1, -1._wp, wcol_force)
            end if
        end if

        if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then

            dij = rad - (pos(2) - y_cb(-1))

            if (dij > 0._wp) then
                call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 2, 1._wp, wcol_force)
            end if
        end if

        if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then

            dij = rad - (y_cb(n) - pos(2))

            if (dij > 0._wp) then
                call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 2, -1._wp, wcol_force)
            end if
        end if

        if (p > 0) then
            if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then

                dij = rad - (pos(3) - z_cb(-1))

                if (dij > 0._wp) then
                    call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 3, 1._wp, wcol_force)
                end if
            end if

            if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then

                dij = rad - (z_cb(p) - pos(3))

                if (dij > 0._wp) then
                    call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 3, -1._wp, wcol_force)
                end if
            end if

        end if

    end subroutine s_compute_wall_collisions

    !> This subroutine computes the collision force with a solid wall
a solid wall + subroutine s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, dir, normal, wcol_force) + $:GPU_ROUTINE(function_name='s_compute_wall_collision_force',parallelism='[seq]', & + & cray_inline=True) + + real(wp), dimension(3), intent(in) :: vel + real(wp), intent(in) :: dij, rad, mass, Es, pidtksp, core, normal + integer, intent(in) :: dir + real(wp), dimension(3), intent(inout) :: wcol_force + + real(wp), dimension(3) :: nij, v_rel, vnij + real(wp) :: kappa_n, eta_n + + ! Normal points away from wall (into domain) + nij = 0._wp + nij(dir) = normal + + ! Relative velocity (wall has zero velocity) + v_rel = vel + vnij = dot_product(v_rel, nij)*nij + + ! Wall has infinite mass so use mp1 only + kappa_n = min((pidtksp*mass), (Es*sqrt(rad)*sqrt(abs(dij)))) + + eta_n = ((-2._wp*sqrt(kappa_n)*log(core))/sqrt((log(core))**2 + pi**2)) & + *(1._wp/sqrt(1._wp/mass)) + + wcol_force = wcol_force + (kappa_n*dij*nij - eta_n*vnij) + + end subroutine s_compute_wall_collision_force + + !> This subroutine adds temporary ghost particles for collision purposes + subroutine s_add_ghost_particles() + + integer :: k, i, q + integer :: patch_id, newBubs + integer, dimension(3) :: cell + logical :: inc_ghost = .true. + + call nvtxStartRange("LAG-GHOSTADD") + call nvtxStartRange("LAG-GHOSTADD-DEV2HOST") + $:GPU_UPDATE(host='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, & + & lag_part_id, particle_rad, & + & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, & + & particle_dposdt, particle_dveldt, n_el_particles_loc, & + & wrap_bubble_dir, wrap_bubble_loc]') + call nvtxEndRange + + ! 
Handle MPI transfer of particles going to another processor's local domain + if (num_procs > 1) then + call nvtxStartRange("LAG-GHOSTADD-TRANSFER-LIST") + call s_add_particles_to_transfer_list(n_el_particles_loc, particle_pos(:, :, 2), particle_posPrev(:, :, 2), inc_ghost) + call nvtxEndRange + + call nvtxStartRange("LAG-GHOSTADD-SENDRECV") + call s_mpi_sendrecv_solid_particles(p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, & + lag_part_id, & + particle_rad, particle_pos, particle_posPrev, particle_vel, & + particle_s, particle_draddt, particle_dposdt, particle_dveldt, lag_num_ts, n_el_particles_loc, & + 2) + call nvtxEndRange + end if + + call nvtxStartRange("LAG-GHOSTADD-HOST2DEV") + $:GPU_UPDATE(device='[p_owner_rank,particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, & + & lag_part_id, particle_rad, & + & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, & + & particle_dposdt, particle_dveldt, n_el_particles_loc]') + call nvtxEndRange + + call nvtxEndRange ! LAG-GHOSTADD + + $:GPU_PARALLEL_LOOP(private='[k,cell]') + do k = 1, n_el_particles_loc + + cell = fd_number - buff_size + call s_locate_cell(particle_pos(k, 1:3, 2), cell, particle_s(k, 1:3, 2)) + + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_add_ghost_particles + + !> The purpose of this subroutine is to check if the current cell is outside the computational domain or not (including ghost cells). + !! @param cellaux Tested cell to smear the particle effect in. + !! @param celloutside If true, then cellaux is outside the computational domain. + subroutine s_check_celloutside_wbuff(cellaux, celloutside) + $:GPU_ROUTINE(function_name='s_check_celloutside_wbuff',parallelism='[seq]', & + & cray_inline=True) + + integer, dimension(3), intent(inout) :: cellaux + logical, intent(out) :: celloutside + + celloutside = .false. + + if (num_dims == 2) then + if ((cellaux(1) < -buff_size) .or. 
(cellaux(2) < -buff_size)) then
                celloutside = .true.
            end if

            if ((cellaux(1) > m + buff_size) .or. (cellaux(2) > n + buff_size)) then
                celloutside = .true.
            end if
        else
            if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size) .or. (cellaux(3) < -buff_size)) then
                celloutside = .true.
            end if

            if ((cellaux(1) > m + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(3) > p + buff_size)) then
                celloutside = .true.
            end if
        end if

    end subroutine s_check_celloutside_wbuff

    !> The purpose of this subroutine is to obtain the bubble source terms based on Maeda and Colonius (2018)
    !! and add them to the RHS scalar field.
    !! @param q_cons_vf Conservative variables
    !! @param q_prim_vf Primitive variables
    !! @param rhs_vf Time derivative of the conservative variables
    !! @param stage Current time-stepper stage (kept for interface uniformity with the other RHS routines)
    subroutine s_compute_particles_EL_source(q_cons_vf, q_prim_vf, rhs_vf, stage)

        type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
        type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf
        integer, intent(in) :: stage

        integer :: i, j, k, l
        real(wp) :: dalphapdt, alpha_f, udot_gradalpha

        ! Spatial derivative of the fluid volume fraction and eulerian particle momentum fields.
        do l = 1, num_dims
            call s_gradient_dir_fornberg(q_particles(alphaf_id)%sf, field_vars(dalphafx_id + l - 1)%sf, l)
            call s_gradient_dir_fornberg(q_particles(alphaupx_id + l - 1)%sf, field_vars(dalphap_upx_id + l - 1)%sf, l)
        end do

        !> Apply particle sources to the Eulerian RHS
        ! BUG FIX: 'l' is used as an inner loop index inside this parallel region and
        ! must be listed as private; a shared 'l' is a data race across GPU threads.
        $:GPU_PARALLEL_LOOP(private='[i,j,k,l,alpha_f,dalphapdt,udot_gradalpha]', collapse=3)
        do k = idwint(3)%beg, idwint(3)%end
            do j = idwint(2)%beg, idwint(2)%end
                do i = idwint(1)%beg, idwint(1)%end
                    ! Only apply sources where enough liquid remains (alpha_f above the floor).
                    if (q_particles(alphaf_id)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then

                        alpha_f = q_particles(alphaf_id)%sf(i, j, k)

                        ! d(alpha_p)/dt = -div(alpha_p u_p); the divergence terms were
                        ! precomputed above into field_vars(dalphap_upx_id + ...).
                        dalphapdt = 0._wp
                        udot_gradalpha = 0._wp
                        do l = 1, num_dims
                            dalphapdt = dalphapdt + field_vars(dalphap_upx_id + l - 1)%sf(i, j, k)
                            udot_gradalpha = udot_gradalpha + q_prim_vf(momxb + l - 1)%sf(i, j, k)*field_vars(dalphafx_id + l - 1)%sf(i, j, k)
                        end do
                        dalphapdt = -dalphapdt
                        ! Add any contribution to dalphapdt from particles growing or shrinking

                        !> Step 1: Source terms for volume fraction corrections
                        ! cons_var/alpha_f * (dalpha_p/dt - u dot grad(alpha_f))
                        do l = 1, E_idx
                            rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + &
                                (q_cons_vf(l)%sf(i, j, k)/alpha_f)*(dalphapdt - udot_gradalpha)
                        end do

                        ! momentum term -1/alpha_f * (p*grad(alpha_f) - Tau^v dot grad(alpha_f)) !Viscous term not implemented
                        do l = 1, num_dims
                            rhs_vf(momxb + l - 1)%sf(i, j, k) = rhs_vf(momxb + l - 1)%sf(i, j, k) - &
                                ((1._wp/alpha_f)* &
                                 (q_prim_vf(E_idx)%sf(i, j, k)*field_vars(dalphafx_id + l - 1)%sf(i, j, k)))
                        end do

                        ! energy term -1/alpha_f * (p*u dot grad(alpha_f) - (Tau^v dot u) dot grad(alpha_f)) !Viscous term not implemented
                        rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - &
                            ((1._wp/alpha_f)* &
                             (q_prim_vf(E_idx)%sf(i, j, k)*udot_gradalpha))

                        !> Step 2: Add the drag/pressure/added mass forces to the fluid
                        rhs_vf(momxb)%sf(i, j, k) = rhs_vf(momxb)%sf(i, j, k) + q_particles(Smx_id)%sf(i, j, k)*(1._wp/alpha_f)
                        rhs_vf(momxb + 1)%sf(i, j, k) = rhs_vf(momxb + 1)%sf(i, j, k) + q_particles(Smy_id)%sf(i, j, k)*(1._wp/alpha_f)

                        ! Energy source: work done by the interphase momentum exchange plus direct energy exchange
                        rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) + &
                            (q_particles(Smx_id)%sf(i, j, k)*q_prim_vf(momxb)%sf(i, j, k) &
                             + q_particles(Smy_id)%sf(i, j, k)*q_prim_vf(momxb + 1)%sf(i, j, k) &
                             + q_particles(SE_id)%sf(i, j, k))*(1._wp/alpha_f)

                        if (num_dims == 3) then
                            rhs_vf(momxb + 2)%sf(i, j, k) = rhs_vf(momxb + 2)%sf(i, j, k) + q_particles(Smz_id)%sf(i, j, k)*(1._wp/alpha_f)
                            ! Energy source (z-contribution)
                            rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) + &
                                (q_particles(Smz_id)%sf(i, j, k)*q_prim_vf(momxb + 2)%sf(i, j, k))*(1._wp/alpha_f)
                        end if

                    end if
                end do
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

    end subroutine s_compute_particles_EL_source

    !> Clears the cell-to-particle linked list (heads and next pointers) and rebuilds it
    !! from the current particle computational coordinates.
    subroutine s_reset_linked_list()

        integer :: j, k, l

        ! -1 marks "empty list" / "end of chain"
        $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3)
        do l = idwbuff(3)%beg, idwbuff(3)%end
            do k = idwbuff(2)%beg, idwbuff(2)%end
                do j = idwbuff(1)%beg, idwbuff(1)%end
                    particle_head(j, k, l) = -1
                end do
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

        $:GPU_PARALLEL_LOOP(private='[k]')
        do k = 1, n_el_particles_loc
            linked_list(k) = -1
        end do
        $:END_GPU_PARALLEL_LOOP()

        call s_build_linked_list()

    end subroutine s_reset_linked_list

    !> Zeroes all Eulerian particle-coupling fields (field_vars, q_particles and the
    !! Kahan-summation compensation arrays) over the whole buffered domain.
    subroutine s_reset_cell_vars()

        integer :: i, j, k, l

        $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4)
        do i = 1, max(nField_vars, q_particles_idx) ! outermost is largest of the i-like dims
            do l = idwbuff(3)%beg, idwbuff(3)%end
                do k = idwbuff(2)%beg, idwbuff(2)%end
                    do j = idwbuff(1)%beg, idwbuff(1)%end
                        ! Zero field_vars if i <= nField_vars
                        if (i <= nField_vars) field_vars(i)%sf(j, k, l) = 0._wp
                        !
Zero q_particles if i <= q_particles_idx
                        if (i <= q_particles_idx) then
                            q_particles(i)%sf(j, k, l) = 0._wp
                            kahan_comp(i)%sf(j, k, l) = 0._wp
                        end if
                    end do
                end do
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

    end subroutine s_reset_cell_vars

    !> Fills/communicates the beta (smeared particle volume fraction) buffers and then
    !! converts the accumulated particle volume into the fluid volume fraction
    !! alpha_f = 1 - beta, floored at 1 - valmaxvoid.
    !! @param bc_type Boundary-condition types per direction/side
    !! @param onlyBeta If true, only the first field (beta) is communicated; otherwise all q_particles fields
    subroutine s_finalize_beta_field(bc_type, onlyBeta)

        type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
        integer :: j, k, l
        logical, intent(in) :: onlyBeta

        call nvtxStartRange("PARTICLES-LAGRANGE-BETA-COMM")
        if (onlyBeta) then
            call s_populate_beta_buffers(q_particles, bc_type, 1)
        else
            call s_populate_beta_buffers(q_particles, bc_type, q_particles_idx)
        end if
        call nvtxEndRange

        ! Store 1 - q_particles(alphaf_id): beta -> fluid volume fraction
        $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3)
        do l = idwbuff(3)%beg, idwbuff(3)%end
            do k = idwbuff(2)%beg, idwbuff(2)%end
                do j = idwbuff(1)%beg, idwbuff(1)%end
                    q_particles(alphaf_id)%sf(j, k, l) = 1._wp - q_particles(alphaf_id)%sf(j, k, l)
                    ! Limiting void fraction given max value (keeps alpha_f >= 1 - valmaxvoid)
                    q_particles(alphaf_id)%sf(j, k, l) = max(q_particles(alphaf_id)%sf(j, k, l), &
                                                             1._wp - lag_params%valmaxvoid)
                end do
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

    end subroutine s_finalize_beta_field

    !> Builds the cell-to-particle linked list: for each local particle, prepends its
    !! index to the list of the cell it occupies (atomic capture makes the prepend
    !! safe under concurrent GPU threads). Also refreshes the global-id -> local map.
    subroutine s_build_linked_list()
        integer :: k, glb_id, i
        integer, dimension(3) :: cell
        real(wp), dimension(3) :: s_cell
        logical :: celloutside

        $:GPU_PARALLEL_LOOP(private='[i,k,cell,s_cell,glb_id,celloutside]')
        do k = 1, n_el_particles_loc

            glb_id = lag_part_id(k, 1)
            gid_to_local(glb_id) = k

            ! Cell index = floor of the particle's computational coordinate
            s_cell = particle_s(k, 1:3, 2)
            cell = int(s_cell(:))
            do i = 1, num_dims
                if (s_cell(i) < 0._wp) cell(i) = cell(i) - 1
            end do

            call s_check_celloutside_wbuff(cell, celloutside)

            if (.not. celloutside) then
                !!!!! Particle linked list building
                $:GPU_ATOMIC(atomic='capture')
                linked_list(k) = particle_head(cell(1), cell(2), cell(3))
                particle_head(cell(1), cell(2), cell(3)) = k
                $:END_GPU_ATOMIC_CAPTURE()
            end if

        end do
        $:END_GPU_PARALLEL_LOOP()

    end subroutine s_build_linked_list

    !> This subroutine updates the Lagrange variables using the tvd RK time steppers.
    !! The time derivative of the particle variables must be stored at every stage to avoid precision errors.
    !! @param q_prim_vf Primitive variables
    !! @param bc_type Boundary-condition types per direction/side
    !! @param stage Current tvd RK stage
    impure subroutine s_update_lagrange_particles_tdv_rk(q_prim_vf, bc_type, stage)

        type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
        type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
        integer, intent(in) :: stage

        integer :: k

        if (time_stepper == 1) then ! 1st order TVD RK

            $:GPU_PARALLEL_LOOP(private='[k]')
            do k = 1, n_el_particles_loc
                !u{1} = u{n} + dt * RHS{n}
                particle_rad(k, 1) = particle_rad(k, 1) + dt*particle_draddt(k, 1)
                if (moving_lag_particles) then
                    particle_posPrev(k, 1:3, 1) = particle_pos(k, 1:3, 1)
                    particle_pos(k, 1:3, 1) = particle_pos(k, 1:3, 1) + dt*particle_dposdt(k, 1:3, 1)
                    particle_vel(k, 1:3, 1) = particle_vel(k, 1:3, 1) + dt*particle_dveldt(k, 1:3, 1)
                end if
            end do
            $:END_GPU_PARALLEL_LOOP()

            call s_transfer_data_to_tmp_particles()
            if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type)
            if (lag_params%write_void_evol) call s_write_void_evol_particles(mytime)
            if (lag_params%write_bubbles_stats) call s_calculate_lag_particle_stats()
            if (lag_params%write_bubbles) then
                ! $:GPU_UPDATE(host='[gas_p,gas_mv,particle_rad,intfc_vel]')
                $:GPU_UPDATE(host='[particle_rad]')
                call s_write_lag_particle_evol(mytime)
            end if

        elseif (time_stepper == 2) then !
2nd order TVD RK
            if (stage == 1) then

                $:GPU_PARALLEL_LOOP(private='[k]')
                do k = 1, n_el_particles_loc
                    !u{1} = u{n} + dt * RHS{n}  (stored in slot 2; slot 1 keeps u{n})
                    particle_rad(k, 2) = particle_rad(k, 1) + dt*particle_draddt(k, 1)
                    if (moving_lag_particles) then
                        particle_posPrev(k, 1:3, 2) = particle_pos(k, 1:3, 1)
                        particle_pos(k, 1:3, 2) = particle_pos(k, 1:3, 1) + dt*particle_dposdt(k, 1:3, 1)
                        particle_vel(k, 1:3, 2) = particle_vel(k, 1:3, 1) + dt*particle_dveldt(k, 1:3, 1)
                    end if
                end do
                $:END_GPU_PARALLEL_LOOP()

                if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type)

            elseif (stage == 2) then

                $:GPU_PARALLEL_LOOP(private='[k]')
                do k = 1, n_el_particles_loc
                    !u{n+1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1})
                    particle_rad(k, 1) = particle_rad(k, 1) + dt*(particle_draddt(k, 1) + particle_draddt(k, 2))/2._wp
                    if (moving_lag_particles) then
                        particle_posPrev(k, 1:3, 1) = particle_pos(k, 1:3, 2)
                        particle_pos(k, 1:3, 1) = particle_pos(k, 1:3, 1) + dt*(particle_dposdt(k, 1:3, 1) + particle_dposdt(k, 1:3, 2))/2._wp
                        particle_vel(k, 1:3, 1) = particle_vel(k, 1:3, 1) + dt*(particle_dveldt(k, 1:3, 1) + particle_dveldt(k, 1:3, 2))/2._wp
                    end if

                end do
                $:END_GPU_PARALLEL_LOOP()

                call s_transfer_data_to_tmp_particles()
                if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type)
                if (lag_params%write_void_evol) call s_write_void_evol_particles(mytime)
                if (lag_params%write_bubbles_stats) call s_calculate_lag_particle_stats()
                if (lag_params%write_bubbles) then
                    $:GPU_UPDATE(host='[particle_rad]')
                    call s_write_lag_particle_evol(mytime)
                end if

            end if

        elseif (time_stepper == 3) then ! 3rd order TVD RK
            if (stage == 1) then

                $:GPU_PARALLEL_LOOP(private='[k]')
                do k = 1, n_el_particles_loc
                    !u{1} = u{n} + dt * RHS{n}
                    particle_rad(k, 2) = particle_rad(k, 1) + dt*particle_draddt(k, 1)
                    if (moving_lag_particles) then
                        particle_posPrev(k, 1:3, 2) = particle_pos(k, 1:3, 1)
                        particle_pos(k, 1:3, 2) = particle_pos(k, 1:3, 1) + dt*particle_dposdt(k, 1:3, 1)
                        particle_vel(k, 1:3, 2) = particle_vel(k, 1:3, 1) + dt*particle_dveldt(k, 1:3, 1)
                    end if
                end do
                $:END_GPU_PARALLEL_LOOP()

                if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type)

            elseif (stage == 2) then

                $:GPU_PARALLEL_LOOP(private='[k]')
                do k = 1, n_el_particles_loc
                    !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}]
                    particle_rad(k, 2) = particle_rad(k, 1) + dt*(particle_draddt(k, 1) + particle_draddt(k, 2))/4._wp
                    if (moving_lag_particles) then
                        particle_posPrev(k, 1:3, 2) = particle_pos(k, 1:3, 2)
                        particle_pos(k, 1:3, 2) = particle_pos(k, 1:3, 1) + dt*(particle_dposdt(k, 1:3, 1) + particle_dposdt(k, 1:3, 2))/4._wp
                        particle_vel(k, 1:3, 2) = particle_vel(k, 1:3, 1) + dt*(particle_dveldt(k, 1:3, 1) + particle_dveldt(k, 1:3, 2))/4._wp
                    end if
                end do
                $:END_GPU_PARALLEL_LOOP()

                if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type)

            elseif (stage == 3) then

                $:GPU_PARALLEL_LOOP(private='[k]')
                do k = 1, n_el_particles_loc
                    !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}]
                    particle_rad(k, 1) = particle_rad(k, 1) + (2._wp/3._wp)*dt*(particle_draddt(k, 1)/4._wp + particle_draddt(k, 2)/4._wp + particle_draddt(k, 3))
                    if (moving_lag_particles) then
                        particle_posPrev(k, 1:3, 1) = particle_pos(k, 1:3, 2)
                        particle_pos(k, 1:3, 1) = particle_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(particle_dposdt(k, 1:3, 1)/4._wp + particle_dposdt(k, 1:3, 2)/4._wp + particle_dposdt(k, 1:3, 3))
                        particle_vel(k, 1:3, 1) = particle_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(particle_dveldt(k, 1:3, 1)/4._wp + particle_dveldt(k, 1:3, 2)/4._wp + particle_dveldt(k, 1:3, 3))
                    end if

                end do
                $:END_GPU_PARALLEL_LOOP()

                call s_transfer_data_to_tmp_particles()
                if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type)
                if (lag_params%write_void_evol) call s_write_void_evol_particles(mytime)
                if (lag_params%write_bubbles_stats) call s_calculate_lag_particle_stats()
                ! NOTE(review): unlike the RK1/RK2 branches, this one throttles output by
                ! next_write_time — confirm the asymmetry is intentional.
                if (lag_params%write_bubbles .and. mytime >= next_write_time) then
                    $:GPU_UPDATE(host='[particle_mass,particle_rad]')
                    call s_write_lag_particle_evol(mytime)
                    next_write_time = next_write_time + t_save
                end if

            end if

        end if

    end subroutine s_update_lagrange_particles_tdv_rk

    !> This subroutine enforces reflective and wall boundary conditions for EL particles,
    !! transfers particles between MPI ranks, applies periodic wrapping, removes
    !! particles that left the domain, and re-smears the beta field.
    !! @param q_prim_vf Primitive variables
    !! @param nstage Current RK stage (final-stage updates also write slot 1)
    !! @param bc_type Boundary-condition types per direction/side
    impure subroutine s_enforce_EL_particles_boundary_conditions(q_prim_vf, nstage, bc_type)

        type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
        type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
        integer, intent(in) :: nstage
        real(wp) :: offset

        integer :: k, i, q
        integer :: newBubs, new_idx
        integer, dimension(3) :: cell
        logical :: inc_ghost
        real(wp) :: myR, func_sum
        real(wp), dimension(3) :: myPos, myVel, myForce
        logical :: only_beta

        ! BUG FIX: initialize by assignment instead of declaration-time initializers;
        ! the latter would give these locals the implicit SAVE attribute.
        inc_ghost = .false.
        only_beta = .true.

        call nvtxStartRange("LAG-BC")
        call nvtxStartRange("LAG-BC-DEV2HOST")
        $:GPU_UPDATE(host='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, &
            & lag_part_id, particle_rad, &
            & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, &
            & particle_dposdt, particle_dveldt, keep_bubble, n_el_particles_loc, &
            & wrap_bubble_dir, wrap_bubble_loc]')
        call nvtxEndRange

        !
Handle MPI transfer of particles going to another processor's local domain
        if (num_procs > 1) then
            call nvtxStartRange("LAG-BC-TRANSFER-LIST")
            call s_add_particles_to_transfer_list(n_el_particles_loc, particle_pos(:, :, 2), particle_posPrev(:, :, 2), inc_ghost)
            call nvtxEndRange

            call nvtxStartRange("LAG-BC-SENDRECV")
            call s_mpi_sendrecv_solid_particles(p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, &
                                                lag_part_id, &
                                                particle_rad, particle_pos, particle_posPrev, particle_vel, &
                                                particle_s, particle_draddt, particle_dposdt, particle_dveldt, lag_num_ts, n_el_particles_loc, &
                                                2)
            call nvtxEndRange
        end if

        ! Push the (possibly grown/shrunk) particle arrays back to the device
        call nvtxStartRange("LAG-BC-HOST2DEV")
        $:GPU_UPDATE(device='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, &
            & lag_part_id, particle_rad, &
            & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, &
            & particle_dposdt, particle_dveldt, n_el_particles_loc]')
        call nvtxEndRange

        ! Per-particle wall relocation / periodic-wrap flagging / out-of-domain deletion
        $:GPU_PARALLEL_LOOP(private='[k, cell]',copyin='[nstage]')
        do k = 1, n_el_particles_loc
            keep_bubble(k) = 1
            wrap_bubble_loc(k, :) = 0
            wrap_bubble_dir(k, :) = 0

            ! Relocate particles at solid boundaries and delete particles that leave
            ! buffer regions. eps_overlap*rad keeps the particle center off the wall.
            if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) &
                .and. particle_pos(k, 1, 2) < x_cb(-1) + eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 1, 2) = x_cb(-1) + eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 1, 1) = particle_pos(k, 1, 2)
                end if
            elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) &
                    .and. particle_pos(k, 1, 2) > x_cb(m) - eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 1, 2) = x_cb(m) - eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 1, 1) = particle_pos(k, 1, 2)
                end if
            elseif (bc_x%beg == BC_PERIODIC .and. particle_pos(k, 1, 2) < pcomm_coords(1)%beg .and. &
                    particle_posPrev(k, 1, 2) >= pcomm_coords(1)%beg) then
                ! Crossed the periodic x-begin boundary this step: mark for wrapping
                wrap_bubble_dir(k, 1) = 1
                wrap_bubble_loc(k, 1) = -1
            elseif (bc_x%end == BC_PERIODIC .and. particle_pos(k, 1, 2) > pcomm_coords(1)%end .and. &
                    particle_posPrev(k, 1, 2) <= pcomm_coords(1)%end) then
                wrap_bubble_dir(k, 1) = 1
                wrap_bubble_loc(k, 1) = 1
            elseif (particle_pos(k, 1, 2) >= x_cb(m)) then
                keep_bubble(k) = 0
            elseif (particle_pos(k, 1, 2) < x_cb(-1)) then
                keep_bubble(k) = 0
            end if

            if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) &
                .and. particle_pos(k, 2, 2) < y_cb(-1) + eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 2, 2) = y_cb(-1) + eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 2, 1) = particle_pos(k, 2, 2)
                end if
            else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) &
                     .and. particle_pos(k, 2, 2) > y_cb(n) - eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 2, 2) = y_cb(n) - eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 2, 1) = particle_pos(k, 2, 2)
                end if
            elseif (bc_y%beg == BC_PERIODIC .and. particle_pos(k, 2, 2) < pcomm_coords(2)%beg .and. &
                    particle_posPrev(k, 2, 2) >= pcomm_coords(2)%beg) then
                wrap_bubble_dir(k, 2) = 1
                wrap_bubble_loc(k, 2) = -1
            elseif (bc_y%end == BC_PERIODIC .and. particle_pos(k, 2, 2) > pcomm_coords(2)%end .and. &
                    particle_posPrev(k, 2, 2) <= pcomm_coords(2)%end) then
                wrap_bubble_dir(k, 2) = 1
                wrap_bubble_loc(k, 2) = 1
            elseif (particle_pos(k, 2, 2) >= y_cb(n)) then
                keep_bubble(k) = 0
            elseif (particle_pos(k, 2, 2) < y_cb(-1)) then
                keep_bubble(k) = 0
            end if

            if (p > 0) then
                if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) &
                    .and. particle_pos(k, 3, 2) < z_cb(-1) + eps_overlap*particle_rad(k, 2)) then
                    particle_pos(k, 3, 2) = z_cb(-1) + eps_overlap*particle_rad(k, 2)
                    if (nstage == lag_num_ts) then
                        particle_pos(k, 3, 1) = particle_pos(k, 3, 2)
                    end if
                else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) &
                         .and. particle_pos(k, 3, 2) > z_cb(p) - eps_overlap*particle_rad(k, 2)) then
                    particle_pos(k, 3, 2) = z_cb(p) - eps_overlap*particle_rad(k, 2)
                    if (nstage == lag_num_ts) then
                        particle_pos(k, 3, 1) = particle_pos(k, 3, 2)
                    end if
                elseif (bc_z%beg == BC_PERIODIC .and. particle_pos(k, 3, 2) < pcomm_coords(3)%beg .and. &
                        particle_posPrev(k, 3, 2) >= pcomm_coords(3)%beg) then
                    wrap_bubble_dir(k, 3) = 1
                    wrap_bubble_loc(k, 3) = -1
                elseif (bc_z%end == BC_PERIODIC .and. particle_pos(k, 3, 2) > pcomm_coords(3)%end .and. &
                        particle_posPrev(k, 3, 2) <= pcomm_coords(3)%end) then
                    wrap_bubble_dir(k, 3) = 1
                    wrap_bubble_loc(k, 3) = 1
                elseif (particle_pos(k, 3, 2) >= z_cb(p)) then
                    keep_bubble(k) = 0
                elseif (particle_pos(k, 3, 2) < z_cb(-1)) then
                    keep_bubble(k) = 0
                end if
            end if

            if (keep_bubble(k) == 1) then
                ! Remove bubbles that are no longer in a liquid
                cell = fd_number - buff_size
                call s_locate_cell(particle_pos(k, 1:3, 2), cell, particle_s(k, 1:3, 2))

                if (q_prim_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then
                    keep_bubble(k) = 0
                end if
            end if
        end do
        $:END_GPU_PARALLEL_LOOP()

        if (n_el_particles_loc > 0) then
            call nvtxStartRange("LAG-BC")
            call nvtxStartRange("LAG-BC-DEV2HOST")
            $:GPU_UPDATE(host='[particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, &
                & lag_part_id, particle_rad, &
                & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, &
                & particle_dposdt, particle_dveldt, keep_bubble, n_el_particles_loc, &
                & wrap_bubble_dir, wrap_bubble_loc]')
            call nvtxEndRange

            ! Compact the particle arrays on the host, dropping deleted particles
            newBubs = 0
            do k = 1, n_el_particles_loc
                if (keep_bubble(k) == 1) then
                    newBubs = newBubs + 1
                    if (newBubs /= k) then
                        call s_copy_lag_particle(newBubs, k)
                        wrap_bubble_dir(newBubs, :) = wrap_bubble_dir(k, :)
                        wrap_bubble_loc(newBubs, :) = wrap_bubble_loc(k, :)
                    end if
                end if
            end do

            n_el_particles_loc = newBubs

            ! Handle periodic wrapping of bubbles on same processor: duplicate the
            ! particle at the wrapped position (shifted by the global domain extent).
            newBubs = 0
            do k = 1, n_el_particles_loc
                if (any(wrap_bubble_dir(k, :) == 1)) then
                    newBubs = newBubs + 1
                    new_idx = n_el_particles_loc + newBubs
                    call s_copy_lag_particle(new_idx, k)
                    do i = 1, num_dims
                        if (wrap_bubble_dir(k, i) == 1) then
                            offset = glb_bounds(i)%end - glb_bounds(i)%beg
                            if (wrap_bubble_loc(k, i) == 1) then
                                do q = 1, 2
                                    particle_pos(new_idx, i, q) = particle_pos(new_idx, i, q) - offset
                                    particle_posPrev(new_idx, i, q) = particle_posPrev(new_idx, i, q) - offset
                                end do
                            else if (wrap_bubble_loc(k, i) == -1) then
                                do q = 1, 2
                                    particle_pos(new_idx, i, q) = particle_pos(new_idx, i, q) + offset
                                    particle_posPrev(new_idx, i, q) = particle_posPrev(new_idx, i, q) + offset
                                end do
                            end if
                        end if
                    end do
                end if
            end do
            ! NOTE(review): n_el_particles_loc is not incremented by newBubs here before
            ! the device update — confirm wrapped duplicates are accounted for downstream.
            call nvtxStartRange("LAG-BC-HOST2DEV")
            $:GPU_UPDATE(device='[particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, &
                & lag_part_id, particle_rad, &
                & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, &
                & particle_dposdt, particle_dveldt, n_el_particles_loc]')
            call nvtxEndRange
        end if

        call s_reset_cell_vars()

        ! Re-smear each particle onto the grid (beta field only)
        $:GPU_PARALLEL_LOOP(private='[cell,myR,myPos,myVel,myForce,func_sum]',copyin='[only_beta]')
        do k = 1, n_el_particles_loc
            myR = particle_rad(k, 2)
            myPos = particle_pos(k, 1:3, 2)
            myVel = particle_vel(k, 1:3, 2)
            myForce = f_p(k, :)

            cell = fd_number - buff_size
            call s_locate_cell(particle_pos(k, 1:3, 2), cell, particle_s(k, 1:3, 2))

            !Compute the total gaussian contribution for each particle for normalization
            call s_compute_gaussian_contribution(myR, myPos, cell, func_sum)
            gSum(k) = func_sum

            call s_gaussian_atomic(myR, myVel, myPos, myForce, func_sum, cell, q_particles, only_beta)

        end do

        ! Update void fraction and communicate buffers
        call s_finalize_beta_field(bc_type, only_beta)

        call nvtxEndRange ! LAG-BC

    end subroutine s_enforce_EL_particles_boundary_conditions

    !> This subroutine returns the computational coordinate of the cell for the given position.
    !! @param pos Input coordinates
    !! @param cell Computational coordinate of the cell (used as the search starting point on entry)
    !! @param scoord Calculated particle coordinates
    subroutine s_locate_cell(pos, cell, scoord)
        $:GPU_ROUTINE(function_name='s_locate_cell',parallelism='[seq]', &
            & cray_inline=True)

        real(wp), dimension(3), intent(in) :: pos
        real(wp), dimension(3), intent(out) :: scoord
        integer, dimension(3), intent(inout) :: cell

        integer :: i

        ! Walk the cell index until pos falls inside [x_cb(cell-1), x_cb(cell))
        do while (pos(1) < x_cb(cell(1) - 1))
            cell(1) = cell(1) - 1
        end do

        do while (pos(1) >= x_cb(cell(1)))
            cell(1) = cell(1) + 1
        end do

        do while (pos(2) < y_cb(cell(2) - 1))
            cell(2) = cell(2) - 1
        end do

        do while (pos(2) >= y_cb(cell(2)))
            cell(2) = cell(2) + 1
        end do

        if (p > 0) then
            do while (pos(3) < z_cb(cell(3) - 1))
                cell(3) = cell(3) - 1
            end do
            do while (pos(3) >= z_cb(cell(3)))
                cell(3) = cell(3) + 1
            end do
        end if

        ! The numbering of the cell of which left boundary is the domain boundary is 0.
        ! if comp.coord of the pos is s, the real coordinate of s is
        ! (the coordinate of the left boundary of the Floor(s)-th cell)
        ! + (s-(int(s))*(cell-width).
        ! In other words, the coordinate of the center of the cell is x_cc(cell).

        !coordinates in computational space
        scoord(1) = cell(1) + (pos(1) - x_cb(cell(1) - 1))/dx(cell(1))
        scoord(2) = cell(2) + (pos(2) - y_cb(cell(2) - 1))/dy(cell(2))
        scoord(3) = 0._wp
        if (p > 0) scoord(3) = cell(3) + (pos(3) - z_cb(cell(3) - 1))/dz(cell(3))
        ! floor(scoord), correcting int()'s truncation toward zero for negative values
        cell(:) = int(scoord(:))
        do i = 1, num_dims
            if (scoord(i) < 0._wp) cell(i) = cell(i) - 1
        end do

    end subroutine s_locate_cell

    !> Copies the current (slot-1) particle state into the temporary slot-2 storage.
impure subroutine s_transfer_data_to_tmp_particles()

        integer :: k

        ! Slot 2 is the scratch/stage copy used by the RK sub-steps and BC handling
        $:GPU_PARALLEL_LOOP(private='[k]')
        do k = 1, n_el_particles_loc
            particle_rad(k, 2) = particle_rad(k, 1)
            particle_pos(k, 1:3, 2) = particle_pos(k, 1:3, 1)
            particle_posPrev(k, 1:3, 2) = particle_posPrev(k, 1:3, 1)
            particle_vel(k, 1:3, 2) = particle_vel(k, 1:3, 1)
            particle_s(k, 1:3, 2) = particle_s(k, 1:3, 1)
        end do
        $:END_GPU_PARALLEL_LOOP()

    end subroutine s_transfer_data_to_tmp_particles

    !> The purpose of this procedure is to determine if the global coordinates of the bubbles
    !! are present in the current MPI processor (including ghost cells).
    !! @param pos_part Spatial coordinates of the bubble
    function particle_in_domain(pos_part)

        logical :: particle_in_domain
        real(wp), dimension(3), intent(in) :: pos_part

        ! 2D
        if (p == 0 .and. cyl_coord .neqv. .true.) then
            ! Defining a virtual z-axis that has the same dimensions as y-axis
            ! defined in the input file
            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. &
                                  (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. &
                                  (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. &
                                  (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. &
                                  (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) > -lag_params%charwidth/2._wp))
        else
            ! cyl_coord: radial coordinate uses |y|, clipped below at the axis
            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. &
                                  (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. &
                                  (abs(pos_part(2)) < y_cb(n + buff_size - fd_number)) .and. &
                                  (abs(pos_part(2)) >= max(y_cb(fd_number - buff_size - 1), 0._wp)))
        end if

        ! 3D
        ! NOTE(review): this branch tests p > 1 while every other 3-D guard in this file
        ! uses p > 0 — confirm p == 1 cases are intentionally excluded here.
        if (p > 1) then
            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. &
                                  (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. &
                                  (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. &
                                  (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. &
                                  (pos_part(3) < z_cb(p + buff_size - fd_number)) .and. &
                                  (pos_part(3) >= z_cb(fd_number - buff_size - 1)))
        end if

        ! For symmetric and wall boundary condition: particles may not sit in ghost layers
        if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
            particle_in_domain = (particle_in_domain .and. (pos_part(1) >= x_cb(-1)))
        end if
        if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
            particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m)))
        end if
        if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then
            particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1)))
        end if
        if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then
            particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n)))
        end if
        if (p > 0) then
            if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
                particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1)))
            end if
            if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
                particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p)))
            end if
        end if

    end function particle_in_domain

    !> The purpose of this procedure is to determine if the lagrangian bubble is located in the
    !! physical domain. The ghost cells are not part of the physical domain.
    !! @param pos_part Spatial coordinates of the bubble
    function particle_in_domain_physical(pos_part)

        logical :: particle_in_domain_physical
        real(wp), dimension(3), intent(in) :: pos_part

        particle_in_domain_physical = ((pos_part(1) < x_cb(m)) .and. (pos_part(1) >= x_cb(-1)) .and. &
                                       (pos_part(2) < y_cb(n)) .and. (pos_part(2) >= y_cb(-1)))

        if (p > 0) then
            particle_in_domain_physical = (particle_in_domain_physical .and. (pos_part(3) < z_cb(p)) .and. (pos_part(3) >= z_cb(-1)))
        end if

    end function particle_in_domain_physical

    !> The purpose of this procedure is to calculate the gradient from reconstructed states along the x, y and z
    !! @param vL_field left edge reconstructed values
    !! @param vR_field right edge reconstructed values
    !! @param dq Output gradient of q
    !! @param dir Gradient spatial direction
    !! @param field_var variable index for reconstructed states
    subroutine s_gradient_field(vL_field, vR_field, dq, dir, field_var)

        real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(out) :: dq
        real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_field
        real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_field
        integer, intent(in) :: dir, field_var

        integer :: i, j, k
        real(wp) :: mydx

        if (dir == 1) then

            $:GPU_PARALLEL_LOOP(private='[i,j,k,mydx]', collapse=3,copyin='[dir, field_var]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(1)%beg, idwbuff(1)%end
                        mydx = dx(i)
                        dq(i, j, k) = (vR_field(i, j, k, field_var) - vL_field(i, j, k, field_var))/mydx
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()

        elseif (dir == 2) then

            ! NOTE(review): for dir 2/3 the reconstructed fields appear to be stored with
            ! sweep-swapped indices (read at (i,j,k), written to dq with axes swapped) —
            ! assumed to match the reconstruction layout; confirm against the caller.
            $:GPU_PARALLEL_LOOP(private='[i,j,k,mydx]', collapse=3,copyin='[dir, field_var]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(1)%beg, idwbuff(1)%end
                    do i = idwbuff(2)%beg, idwbuff(2)%end
                        mydx = dy(i)
                        dq(j, i, k) = (vR_field(i, j, k, field_var) - vL_field(i, j, k, field_var))/mydx
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()

        elseif (dir == 3) then

            $:GPU_PARALLEL_LOOP(private='[i,j,k,mydx]', collapse=3,copyin='[dir, field_var]')
            do k = idwbuff(1)%beg, idwbuff(1)%end
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(3)%beg, idwbuff(3)%end
                        mydx = dz(i)
                        dq(k, j, i) = (vR_field(i, j, k, field_var) - vL_field(i, j, k, field_var))/mydx
                    end
do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()

        end if

    end subroutine s_gradient_field

    !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z directions using Fornberg's method
    !! @param q Input scalar field
    !! @param dq Output gradient of q
    !! @param dir Gradient spatial direction
    subroutine s_gradient_dir_fornberg(q, dq, dir)

        real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(in) :: q
        real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(out) :: dq
        integer, intent(in) :: dir

        integer :: i, j, k, a, npts, s_idx

        ! Half-width of the centered stencil; weights were precomputed over
        ! idwbuff%beg+npts .. idwbuff%end-npts in s_compute_fornberg_fd_weights.
        npts = (nWeights_grad - 1)/2

        ! FIX: loop margins previously hard-coded '2' (valid only for nWeights_grad == 5);
        ! use npts so the bounds stay consistent with the precomputed weight range.
        if (dir == 1) then
            $:GPU_PARALLEL_LOOP(private='[i,j,k,s_idx,a]', collapse=3,copyin='[npts]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(1)%beg + npts, idwbuff(1)%end - npts
                        dq(i, j, k) = 0._wp
                        do a = -npts, npts
                            s_idx = a + npts + 1
                            dq(i, j, k) = dq(i, j, k) + weights_x_grad(s_idx)%sf(i, 1, 1)*q(i + a, j, k)
                        end do
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        elseif (dir == 2) then
            $:GPU_PARALLEL_LOOP(private='[i,j,k,s_idx,a]', collapse=3,copyin='[npts]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(2)%beg + npts, idwbuff(2)%end - npts
                    do i = idwbuff(1)%beg, idwbuff(1)%end
                        dq(i, j, k) = 0._wp
                        do a = -npts, npts
                            s_idx = a + npts + 1
                            dq(i, j, k) = dq(i, j, k) + weights_y_grad(s_idx)%sf(j, 1, 1)*q(i, j + a, k)
                        end do
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        elseif (dir == 3) then
            $:GPU_PARALLEL_LOOP(private='[i,j,k,s_idx,a]', collapse=3,copyin='[npts]')
            do k = idwbuff(3)%beg + npts, idwbuff(3)%end - npts
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(1)%beg, idwbuff(1)%end
                        dq(i, j, k) = 0._wp
                        do a = -npts, npts
                            s_idx = a + npts + 1
                            dq(i, j, k) = dq(i, j, k) + weights_z_grad(s_idx)%sf(k, 1, 1)*q(i, j, k + a)
                        end do
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        end if

    end subroutine s_gradient_dir_fornberg

    !> The purpose of this procedure is to compute the Fornberg finite difference weights for derivatives (only done once at start time)
    !! @param npts Half-width of the centered stencil
    impure subroutine s_compute_fornberg_fd_weights(npts)

        integer, intent(in) :: npts
        integer :: i, j, k, a, m_order
        integer :: s_idx
        real(wp) :: x0, y0, z0
        real(wp) :: x_stencil(nWeights_grad)
        real(wp) :: c(nWeights_grad, 0:1)

        m_order = 1 ! first derivative

        ! NOTE(review): these parallel regions call s_fornberg_weights, whose GPU routine
        ! directive is commented out below — confirm device compilation of this call.
        $:GPU_PARALLEL_LOOP(private='[i,a,x_stencil,c,s_idx,x0]', copyin='[npts,m_order]')
        do i = idwbuff(1)%beg + npts, idwbuff(1)%end - npts
            do a = -npts, npts
                s_idx = a + npts + 1
                x_stencil(s_idx) = x_cc(i + a)
            end do
            x0 = x_cc(i)

            call s_fornberg_weights(x0, x_stencil, nWeights_grad, m_order, c)

            do a = -npts, npts
                s_idx = a + npts + 1
                weights_x_grad(s_idx)%sf(i, 1, 1) = c(s_idx, 1)
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

        $:GPU_PARALLEL_LOOP(private='[j,a,x_stencil,c,s_idx,y0]', copyin='[npts,m_order]')
        do j = idwbuff(2)%beg + npts, idwbuff(2)%end - npts
            do a = -npts, npts
                s_idx = a + npts + 1
                x_stencil(s_idx) = y_cc(j + a)
            end do
            y0 = y_cc(j)

            call s_fornberg_weights(y0, x_stencil, nWeights_grad, m_order, c)

            do a = -npts, npts
                s_idx = a + npts + 1
                weights_y_grad(s_idx)%sf(j, 1, 1) = c(s_idx, 1)
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

        if (num_dims == 3) then

            $:GPU_PARALLEL_LOOP(private='[k,a,x_stencil,c,s_idx,z0]', copyin='[npts,m_order]')
            do k = idwbuff(3)%beg + npts, idwbuff(3)%end - npts
                do a = -npts, npts
                    s_idx = a + npts + 1
                    x_stencil(s_idx) = z_cc(k + a)
                end do
                z0 = z_cc(k)

                call s_fornberg_weights(z0, x_stencil, nWeights_grad, m_order, c)

                do a = -npts, npts
                    s_idx = a + npts + 1
                    weights_z_grad(s_idx)%sf(k, 1, 1) = c(s_idx, 1)
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()

        end if

    end subroutine s_compute_fornberg_fd_weights

    !> The purpose of this procedure is to compute the Fornberg finite difference weights on a local stencil
    !! (Fornberg 1988, "Generation of finite difference formulas on arbitrarily spaced grids").
    !! @param x0 Evaluation point
    !! @param stencil Stencil coordinates
    !! @param npts Number of stencil points
    !! @param m_order Highest derivative order
    !! @param coeffs Weights; coeffs(:, d) are the weights for the d-th derivative
    subroutine s_fornberg_weights(x0, stencil, npts, m_order, coeffs)
        ! $:GPU_ROUTINE(parallelism='[seq]')

        integer, intent(in) :: npts ! number of stencil points
        integer, intent(in) :: m_order ! highest derivative order
        real(wp), intent(in) :: x0 ! evaluation point
        real(wp), intent(in) :: stencil(npts) ! stencil coordinates
        real(wp), intent(out) :: coeffs(npts, 0:m_order)

        integer :: i, j, k, mn
        real(wp) :: c1, c2, c3, c4, c5

        coeffs = 0.0_wp
        c1 = 1.0_wp
        c4 = stencil(1) - x0
        coeffs(1, 0) = 1.0_wp

        do i = 2, npts
            mn = min(i - 1, m_order)
            c2 = 1.0_wp
            c5 = c4
            c4 = stencil(i) - x0

            do j = 1, i - 1
                c3 = stencil(i) - stencil(j)
                c2 = c2*c3

                if (j == i - 1) then
                    do k = mn, 1, -1
                        coeffs(i, k) = c1*(k*coeffs(i - 1, k - 1) - c5*coeffs(i - 1, k))/c2
                    end do
                    coeffs(i, 0) = -c1*c5*coeffs(i - 1, 0)/c2
                end if

                do k = mn, 1, -1
                    coeffs(j, k) = (c4*coeffs(j, k) - k*coeffs(j, k - 1))/c3
                end do
                coeffs(j, 0) = c4*coeffs(j, 0)/c3
            end do

            c1 = c2
        end do

    end subroutine s_fornberg_weights

    !> The purpose of this procedure is to compute the barycentric weights for interpolation (only done once at start time)
    !! @param npts Half-width of the centered stencil
    impure subroutine s_compute_barycentric_weights(npts)

        integer, intent(in) :: npts
        integer :: i, j, k, l, a, b
        real(wp) :: prod_x, prod_y, prod_z, dx_loc, dy_loc, dz_loc

        ! w_a = 1 / prod_{b /= a} (x_a - x_b) for each point of the local stencil
        $:GPU_PARALLEL_LOOP(private='[i,a,b,prod_x,dx_loc]', copyin = '[npts]')
        do i = idwbuff(1)%beg + npts, idwbuff(1)%end - npts
            do a = -npts, npts
                prod_x = 1._wp
                do b = -npts, npts
                    if (a /= b) then
                        dx_loc = x_cc(i + a) - x_cc(i + b)
                        prod_x = prod_x*dx_loc
                    end if
                end do
                weights_x_interp(a + npts + 1)%sf(i, 1, 1) = 1._wp/prod_x
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

        $:GPU_PARALLEL_LOOP(private='[j,a,b,prod_y,dy_loc]', copyin = '[npts]')
        do j = idwbuff(2)%beg + npts, idwbuff(2)%end - npts
            do a = -npts, npts
                prod_y = 1._wp
                do b = -npts, npts
                    if (a /= b) then
                        dy_loc =
y_cc(j + a) - y_cc(j + b) + prod_y = prod_y*dy_loc + end if + end do + weights_y_interp(a + npts + 1)%sf(j, 1, 1) = 1._wp/prod_y + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (num_dims == 3) then + $:GPU_PARALLEL_LOOP(private='[k,a,b,prod_z,dz_loc]', copyin = '[npts]') + do k = idwbuff(3)%beg + npts, idwbuff(3)%end - npts + do a = -npts, npts + prod_z = 1._wp + do b = -npts, npts + if (a /= b) then + dz_loc = z_cc(k + a) - z_cc(k + b) + prod_z = prod_z*dz_loc + end if + end do + weights_z_interp(a + npts + 1)%sf(k, 1, 1) = 1._wp/prod_z + end do + end do + end if + + end subroutine s_compute_barycentric_weights + + impure subroutine s_open_lag_bubble_evol + + character(LEN=path_len + 2*name_len) :: file_loc + logical file_exist + character(LEN=25) :: FMT + + write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + call my_inquire(trim(file_loc), file_exist) + + if (precision == 1) then + FMT = "(A16,A14,8A16)" + else + FMT = "(A24,A14,8A24)" + end if + + if (.not. file_exist) then + open (LAG_EVOL_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (LAG_EVOL_ID, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', & + 'Vx', 'Vy', 'Vz', 'Fp_x', 'Fp_y', 'Fp_z', & + 'radius' + else + open (LAG_EVOL_ID, FILE=trim(file_loc), FORM='formatted', position='append') + end if + + end subroutine s_open_lag_bubble_evol + + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !! @param q_time Current time + impure subroutine s_write_lag_particle_evol(qtime) + + real(wp), intent(in) :: qtime + integer :: k, ios + character(LEN=25) :: FMT + + character(LEN=path_len + 2*name_len) :: file_loc, path + logical :: file_exist + + if (precision == 1) then + ! FMT = "(F16.8,I14,8F16.8)" + FMT = "(F16.8,I14,10F16.8)" + else + ! FMT = "(F24.16,I14,8F24.16)" + FMT = "(F24.16,I14,10F24.16)" + end if + + ! 
Cycle through list + do k = 1, n_el_particles_loc + write (LAG_EVOL_ID, FMT) & + qtime, & + lag_part_id(k, 1), & + particle_pos(k, 1, 1), & + particle_pos(k, 2, 1), & + particle_pos(k, 3, 1), & + particle_vel(k, 1, 1), & + particle_vel(k, 2, 1), & + particle_vel(k, 3, 1), & + f_p(k, 1), & + f_p(k, 2), & + f_p(k, 3), & + particle_rad(k, 1) + end do + + end subroutine s_write_lag_particle_evol + + impure subroutine s_close_lag_particle_evol + + close (LAG_EVOL_ID) + + end subroutine s_close_lag_particle_evol + + subroutine s_open_void_evol + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + if (proc_rank == 0) then + write (file_loc, '(A)') 'voidfraction.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + call my_inquire(trim(file_loc), file_exist) + if (.not. file_exist) then + open (LAG_VOID_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') + !write (12, *) 'currentTime, averageVoidFraction, ', & + ! 'maximumVoidFraction, totalParticlesVolume' + !write (12, *) 'The averageVoidFraction value does ', & + ! 'not reflect the real void fraction in the cloud since the ', & + ! 'cells which do not have bubbles are not accounted' + else + open (LAG_VOID_ID, FILE=trim(file_loc), FORM='formatted', position='append') + end if + end if + + end subroutine s_open_void_evol + + !> Subroutine that writes some useful statistics related to the volume fraction + !! of the particles (void fraction) in the computatioational domain + !! on each time step. + !! 
@param q_time Current time + impure subroutine s_write_void_evol_particles(qtime) + + real(wp), intent(in) :: qtime + real(wp) :: volcell, voltot + real(wp) :: lag_void_max, lag_void_avg, lag_vol + real(wp) :: void_max_glb, void_avg_glb, vol_glb + + integer :: i, j, k + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + lag_void_max = 0._wp + lag_void_avg = 0._wp + lag_vol = 0._wp + $:GPU_PARALLEL_LOOP(private='[volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + do k = 0, p + do j = 0, n + do i = 0, m + lag_void_max = max(lag_void_max, 1._wp - q_particles(alphaf_id)%sf(i, j, k)) + call s_get_char_vol(i, j, k, volcell) + if ((1._wp - q_particles(alphaf_id)%sf(i, j, k)) > 5.0d-11) then + lag_void_avg = lag_void_avg + (1._wp - q_particles(alphaf_id)%sf(i, j, k))*volcell + lag_vol = lag_vol + volcell + end if + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + +#ifdef MFC_MPI + if (num_procs > 1) then + call s_mpi_allreduce_max(lag_void_max, void_max_glb) + lag_void_max = void_max_glb + call s_mpi_allreduce_sum(lag_vol, vol_glb) + lag_vol = vol_glb + call s_mpi_allreduce_sum(lag_void_avg, void_avg_glb) + lag_void_avg = void_avg_glb + end if +#endif + voltot = lag_void_avg + ! This voidavg value does not reflect the real void fraction in the cloud + ! since the cell which does not have bubbles are not accounted + if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol + + if (proc_rank == 0) then + write (LAG_VOID_ID, '(6X,4e24.8)') & + qtime, & + lag_void_avg, & + lag_void_max, & + voltot + end if + + end subroutine s_write_void_evol_particles + + subroutine s_close_void_evol + + if (proc_rank == 0) close (LAG_VOID_ID) + + end subroutine s_close_void_evol + + !> Subroutine that writes the restarting files for the particles in the lagrangian solver. + !! 
@param t_step Current time step + impure subroutine s_write_restart_lag_particles(t_step) + + ! Generic string used to store the address of a particular file + integer, intent(in) :: t_step + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + integer :: part_id, tot_part + integer :: i, k + +#ifdef MFC_MPI + ! For Parallel I/O + integer :: ifile, ierr + integer, dimension(MPI_STATUS_SIZE) :: status + integer(KIND=MPI_OFFSET_KIND) :: disp + integer :: view + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_particle_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + part_id = 0._wp + if (n_el_particles_loc /= 0) then + do k = 1, n_el_particles_loc + if (particle_in_domain_physical(particle_pos(k, 1:3, 1))) then + part_id = part_id + 1 + end if + end do + end if + + if (.not. parallel_io) return + + lsizes(1) = part_id + lsizes(2) = lag_io_vars + + ! Total number of particles + call MPI_ALLREDUCE(part_id, tot_part, 1, MPI_integer, & + MPI_SUM, MPI_COMM_WORLD, ierr) + + call MPI_ALLGATHER(part_id, 1, MPI_INTEGER, proc_particle_counts, 1, MPI_INTEGER, & + MPI_COMM_WORLD, ierr) + + ! Calculate starting index for this processor's particles + call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (proc_rank == 0) start_idx_part(1) = 0 + start_idx_part(2) = 0 + + gsizes(1) = tot_part + gsizes(2) = lag_io_vars + + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + + ! 
Clean up existing file + if (proc_rank == 0) then + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + end if + + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Write header using MPI I/O for consistency + call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, mytime, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, dt, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, num_procs, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, proc_particle_counts, num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + + if (part_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(max(1, part_id), 1:lag_io_vars)) + + i = 1 + do k = 1, n_el_particles_loc + if (particle_in_domain_physical(particle_pos(k, 1:3, 1))) then + MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_part_id(k, 1)) + MPI_IO_DATA_lag_bubbles(i, 2:4) = particle_pos(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 5:7) = particle_posPrev(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 8:10) = particle_vel(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 11) = particle_rad(k, 1) + ! MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) + MPI_IO_DATA_lag_bubbles(i, 13) = particle_R0(k) + MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats_part(k) + MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats_part(k) + ! MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) + ! MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) + ! MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) + MPI_IO_DATA_lag_bubbles(i, 19) = particle_mass(k) + ! MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) + ! 
MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) + i = i + 1 + end if + end do + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_particle_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars*part_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (MPI_IO_DATA_lag_bubbles) + + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_particle_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + +#endif + + end subroutine s_write_restart_lag_particles + + !> This procedure calculates the maximum and minimum radius of each bubble. 
+ subroutine s_calculate_lag_particle_stats() + + integer :: k + + $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', & + & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') + do k = 1, n_el_particles_loc + Rmax_glb = max(Rmax_glb, particle_rad(k, 1)/particle_R0(k)) + Rmin_glb = min(Rmin_glb, particle_rad(k, 1)/particle_R0(k)) + Rmax_stats_part(k) = max(Rmax_stats_part(k), particle_rad(k, 1)/particle_R0(k)) + Rmin_stats_part(k) = min(Rmin_stats_part(k), particle_rad(k, 1)/particle_R0(k)) + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_calculate_lag_particle_stats + + impure subroutine s_open_lag_particle_stats() + + character(LEN=path_len + 2*name_len) :: file_loc + character(LEN=20) :: FMT + logical :: file_exist + + write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + call my_inquire(trim(file_loc), file_exist) + + if (precision == 1) then + FMT = "(A10,A14,5A16)" + else + FMT = "(A10,A14,5A24)" + end if + + if (.not. file_exist) then + open (LAG_STATS_ID, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (LAG_STATS_ID, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' + else + open (LAG_STATS_ID, FILE=trim(file_loc), FORM='formatted', position='append') + end if + + end subroutine s_open_lag_particle_stats + + !> Subroutine that writes the maximum and minimum radius of each bubble. 
+ impure subroutine s_write_lag_particle_stats() + + integer :: k + character(LEN=path_len + 2*name_len) :: file_loc + character(LEN=20) :: FMT + + $:GPU_UPDATE(host='[Rmax_glb,Rmin_glb]') + + if (precision == 1) then + FMT = "(I10,I14,5F16.8)" + else + FMT = "(I10,I14,5F24.16)" + end if + + do k = 1, n_el_particles_loc + write (LAG_STATS_ID, FMT) & + proc_rank, & + lag_part_id(k, 1), & + particle_pos(k, 1, 1), & + particle_pos(k, 2, 1), & + particle_pos(k, 3, 1), & + Rmax_stats_part(k), & + Rmin_stats_part(k) + end do + + end subroutine s_write_lag_particle_stats + + subroutine s_close_lag_particle_stats + + close (LAG_STATS_ID) + + end subroutine s_close_lag_particle_stats + + !> The purpose of this subroutine is to remove one specific particle if dt is too small. + !! @param part_id Particle id + impure subroutine s_copy_lag_particle(dest, src) + + integer, intent(in) :: src, dest + + particle_R0(dest) = particle_R0(src) + Rmax_stats_part(dest) = Rmax_stats_part(src) + Rmin_stats_part(dest) = Rmin_stats_part(src) + particle_mass(dest) = particle_mass(src) + lag_part_id(dest, 1) = lag_part_id(src, 1) + particle_rad(dest, 1:2) = particle_rad(src, 1:2) + particle_vel(dest, 1:3, 1:2) = particle_vel(src, 1:3, 1:2) + particle_s(dest, 1:3, 1:2) = particle_s(src, 1:3, 1:2) + particle_pos(dest, 1:3, 1:2) = particle_pos(src, 1:3, 1:2) + particle_posPrev(dest, 1:3, 1:2) = particle_posPrev(src, 1:3, 1:2) + particle_draddt(dest, 1:lag_num_ts) = particle_draddt(src, 1:lag_num_ts) + f_p(dest, 1:3) = f_p(src, 1:3) + particle_dposdt(dest, 1:3, 1:lag_num_ts) = particle_dposdt(src, 1:3, 1:lag_num_ts) + particle_dveldt(dest, 1:3, 1:lag_num_ts) = particle_dveldt(src, 1:3, 1:lag_num_ts) + + end subroutine s_copy_lag_particle + + !> The purpose of this subroutine is to deallocate variables + impure subroutine s_finalize_particle_lagrangian_solver() + + integer :: i + + if (lag_params%write_void_evol) call s_close_void_evol + if (lag_params%write_bubbles) call 
s_close_lag_particle_evol() + if (lag_params%write_bubbles_stats) call s_close_lag_particle_stats() + + do i = 1, q_particles_idx + @:DEALLOCATE(q_particles(i)%sf) + @:DEALLOCATE(kahan_comp(i)%sf) + end do + @:DEALLOCATE(q_particles) + @:DEALLOCATE(kahan_comp) + + do i = 1, nField_vars + @:DEALLOCATE(field_vars(i)%sf) + end do + @:DEALLOCATE(field_vars) + + do i = 1, nWeights_interp + @:DEALLOCATE(weights_x_interp(i)%sf) + end do + @:DEALLOCATE(weights_x_interp) + + do i = 1, nWeights_interp + @:DEALLOCATE(weights_y_interp(i)%sf) + end do + @:DEALLOCATE(weights_y_interp) + + do i = 1, nWeights_interp + @:DEALLOCATE(weights_z_interp(i)%sf) + end do + @:DEALLOCATE(weights_z_interp) + + !Deallocating space + @:DEALLOCATE(lag_part_id) + @:DEALLOCATE(gid_to_local) + @:DEALLOCATE(particle_R0) + @:DEALLOCATE(Rmax_stats_part) + @:DEALLOCATE(Rmin_stats_part) + @:DEALLOCATE(particle_mass) + @:DEALLOCATE(p_AM) + @:DEALLOCATE(p_owner_rank) + @:DEALLOCATE(particle_rad) + @:DEALLOCATE(particle_pos) + @:DEALLOCATE(particle_posPrev) + @:DEALLOCATE(particle_vel) + @:DEALLOCATE(particle_s) + @:DEALLOCATE(particle_draddt) + @:DEALLOCATE(particle_dposdt) + @:DEALLOCATE(particle_dveldt) + @:DEALLOCATE(f_p) + @:DEALLOCATE(gSum) + + @:DEALLOCATE(force_recv_ids) + @:DEALLOCATE(force_recv_vals) + + @:DEALLOCATE(keep_bubble) + @:DEALLOCATE(wrap_bubble_loc, wrap_bubble_dir) + + @:DEALLOCATE(linked_list) + @:DEALLOCATE(particle_head) + + ! Deallocate cell list arrays + @:DEALLOCATE(cell_list_start) + @:DEALLOCATE(cell_list_count) + @:DEALLOCATE(cell_list_idx) + + end subroutine s_finalize_particle_lagrangian_solver + +end module m_particles_EL diff --git a/src/simulation/m_particles_EL_kernels.fpp b/src/simulation/m_particles_EL_kernels.fpp new file mode 100644 index 0000000000..198e2fcf25 --- /dev/null +++ b/src/simulation/m_particles_EL_kernels.fpp @@ -0,0 +1,979 @@ +!> +!! @file m_particles_EL_kernels.f90 +!! 
@brief Contains module m_particles_EL_kernels + +#:include 'macros.fpp' + +!> @brief This module contains kernel functions used to map the effect of the lagrangian particles +!! in the Eulerian framework. +module m_particles_EL_kernels + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + + use ieee_arithmetic !< For checking NaN + + implicit none + + ! Cell list for particle-to-cell mapping (rebuilt each RK stage before smearing) + integer, allocatable, dimension(:, :, :) :: cell_list_start ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:, :, :) :: cell_list_count ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:) :: cell_list_idx ! (1:nParticles_glb) sorted particle indices + $:GPU_DECLARE(create='[cell_list_start, cell_list_count, cell_list_idx]') + +contains + + ! !> The purpose of this subroutine is to compute each particles total contribution to the gaussian for proper normalization + subroutine s_compute_gaussian_contribution(rad, pos, cell, func_s) + $:GPU_ROUTINE(function_name='s_compute_gaussian_contribution',parallelism='[seq]', & + & cray_inline=True) + + real(wp), intent(in) :: rad + real(wp), intent(in), dimension(3) :: pos + integer, intent(in), dimension(3) :: cell + real(wp), intent(out) :: func_s + + real(wp) :: volpart, stddsv, Vol_loc, func + real(wp), dimension(3) :: nodecoord, center + + integer :: ip, jp, kp, di, dj, dk, di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end, mapCells_loc + integer, dimension(3) :: cellijk + + mapCells_loc = 1 + + volpart = (4._wp/3._wp)*pi*rad**3._wp + + call s_compute_stddsv(cell, volpart, stddsv) + + ip = cell(1) + jp = cell(2) + kp = cell(3) + + di_beg = ip - mapCells_loc + di_end = ip + mapCells_loc + dj_beg = jp - mapCells_loc + dj_end = jp + mapCells_loc + dk_beg = kp + dk_end = kp + + if (num_dims == 3) then + dk_beg = kp - mapCells_loc + dk_end = kp + mapCells_loc + end if + + func_s = 0._wp + do dk = dk_beg, dk_end + do dj = dj_beg, dj_end + do di = di_beg, di_end + + nodecoord(1) = 
x_cc(di) + nodecoord(2) = y_cc(dj) + nodecoord(3) = 0._wp + if (p > 0) nodecoord(3) = z_cc(dk) + + cellijk(1) = di + cellijk(2) = dj + cellijk(3) = dk + + center(1:2) = pos(1:2) + center(3) = 0._wp + if (p > 0) center(3) = pos(3) + + Vol_loc = dx(cellijk(1))*dy(cellijk(2)) + if (num_dims == 3) Vol_loc = dx(cellijk(1))*dy(cellijk(2))*dz(cellijk(3)) + + call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp, func) + + func_s = func_s + func*Vol_loc + end do + end do + end do + + end subroutine s_compute_gaussian_contribution + + !> The purpose of this subroutine is to compute the gaussian smearing of particle volume fraction and source terms with atomic cell updates + subroutine s_gaussian_atomic(rad, vel, pos, force_p, gauSum, cell, updatedvar, onlyBeta) + $:GPU_ROUTINE(function_name='s_gaussian_atomic',parallelism='[seq]', & + & cray_inline=True) + + real(wp), intent(in) :: rad, gauSum + real(wp), intent(in), dimension(3) :: pos, vel, force_p + integer, intent(in), dimension(3) :: cell + type(scalar_field), dimension(:), intent(inout) :: updatedvar + + real(wp) :: volpart, stddsv, Vol_loc, func, weight + real(wp) :: fp_x, fp_y, fp_z, vp_x, vp_y, vp_z + real(wp) :: addFun_alphap, addFun_alphap_vp_x, addFun_alphap_vp_y, addFun_alphap_vp_z + real(wp) :: addFun2_x, addFun2_y, addFun2_z, addFun_E + real(wp), dimension(3) :: nodecoord, center + + integer :: ip, jp, kp, di, dj, dk, di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end, mapCells_loc + integer, dimension(3) :: cellijk + logical, intent(in) :: onlyBeta + + mapCells_loc = 1 + + volpart = (4._wp/3._wp)*pi*rad**3 + + call s_compute_stddsv(cell, volpart, stddsv) + + ip = cell(1) + jp = cell(2) + kp = cell(3) + + di_beg = ip - mapCells_loc + di_end = ip + mapCells_loc + dj_beg = jp - mapCells_loc + dj_end = jp + mapCells_loc + dk_beg = kp + dk_end = kp + + if (num_dims == 3) then + dk_beg = kp - mapCells_loc + dk_end = kp + mapCells_loc + end if + + fp_x = -force_p(1) + fp_y = -force_p(2) + fp_z = -force_p(3) + 
+ vp_x = vel(1) + vp_y = vel(2) + vp_z = vel(3) + + center(1:2) = pos(1:2) + center(3) = 0._wp + if (p > 0) center(3) = pos(3) + + do dk = dk_beg, dk_end + do dj = dj_beg, dj_end + do di = di_beg, di_end + + nodecoord(1) = x_cc(di) + nodecoord(2) = y_cc(dj) + nodecoord(3) = 0._wp + if (p > 0) nodecoord(3) = z_cc(dk) + + cellijk(1) = di + cellijk(2) = dj + cellijk(3) = dk + + Vol_loc = dx(cellijk(1))*dy(cellijk(2)) + if (num_dims == 3) Vol_loc = Vol_loc*dz(cellijk(3)) + + call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp, func) + + weight = func/gauSum + + addFun_alphap = weight*volpart + $:GPU_ATOMIC(atomic='update') + updatedvar(1)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(1)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun_alphap, kind=stp) + + if (lag_params%solver_approach == 2 .and. .not. onlyBeta) then + + !Update particle momentum field(x) + addFun_alphap_vp_x = weight*volpart*vp_x + $:GPU_ATOMIC(atomic='update') + updatedvar(2)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(2)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun_alphap_vp_x, kind=stp) + + !Update particle momentum field(y) + addFun_alphap_vp_y = weight*volpart*vp_y + $:GPU_ATOMIC(atomic='update') + updatedvar(3)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(3)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun_alphap_vp_y, kind=stp) + + if (num_dims == 3) then + !Update particle momentum field(z) + addFun_alphap_vp_z = weight*volpart*vp_z + $:GPU_ATOMIC(atomic='update') + updatedvar(4)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(4)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun_alphap_vp_z, kind=stp) + end if + + !Update x-momentum source term + addFun2_x = weight*fp_x + $:GPU_ATOMIC(atomic='update') + updatedvar(5)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(5)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun2_x, kind=stp) + + !Update y-momentum source term + addFun2_y = weight*fp_y + 
$:GPU_ATOMIC(atomic='update') + updatedvar(6)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(6)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun2_y, kind=stp) + + if (num_dims == 3) then + !Update z-momentum source term + addFun2_z = weight*fp_z + $:GPU_ATOMIC(atomic='update') + updatedvar(7)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(7)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun2_z, kind=stp) + end if + + !Update energy source term + addFun_E = 0._wp + $:GPU_ATOMIC(atomic='update') + updatedvar(8)%sf(cellijk(1), cellijk(2), cellijk(3)) = & + updatedvar(8)%sf(cellijk(1), cellijk(2), cellijk(3)) & + + real(addFun_E, kind=stp) + + end if + + end do + end do + end do + + end subroutine s_gaussian_atomic + + !> The purpose of this subroutine is to apply the gaussian kernel function for each particle (Maeda and Colonius, 2018)). + subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func) + $:GPU_ROUTINE(function_name='s_applygaussian',parallelism='[seq]', & + & cray_inline=True) + + real(wp), dimension(3), intent(in) :: center + integer, dimension(3), intent(in) :: cellaux + real(wp), dimension(3), intent(in) :: nodecoord + real(wp), intent(in) :: stddsv + real(wp), intent(in) :: strength_idx + real(wp), intent(out) :: func + integer :: i + + real(wp) :: distance + real(wp) :: theta, dtheta, L2, dzp, Lz2, zc + real(wp) :: Nr, Nr_count + + distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + (center(3) - nodecoord(3))**2._wp) + + if (num_dims == 3) then + !< 3D gaussian function + func = exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp + else + if (cyl_coord) then + !< 2D cylindrical function: + ! 
We smear particles in the azimuthal direction for given r + theta = 0._wp + Nr = ceiling(2._wp*pi*nodecoord(2)/(y_cb(cellaux(2)) - y_cb(cellaux(2) - 1))) + dtheta = 2._wp*pi/Nr + L2 = center(2)**2._wp + nodecoord(2)**2._wp - 2._wp*center(2)*nodecoord(2)*cos(theta) + distance = sqrt((center(1) - nodecoord(1))**2._wp + L2) + ! Factor 2._wp is for symmetry (upper half of the 2D field (+r) is considered) + func = dtheta/2._wp/pi*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp + Nr_count = 0._wp + do while (Nr_count < Nr - 1._wp) + Nr_count = Nr_count + 1._wp + theta = Nr_count*dtheta + ! trigonometric relation + L2 = center(2)**2._wp + nodecoord(2)**2._wp - 2._wp*center(2)*nodecoord(2)*cos(theta) + distance = sqrt((center(1) - nodecoord(1))**2._wp + L2) + ! nodecoord(2)*dtheta is the azimuthal width of the cell + func = func + & + dtheta/2._wp/pi*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**(3._wp*(strength_idx + 1._wp)) + end do + else + !< 2D cartesian function: Equation (48) from Madea and Colonius 2018 + ! We smear particles considering a virtual depth (lag_params%charwidth) with lag_params%charNz cells + dzp = (lag_params%charwidth/(lag_params%charNz + 1._wp)) + + func = 0._wp + do i = 0, lag_params%charNz + zc = (-lag_params%charwidth/2._wp + dzp*(0.5_wp + i)) ! Center of virtual cell i in z-direction + Lz2 = (center(3) - zc)**2._wp + distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) + func = func + dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp + end do + end if + end if + + end subroutine s_applygaussian + + !> Calculates the standard deviation of the particle being smeared in the Eulerian framework. + !! @param cell Cell where the particle is located + !! @param volpart Volume of the particle + !! 
@param stddsv Standard deviation
+ end if + end if + + end subroutine s_check_celloutside + + !> This subroutine transforms the computational coordinates of the particle from + !! real type into integer. + !! @param s Computational coordinates of the particle, real type + !! @param get_cell Computational coordinates of the particle, integer type + subroutine s_get_cell(s_cell, get_cell) + $:GPU_ROUTINE(function_name='s_get_cell',parallelism='[seq]', & + & cray_inline=True) + + real(wp), dimension(3), intent(in) :: s_cell + integer, dimension(3), intent(out) :: get_cell + integer :: i + + get_cell(:) = int(s_cell(:)) + do i = 1, num_dims + if (s_cell(i) < 0._wp) get_cell(i) = get_cell(i) - 1 + end do + + end subroutine s_get_cell + + !> The purpose of this procedure is to calculate the characteristic cell volume + !! @param cell Computational coordinates (x, y, z) + !! @param Charvol Characteristic volume + subroutine s_get_char_vol(cellx, celly, cellz, Charvol) + $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & + & cray_inline=True) + + integer, intent(in) :: cellx, celly, cellz + real(wp), intent(out) :: Charvol + + if (p > 0) then + Charvol = dx(cellx)*dy(celly)*dz(cellz) + else + if (cyl_coord) then + Charvol = dx(cellx)*dy(celly)*y_cc(celly)*2._wp*pi + else + Charvol = dx(cellx)*dy(celly)*lag_params%charwidth + end if + end if + + end subroutine s_get_char_vol + + !! This function calculates the force on a particle + !! based on the pressure gradient, velocity, and drag model. + !! @param pos Position of the particle + !! @param rad Radius of the particle + !! @param vel_p Velocity of the particle + !! @param mass_p Particle mass + !! @param Re Viscosity! + !! @param rho Density of the fluid + !! @param vol_frac Particle Volume Fraction + !! @param cell Computational coordinates of the particle + !! @param q_prim_vf Eulerian field with primitive variables + !! 
@return force Net force on the particle; rmass_add is the added-mass contribution
    subroutine s_get_particle_force(pos, rad, vel_p, mass_p, Re, gamm, vol_frac, drhodt, cell, &
                                    q_prim_vf, fieldvars, wx, wy, wz, force, rmass_add)
        $:GPU_ROUTINE(parallelism='[seq]')
        real(wp), intent(in) :: rad, mass_p, Re, gamm, vol_frac, drhodt
        real(wp), dimension(3), intent(in) :: pos
        integer, dimension(3), intent(in) :: cell
        real(wp), dimension(3), intent(in) :: vel_p
        type(scalar_field), dimension(:), intent(in) :: fieldvars
        type(scalar_field), dimension(:), intent(in) :: wx, wy, wz
        type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf

        real(wp), dimension(3), intent(out) :: force
        real(wp), intent(out) :: rmass_add

        ! NOTE(review): a, xi, eta, L, flux_f, flux_b and p1 are declared but
        ! never used in this routine -- candidates for removal.
        real(wp) :: a, vol, rho_fluid, pressure_fluid
        real(wp), dimension(3) :: v_rel, dp
        real(wp), dimension(fd_order) :: xi, eta, L
        real(wp) :: particle_diam, gas_mu, vmag, cson
        real(wp) :: slip_velocity_x, slip_velocity_y, slip_velocity_z, beta
        real(wp), dimension(3) :: fluid_vel
        integer :: dir

        !Added pass params
        real(wp) :: mach, Cam, flux_f, flux_b, div_u, SDrho, vpgradrho
        real(wp), dimension(3) :: rhoDuDt, grad_rho, fam
        integer, dimension(3) :: p1

        ! Zero accumulators; 3rd components stay zero in 2D so the full
        ! 3-component expressions below remain valid.
        force = 0._wp
        dp = 0._wp
        grad_rho = 0._wp
        fam = 0._wp
        fluid_vel = 0._wp
        v_rel = 0._wp
        rhoDuDt = 0._wp
        SDrho = 0._wp
        ! div_u = 0._wp

        !!Interpolation - either even ordered barycentric or 0th order
        ! fieldvars layout (per the indices used): 1..3 pressure gradient,
        ! 4..6 density gradient, 7..9 velocity-divergence terms (commented out).
        if (lag_params%interpolation_order > 1) then
            rho_fluid = f_interp_barycentric(pos, cell, q_prim_vf, 1, wx, wy, wz)
            pressure_fluid = f_interp_barycentric(pos, cell, q_prim_vf, E_idx, wx, wy, wz)
            do dir = 1, num_dims
                if (lag_params%pressure_force .or. lag_params%added_mass_model > 0) then
                    dp(dir) = f_interp_barycentric(pos, cell, fieldvars, dir, wx, wy, wz)
                end if
                if (lag_params%added_mass_model > 0) then
                    grad_rho(dir) = f_interp_barycentric(pos, cell, fieldvars, 3 + dir, wx, wy, wz)
                    ! div_u = div_u + f_interp_barycentric(pos, cell, fieldvars, 6 + dir, wx, wy, wz)
                end if
                fluid_vel(dir) = f_interp_barycentric(pos, cell, q_prim_vf, momxb + dir - 1, wx, wy, wz)
            end do
        else
            ! 0th order: sample the host cell directly
            rho_fluid = q_prim_vf(1)%sf(cell(1), cell(2), cell(3))
            pressure_fluid = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))
            do dir = 1, num_dims
                if (lag_params%pressure_force .or. lag_params%added_mass_model > 0) then
                    dp(dir) = fieldvars(dir)%sf(cell(1), cell(2), cell(3))
                end if
                if (lag_params%added_mass_model > 0) then
                    grad_rho(dir) = fieldvars(3 + dir)%sf(cell(1), cell(2), cell(3))
                    ! div_u = div_u + fieldvars(6 + dir)%sf(cell(1), cell(2), cell(3))
                end if
                fluid_vel(dir) = q_prim_vf(momxb + dir - 1)%sf(cell(1), cell(2), cell(3))
            end do
        end if

        ! Relative (particle - fluid) velocity
        v_rel = vel_p - fluid_vel

        if (lag_params%qs_drag_model > 0 .or. lag_params%added_mass_model > 0) then
            ! Quasi-steady Drag Force Parameters
            slip_velocity_x = fluid_vel(1) - vel_p(1)
            slip_velocity_y = fluid_vel(2) - vel_p(2)
            if (num_dims == 3) then
                slip_velocity_z = fluid_vel(3) - vel_p(3)
                vmag = sqrt(slip_velocity_x*slip_velocity_x + slip_velocity_y*slip_velocity_y + &
                            slip_velocity_z*slip_velocity_z)
            elseif (num_dims == 2) then
                vmag = sqrt(slip_velocity_x*slip_velocity_x + slip_velocity_y*slip_velocity_y)
            end if
            particle_diam = rad*2._wp
            cson = sqrt((gamm*pressure_fluid)/rho_fluid) !gamma*P/rho
            ! The dummy named Re actually carries the dynamic viscosity
            gas_mu = Re
        end if

        if (lag_params%added_mass_model > 0) then
            ! Pressure-gradient form of rho*Du/Dt, and the material derivative
            ! of density assembled from drhodt and the density gradient
            rhoDuDt = -dp
            vpgradrho = dot_product(vel_p, grad_rho)
            SDrho = drhodt + fluid_vel(1)*grad_rho(1) + fluid_vel(2)*grad_rho(2) + fluid_vel(3)*grad_rho(3)
            mach = vmag/cson
        end if

        ! Step 1: Force component quasi-steady
        ! All models return beta such that F_qs = -beta * v_rel
        if (lag_params%qs_drag_model == 1) then
            beta = QS_Parmar(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac)
            force = force - beta*v_rel
        else if (lag_params%qs_drag_model == 2) then
            beta = QS_Osnes(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac)
            force = force - beta*v_rel
        else if (lag_params%qs_drag_model == 3) then
            beta = QS_ModifiedParmar(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac)
            force = force - beta*v_rel
        else if (lag_params%qs_drag_model == 4) then
            beta = QS_Gidaspow(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac)
            force = force - beta*v_rel
        else
            !No Quasi-Steady drag
        end if

        ! Step 1.1: Stokes drag
        if (lag_params%stokes_drag == 1) then ! Free slip Stokes drag
            force = force - 4._wp*pi*gas_mu*rad*v_rel
        elseif (lag_params%stokes_drag == 2) then ! No slip Stokes drag
            force = force - 6._wp*pi*gas_mu*rad*v_rel
        else
            !No stokes drag
        end if

        ! Step 2: Pressure Gradient Force (buoyancy-like, -V * grad(p))
        if (lag_params%pressure_force) then
            vol = (4._wp/3._wp)*pi*(rad**3._wp)
            force = force - vol*dp
        end if

        ! Step 3: Gravitational Force
        if (lag_params%gravity_force) then
            force = force + (mass_p)*accel_bf
        end if

        ! Step 4: Added Mass Force
        ! Compressibility correction for the added-mass coefficient, capped
        ! at Mach 0.6, plus a volume-fraction correction.
        if (lag_params%added_mass_model == 1) then
            vol = (4._wp/3._wp)*pi*(rad**3._wp)
            if (mach > 0.6_wp) then
                Cam = 1._wp + 1.8_wp*(0.6_wp**2) + 7.6_wp* &
                      (0.6_wp**4)
            else
                Cam = 1._wp + 1.8_wp*mach**2 + 7.6_wp*mach**4
            end if

            Cam = 0.5_wp*Cam*(1._wp + 0.68_wp*vol_frac**2)
            rmass_add = rho_fluid*vol*Cam !(1._wp-vol_frac)*rho_fluid*vol*Cam

            fam = Cam*vol*(vel_p*SDrho + rhoDuDt + &
                           fluid_vel*(vpgradrho))

            ! Guard against NaN/Inf from degenerate interpolation inputs
            do dir = 1, num_dims
                if (.not. ieee_is_finite(fam(dir))) then
                    fam(dir) = 0._wp
                    rmass_add = 0._wp
                end if
            end do
            force = force + fam
        else
            rmass_add = 0._wp
        end if

        ! Final sanitization: never return a non-finite force component
        do dir = 1, num_dims
            if (.not. ieee_is_finite(force(dir))) then
                force(dir) = 0._wp
            end if
        end do

    end subroutine s_get_particle_force

    !> Barycentric (Lagrange) interpolation of an Eulerian field at the
    !! particle position, over a (N+1)-point stencil centred on the host cell.
    !! @param pos Particle position
    !! @param cell Host cell indices of the particle
    !! @param field_vf Field array to interpolate from
    !! @param field_index Component of field_vf to interpolate
    !! @param wx, wy, wz Precomputed barycentric weight fields per direction
    function f_interp_barycentric(pos, cell, field_vf, field_index, wx, wy, wz) result(val)
        $:GPU_ROUTINE(parallelism='[seq]')

        real(wp), dimension(3), intent(in) :: pos
        integer, dimension(3), intent(in) :: cell
        type(scalar_field), dimension(:), intent(in) :: field_vf
        type(scalar_field), dimension(:), intent(in) :: wx, wy, wz
        integer, intent(in) :: field_index

        ! NOTE(review): a, b, local_min, local_max, prod_x, prod_y, prod_z are
        ! declared but never used -- candidates for removal.
        integer :: i, j, k, ix, jy, kz, npts, npts_z, N, a, b
        integer :: ix_count, jy_count, kz_count
        real(wp) :: weight, numerator, denominator, xBar, eps
        real(wp) :: val, local_min, local_max, prod_x, prod_y, prod_z

        i = cell(1)
        j = cell(2)
        k = cell(3)

        N = lag_params%interpolation_order
        npts = N/2
        npts_z = npts
        if (num_dims == 2) npts_z = 0
        eps = 1.e-12_wp
        numerator = 0._wp
        denominator = 0._wp

        ! if (abs(pos(1) - x_cc(i)) <= eps .and. &
        !     abs(pos(2) - y_cc(j)) <= eps .and. &
        !     abs(pos(3) - z_cc(k)) <= eps) then
        !     val = field_vf(field_index)%sf(i, j, k)
        !     return
        ! end if

        ! Accumulate the barycentric sum over the stencil. If pos coincides
        ! with a node, xBar -> 0 and weight becomes non-finite; the
        ! ieee_is_finite fallback below then reverts to the host-cell value.
        ix_count = 0
        do ix = i - npts, i + npts
            ix_count = ix_count + 1
            jy_count = 0
            do jy = j - npts, j + npts
                jy_count = jy_count + 1
                kz_count = 0
                do kz = k - npts_z, k + npts_z
                    kz_count = kz_count + 1
                    if (num_dims == 3) then
                        xBar = (pos(1) - x_cc(ix))*(pos(2) - y_cc(jy))*(pos(3) - z_cc(kz))
                        weight = wx(ix_count)%sf(i, 1, 1)*wy(jy_count)%sf(j, 1, 1)*wz(kz_count)%sf(k, 1, 1)
                    else
                        xBar = (pos(1) - x_cc(ix))*(pos(2) - y_cc(jy))
                        weight = wx(ix_count)%sf(i, 1, 1)*wy(jy_count)%sf(j, 1, 1)
                    end if
                    weight = weight/xBar
                    numerator = numerator + weight*field_vf(field_index)%sf(ix, jy, kz)
                    denominator = denominator + weight
                end do
            end do
        end do

        val = numerator/denominator

        ! Fall back to the 0th-order (host cell) value on NaN/Inf; flush
        ! tiny values to exactly zero.
        if (.not. ieee_is_finite(val)) then
            val = field_vf(field_index)%sf(i, j, k)

        elseif (abs(val) <= eps) then
            val = 0._wp

        end if

    end function

    ! 
Quasi-steady force (Re_p and Ma_p corrections):
    ! Improved Drag Correlation for Spheres and Application
    ! to Shock-Tube Experiments
    ! - Parmar et al. (2010)
    ! - AIAA Journal
    !
    ! Quasi-steady force (phi corrections):
    ! The Added Mass, Basset, and Viscous Drag Coefficients
    ! in Nondilute Bubbly Liquids Undergoing Small-Amplitude
    ! Oscillatory Motion
    ! - Sangani et al. (1991)
    ! - Phys. Fluids A
    !
    ! Returns beta such that F_qs = -beta * v_rel.
    ! All real literals carry the _wp kind suffix so the correlation is
    ! evaluated at working precision (previously many were default real).
    function QS_Parmar(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta)
        $:GPU_ROUTINE(parallelism='[seq]')
        ! NOTE(review): gamma is unused here; the dummy is retained so all
        ! QS_* models share one interface.
        real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction
        real(wp) :: rcd1, rmacr, rcd_mcr, rcd_std, rmach_rat, rcd_M1
        real(wp) :: rcd_M2, C1, C2, C3, f1M, f2M, f3M, lrep, factor, phi_corr
        real(wp) :: beta
        real(wp) :: rmachp, mp, phi, rep, re

        ! Clamp Mach number, volume fraction, and Reynolds number away from
        ! zero to keep the correlations well-defined.
        rmachp = vmag/cson
        mp = max(rmachp, 0.01_wp)
        phi = max(volume_fraction, 0.0001_wp)
        rep = vmag*dp*rho/mu_fluid
        re = max(rep, 0.1_wp)

        if (re < 1.e-14_wp) then
            ! Unreachable given the re >= 0.1 floor above; kept defensively.
            rcd1 = 1.0_wp
        else
            rmacr = 0.6_wp ! Critical Mach number
            ! Drag ratio at the critical Mach number
            rcd_mcr = (1._wp + 0.15_wp*re**0.684_wp) + &
                      (re/24.0_wp)*(0.513_wp/(1._wp + 483._wp/re**0.669_wp))
            if (mp <= rmacr) then
                ! Subcritical: blend the standard (Clift-Gauvin) and critical
                ! drag ratios linearly in Mach
                rcd_std = (1._wp + 0.15_wp*re**0.687_wp) + &
                          (re/24.0_wp)*(0.42_wp/(1._wp + 42500._wp/re**1.16_wp))
                rmach_rat = mp/rmacr
                rcd1 = rcd_std + (rcd_mcr - rcd_std)*rmach_rat
            else if (mp <= 1.0_wp) then
                ! Supercritical subsonic: 3-node Lagrange interpolation in log(Re)
                rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + &
                         (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp)
                C1 = 6.48_wp
                C2 = 9.28_wp
                C3 = 12.21_wp
                f1M = -1.884_wp + 8.422_wp*mp - 13.70_wp*mp**2 + 8.162_wp*mp**3
                f2M = -2.228_wp + 10.35_wp*mp - 16.96_wp*mp**2 + 9.840_wp*mp**3
                f3M = 4.362_wp - 16.91_wp*mp + 19.84_wp*mp**2 - 6.296_wp*mp**3
                lrep = log(re)
                factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) &
                         + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) &
                         + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2))
                rcd1 = rcd_mcr + (rcd_M1 - rcd_mcr)*factor
            else if (mp < 1.75_wp) then
                ! Supersonic transition regime
                rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + &
                         (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp)
                rcd_M2 = (1.0_wp + 0.107_wp*re**0.867_wp) + &
                         (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp)
                C1 = 6.48_wp
                C2 = 8.93_wp
                C3 = 12.21_wp
                f1M = -2.963_wp + 4.392_wp*mp - 1.169_wp*mp**2 - 0.027_wp*mp**3 &
                      - 0.233_wp*exp((1.0_wp - mp)/0.011_wp)
                f2M = -6.617_wp + 12.11_wp*mp - 6.501_wp*mp**2 + 1.182_wp*mp**3 &
                      - 0.174_wp*exp((1.0_wp - mp)/0.010_wp)
                f3M = -5.866_wp + 11.57_wp*mp - 6.665_wp*mp**2 + 1.312_wp*mp**3 &
                      - 0.350_wp*exp((1.0_wp - mp)/0.012_wp)
                lrep = log(re)
                factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) &
                         + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) &
                         + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2))
                rcd1 = rcd_M1 + (rcd_M2 - rcd_M1)*factor
            else
                ! Fully supersonic limit
                rcd1 = (1.0_wp + 0.107_wp*re**0.867_wp) + &
                       (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp)
            end if ! mp
        end if ! re

        ! Sangani's volume fraction correction for dilute random arrays
        ! Capping volume fraction at 0.5
        phi_corr = (1.0_wp + 5.94_wp*min(phi, 0.5_wp))

        ! (The unused diagnostic cd = (24/re)*rcd1*phi_corr was removed.)
        ! Stokes drag scaling times the Re/Ma drag ratio ...
        beta = rcd1*3.0_wp*pi*mu_fluid*dp

        ! ... times the volume-fraction correction
        beta = beta*phi_corr

    end function QS_Parmar

    ! 
Quasi-steady force (Re_p and Ma_p corrections):
    ! Improved Drag Correlation for Spheres and Application
    ! to Shock-Tube Experiments
    ! - Parmar et al. (2010)
    ! - AIAA Journal
    !
    ! Quasi-steady force (phi corrections):
    ! Sangani et al. (1991) volume fraction correction overshoots
    ! the drag coefficient.
    !
    ! We adopt instead Osnes et al. (2023) volume fraction correction
    ! based on Tenneti et al. with one extra term.
    !
    ! At Mach=0, the drag coefficient from this subroutine matches very
    ! well with the one calculated using the Osnes subroutine, for various
    ! Reynolds numbers and volume fractions.
    !
    ! Returns beta such that F_qs = -beta * v_rel. All real literals carry
    ! the _wp kind suffix (previously many were default real, losing
    ! precision when wp is double precision).
    function QS_ModifiedParmar(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta)
        $:GPU_ROUTINE(parallelism='[seq]')
        ! NOTE(review): gamma is unused here; the dummy is retained so all
        ! QS_* models share one interface.
        real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction
        real(wp) :: rcd1, rmacr, rcd_mcr, rcd_std, rmach_rat, rcd_M1
        real(wp) :: rcd_M2, C1, C2, C3, f1M, f2M, f3M, lrep, factor, cd
        real(wp) :: b1, b2, b3
        real(wp) :: beta
        real(wp) :: rmachp, mp, phi, rep, re

        ! Clamp Mach number, volume fraction, and Reynolds number away from
        ! zero to keep the correlations well-defined.
        rmachp = vmag/cson
        mp = max(rmachp, 0.01_wp)
        phi = max(volume_fraction, 0.0001_wp)
        rep = vmag*dp*rho/mu_fluid
        re = max(rep, 0.1_wp)

        if (re < 1.e-14_wp) then
            ! Unreachable given the re >= 0.1 floor above; kept defensively.
            rcd1 = 1.0_wp
        else
            rmacr = 0.6_wp ! Critical Mach number
            rcd_mcr = (1._wp + 0.15_wp*re**0.684_wp) + &
                      (re/24.0_wp)*(0.513_wp/(1._wp + 483._wp/re**0.669_wp))
            if (mp <= rmacr) then
                ! Subcritical: blend standard (Clift-Gauvin) and critical drag
                rcd_std = (1._wp + 0.15_wp*re**0.687_wp) + &
                          (re/24.0_wp)*(0.42_wp/(1._wp + 42500._wp/re**1.16_wp))
                rmach_rat = mp/rmacr
                rcd1 = rcd_std + (rcd_mcr - rcd_std)*rmach_rat
            else if (mp <= 1.0_wp) then
                ! Supercritical subsonic: 3-node Lagrange interpolation in log(Re)
                rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + &
                         (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp)
                C1 = 6.48_wp
                C2 = 9.28_wp
                C3 = 12.21_wp
                f1M = -1.884_wp + 8.422_wp*mp - 13.70_wp*mp**2 + 8.162_wp*mp**3
                f2M = -2.228_wp + 10.35_wp*mp - 16.96_wp*mp**2 + 9.840_wp*mp**3
                f3M = 4.362_wp - 16.91_wp*mp + 19.84_wp*mp**2 - 6.296_wp*mp**3
                lrep = log(re)
                factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) &
                         + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) &
                         + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2))
                rcd1 = rcd_mcr + (rcd_M1 - rcd_mcr)*factor
            else if (mp < 1.75_wp) then
                ! Supersonic transition regime
                rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + &
                         (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp)
                rcd_M2 = (1.0_wp + 0.107_wp*re**0.867_wp) + &
                         (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp)
                C1 = 6.48_wp
                C2 = 8.93_wp
                C3 = 12.21_wp
                f1M = -2.963_wp + 4.392_wp*mp - 1.169_wp*mp**2 - 0.027_wp*mp**3 &
                      - 0.233_wp*exp((1.0_wp - mp)/0.011_wp)
                f2M = -6.617_wp + 12.11_wp*mp - 6.501_wp*mp**2 + 1.182_wp*mp**3 &
                      - 0.174_wp*exp((1.0_wp - mp)/0.010_wp)
                f3M = -5.866_wp + 11.57_wp*mp - 6.665_wp*mp**2 + 1.312_wp*mp**3 &
                      - 0.350_wp*exp((1.0_wp - mp)/0.012_wp)
                lrep = log(re)
                factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) &
                         + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) &
                         + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2))
                rcd1 = rcd_M1 + (rcd_M2 - rcd_M1)*factor
            else
                ! Fully supersonic limit
                rcd1 = (1.0_wp + 0.107_wp*re**0.867_wp) + &
                       (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp)
            end if ! mp
        end if ! re

        ! Osnes's volume fraction correction
        b1 = 5.81_wp*phi/((1.0_wp - phi)**2) + &
             0.48_wp*(phi**(1._wp/3._wp))/((1.0_wp - phi)**3)

        ! NOTE(review): '(1.0 - phi)*2' appears both here and in QS_Osnes;
        ! the published correlation suggests '(1.0 - phi)**2' may be intended.
        ! Kept as-is pending verification against Osnes et al. (2023).
        b2 = ((1.0_wp - phi)**2)*(phi**3)* &
             re*(0.95_wp + 0.61_wp*(phi**3)/((1.0_wp - phi)*2._wp))

        b3 = min(sqrt(20.0_wp*mp), 1.0_wp)* &
             (5.65_wp*phi - 22.0_wp*(phi**2) + 23.4_wp*(phi**3))* &
             (1._wp + tanh((mp - (0.65_wp - 0.24_wp*phi))/0.35_wp))

        cd = (24.0_wp/re)*rcd1

        cd = cd/(1.0_wp - phi) + b3 + (24.0_wp/re)*(1.0_wp - phi)*(b1 + b2)

        beta = 3.0_wp*pi*mu_fluid*dp*(re/24.0_wp)*cd

    end function QS_ModifiedParmar

    ! QS Force calculated as a function of Re, Ma and phi
    !
    ! Use Osnes etal (2023) correlations
    ! A.N. Osnes, M. Vartdal, M. Khalloufi,
    ! J. Capecelatro, and S. Balachandar.
    ! Comprehensive quasi-steady force correlations for compressible flow
    ! through random particle suspensions.
    ! International Journal of Multiphase Flow, Vol. 165, 104485, (2023).
    ! doi: https://doi.org/10.1016/j.imultiphaseflow.2023.104485.
    !
    ! E. Loth, J.T. Daspit, M. Jeong, T. Nagata, and T. Nonomura.
    ! Supersonic and hypersonic drag coefficients for a sphere.
    ! AIAA Journal, Vol. 59(8), pp. 3261-3274, (2021).
    ! doi: https://doi.org/10.2514/1.J060153.
    !
    ! NOTE: Re<45 Rarefied formula of Loth et al has been redefined by Balachandar
    ! to avoid singularity as Ma -> 0.
    function QS_Osnes(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta)
        $:GPU_ROUTINE(parallelism='[seq]')
        real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction
        real(wp) :: rmachp, mp, phi, rep, re
        real(wp) :: Knp, fKn, CD1, s, CD2, cd_loth, CM, GM, HM, b1, b2, b3, cd, sgby2, JMt
        real(wp) :: beta

        ! Clamp Mach number, volume fraction, and Reynolds number away from
        ! zero to keep the correlations well-defined.
        rmachp = vmag/cson
        mp = max(rmachp, 0.01_wp)
        phi = max(volume_fraction, 0.0001_wp)
        rep = vmag*dp*rho/mu_fluid
        re = max(rep, 0.1_wp)

        ! Loth's correlation
        if (re <= 45.0_wp) then
            ! Rarefied-dominated regime
            Knp = sqrt(0.5_wp*pi*gamma)*mp/re
            if (Knp > 0.01_wp) then
                fKn = 1.0_wp/(1.0_wp + Knp*(2.514_wp + 0.8_wp*exp(-0.55_wp/Knp)))
            else
                ! Freeze the Cunningham-type correction below Kn = 0.01
                fKn = 1.0_wp/(1.0_wp + Knp*(2.514_wp + 0.8_wp*exp(-0.55_wp/0.01_wp)))

            end if
            CD1 = (24.0_wp/re)*(1.0_wp + 0.15_wp*re**(0.687_wp))*fKn
            s = mp*sqrt(0.5_wp*gamma)
            sgby2 = sqrt(0.5_wp*gamma)
            if (mp <= 1.0_wp) then
                !JMt = 2.26_wp*(mp**4) - 0.1_wp*(mp**3) + 0.14_wp*mp
                JMt = 2.26_wp*(mp**4) + 0.14_wp*mp
            else
                JMt = 1.6_wp*(mp**4) + 0.25_wp*(mp**3) + 0.11_wp*(mp**2) + 0.44_wp*mp
            end if
            !
            ! Reformulated version of Loth et al. to avoid singularity at mp = 0
            !
            CD2 = (1.0_wp + 2.0_wp*(s**2))*exp(-s**2)*mp &
                  /((sgby2**3)*sqrt(pi)) &
                  + (4.0_wp*(s**4) + 4.0_wp*(s**2) - 1.0_wp) &
                  *erf(s)/(2.0_wp*(sgby2**4)) &
                  + (2.0_wp*(mp**3)/(3.0_wp*sgby2))*sqrt(pi)

            CD2 = CD2/(1.0_wp + (((CD2/JMt) - 1.0_wp)*sqrt(re/45.0_wp)))
            ! NOTE(review): Loth's blend is usually CD1/(1+Ma^4) +
            ! Ma^4*CD2/(1+Ma^4); the mp**4 factor on the CD2 term is absent
            ! here. Kept as-is (may be part of the Balachandar reformulation);
            ! verify against the reference.
            cd_loth = CD1/(1.0_wp + (mp**4)) &
                      + CD2/(1.0_wp + (mp**4))
        else
            ! Compression-dominated regime
            ! TLJ: coefficients tweaked to get continuous values
            ! on the two branches at the critical points
            if (mp < 1.5_wp) then
                CM = 1.65_wp + 0.65_wp*tanh(4._wp*mp - 3.4_wp)
            else
                !CM = 2.18_wp - 0.13_wp*tanh(0.9_wp*mp - 2.7_wp)
                CM = 2.18_wp - 0.12913149918318745_wp*tanh(0.9_wp*mp - 2.7_wp)
            end if
            if (mp < 0.8_wp) then
                GM = 166.0_wp*(mp**3) + 3.29_wp*(mp**2) - 10.9_wp*mp + 20._wp
            else
                !GM = 5.0_wp + 40._wp*(mp**(-3))
                GM = 5.0_wp + 47.809331200000017_wp*(mp**(-3))
            end if
            if (mp < 1.0_wp) then
                HM = 0.0239_wp*(mp**3) + 0.212_wp*(mp**2) &
                     - 0.074_wp*mp + 1._wp
            else
                !HM = 0.93_wp + 1.0_wp / (3.5_wp + (mp**5))
                HM = 0.93967777777777772_wp + 1.0_wp/(3.5_wp + (mp**5))
            end if

            cd_loth = (24.0_wp/re)*(1._wp + 0.15_wp*(re**(0.687_wp)))*HM + &
                      0.42_wp*CM/(1._wp + 42500._wp/re**(1.16_wp*CM) + GM/sqrt(re))

        end if

        ! Osnes's volume fraction correction (same b1/b2/b3 terms as in
        ! QS_ModifiedParmar; see the '(1.0 - phi)*2' note there)
        b1 = 5.81_wp*phi/((1.0_wp - phi)**2) + &
             0.48_wp*(phi**(1._wp/3._wp))/((1.0_wp - phi)**3)

        b2 = ((1.0_wp - phi)**2)*(phi**3)* &
             re*(0.95_wp + 0.61_wp*(phi**3)/((1.0_wp - phi)*2._wp))

        b3 = min(sqrt(20.0_wp*mp), 1.0_wp)* &
             (5.65_wp*phi - 22.0_wp*(phi**2) + 23.4_wp*(phi**3))* &
             (1._wp + tanh((mp - (0.65_wp - 0.24_wp*phi))/0.35_wp))

        cd = cd_loth/(1.0_wp - phi) + b3 + (24.0_wp/re)*(1.0_wp - phi)*(b1 + b2)

        beta = 3.0_wp*pi*mu_fluid*dp*(re/24.0_wp)*cd

    end function QS_Osnes

! Subroutine for Quasi-Steady Drag Model of Gidaspow
!
! D. Gidaspow, Multiphase Flow and Fluidization (Academic Press, 1994)
!
! Note: Model is provided per cell volume. We convert that to per
! 
particle using the particle volume fraction and volume + function QS_Gidaspow(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta) + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction + real(wp) :: cd, phifRep, phif + real(wp) :: phi, rep, re + real(wp) :: beta + + rep = vmag*dp*rho/mu_fluid + phi = max(volume_fraction, 0.0001_wp) + phif = max(1._wp - volume_fraction, 0.0001_wp) + re = max(rep, 0.1_wp) + + phifRep = phif*re + + if (phifRep < 1000.0) then + cd = 24.0/phifRep*(1.0 + 0.15*(phifRep)**0.687) + else + cd = 0.44 + end if + + if (phif < 0.8) then + beta = 150.0*((phi**2)*mu_fluid)/(phif*dp**2) & + + 1.75*(rho*phi*vmag/dp) + else + beta = 0.75*cd*phi*rho*vmag/(dp*phif**1.65) + end if + + beta = beta*(pi*dp**3)/(6.0*phi) + + end function QS_Gidaspow + +end module m_particles_EL_kernels diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index e6fd6348ed..8901a2c8bf 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -30,6 +30,8 @@ module m_rhs use m_bubbles_EL + use m_particles_EL + use m_qbmm !< Moment inversion use m_hypoelastic @@ -1033,21 +1035,46 @@ contains end if if (bubbles_lagrange) then - ! RHS additions for sub-grid bubbles_lagrange - call nvtxStartRange("RHS-EL-BUBBLES-SRC") - call s_compute_bubbles_EL_source( & - q_cons_qp%vf(1:sys_size), & - q_prim_qp%vf(1:sys_size), & - rhs_vf) - call nvtxEndRange ! Compute bubble dynamics if (.not. adap_dt) then call nvtxStartRange("RHS-EL-BUBBLES-DYN") call s_compute_bubble_EL_dynamics( & q_prim_qp%vf(1:sys_size), & + bc_type, & stage) call nvtxEndRange end if + + ! RHS additions for sub-grid bubbles_lagrange + if (lag_params%solver_approach == 2) then + call nvtxStartRange("RHS-EL-BUBBLES-SRC") + call s_compute_bubbles_EL_source( & + q_cons_qp%vf(1:sys_size), & + q_prim_qp%vf(1:sys_size), & + rhs_vf) + call nvtxEndRange + end if + end if + + if (particles_lagrange) then + ! 
Compute particle dynamics, forces, dvdt + call nvtxStartRange("RHS-EL-PARTICLES-DYN") + call s_compute_particle_EL_dynamics( & + q_prim_qp%vf(1:sys_size), & + bc_type, stage, & + qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & + qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, rhs_vf) + call nvtxEndRange + + ! RHS additions for sub-grid particles_lagrange + if (lag_params%solver_approach == 2) then + call nvtxStartRange("RHS-EL-PARTICLES-SRC") + call s_compute_particles_EL_source( & + q_cons_qp%vf(1:sys_size), & + q_prim_qp%vf(1:sys_size), & + rhs_vf, stage) + call nvtxEndRange + end if end if if (chemistry .and. chem_params%reactions) then @@ -1060,7 +1087,7 @@ contains ! END: Additional pphysics and source terms - if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then + if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange .or. particles_lagrange) then if (.not. igr .or. dummy) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index a7680f84e9..f8e8526ced 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -175,61 +175,56 @@ contains !! @param j x index !! @param k y index !! @param l z index - !! @param icfl_sf cell-centered inviscid cfl number - !! @param vcfl_sf (optional) cell-centered viscous CFL number - !! @param Rc_sf (optional) cell centered Rc - subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + !! @param icfl cell-centered inviscid cfl number + !! @param vcfl (optional) cell-centered viscous CFL number + !! 
@param Rc (optional) cell centered Rc + subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl, vcfl, Rc) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho - real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf - real(wp), dimension(0:m, 0:n, 0:p), intent(inout), optional :: vcfl_sf, Rc_sf + real(wp), intent(inout) :: icfl + real(wp), intent(inout), optional :: vcfl, Rc real(wp), dimension(2), intent(in) :: Re_l integer, intent(in) :: j, k, l real(wp) :: fltr_dtheta ! Inviscid CFL calculation - if (p > 0 .or. n > 0) then - ! 2D/3D - icfl_sf(j, k, l) = dt/f_compute_multidim_cfl_terms(vel, c, j, k, l) - else - ! 1D - icfl_sf(j, k, l) = (dt/dx(j))*(abs(vel(1)) + c) + if (p > 0 .or. n > 0) then ! 2D/3D + icfl = dt/f_compute_multidim_cfl_terms(vel, c, j, k, l) + else ! 1D + icfl = (dt/dx(j))*(abs(vel(1)) + c) end if ! Viscous calculations if (viscous) then - if (p > 0) then + if (p > 0) then !3D #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - !3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho) & - /min(dx(j), dy(k), fltr_dtheta)**2._wp - Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), & - dy(k)*(abs(vel(2)) + c), & - fltr_dtheta*(abs(vel(3)) + c)) & - /maxval(1._wp/Re_l) + vcfl = maxval(dt/Re_l/rho) & + /min(dx(j), dy(k), fltr_dtheta)**2._wp + Rc = min(dx(j)*(abs(vel(1)) + c), & + dy(k)*(abs(vel(2)) + c), & + fltr_dtheta*(abs(vel(3)) + c)) & + /maxval(1._wp/Re_l) else - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho) & - /min(dx(j), dy(k), dz(l))**2._wp - Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), & - dy(k)*(abs(vel(2)) + c), & - dz(l)*(abs(vel(3)) + c)) & - /maxval(1._wp/Re_l) + vcfl = maxval(dt/Re_l/rho) & + /min(dx(j), dy(k), dz(l))**2._wp + Rc = min(dx(j)*(abs(vel(1)) + c), & + dy(k)*(abs(vel(2)) + c), & + dz(l)*(abs(vel(3)) + c)) & + /maxval(1._wp/Re_l) end if #:endif - elseif (n > 0) then - !2D - vcfl_sf(j, k, l) = 
maxval(dt/Re_l/rho)/min(dx(j), dy(k))**2._wp - Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), & - dy(k)*(abs(vel(2)) + c)) & - /maxval(1._wp/Re_l) - else - !1D - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/dx(j)**2._wp - Rc_sf(j, k, l) = dx(j)*(abs(vel(1)) + c)/maxval(1._wp/Re_l) + elseif (n > 0) then !2D + vcfl = maxval(dt/Re_l/rho)/min(dx(j), dy(k))**2._wp + Rc = min(dx(j)*(abs(vel(1)) + c), & + dy(k)*(abs(vel(2)) + c)) & + /maxval(1._wp/Re_l) + else !1D + vcfl = maxval(dt/Re_l/rho)/dx(j)**2._wp + Rc = dx(j)*(abs(vel(1)) + c)/maxval(1._wp/Re_l) end if end if @@ -248,7 +243,7 @@ contains $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c, rho - real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: max_dt + real(wp), intent(inout) :: max_dt real(wp), dimension(2), intent(in) :: Re_l integer, intent(in) :: j, k, l @@ -256,18 +251,15 @@ contains real(wp) :: fltr_dtheta ! Inviscid CFL calculation - if (p > 0 .or. n > 0) then - ! 2D/3D cases + if (p > 0 .or. n > 0) then ! 2D/3D cases icfl_dt = cfl_target*f_compute_multidim_cfl_terms(vel, c, j, k, l) - else - ! 1D case + else ! 1D cases icfl_dt = cfl_target*(dx(j)/(abs(vel(1)) + c)) end if ! 
Viscous calculations if (viscous) then - if (p > 0) then - !3D + if (p > 0) then !3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) vcfl_dt = cfl_target*(min(dx(j), dy(k), fltr_dtheta)**2._wp) & @@ -276,19 +268,17 @@ contains vcfl_dt = cfl_target*(min(dx(j), dy(k), dz(l))**2._wp) & /maxval(1/(rho*Re_l)) end if - elseif (n > 0) then - !2D + elseif (n > 0) then !2D vcfl_dt = cfl_target*(min(dx(j), dy(k))**2._wp)/maxval((1/Re_l)/rho) - else - !1D + else !1D vcfl_dt = cfl_target*(dx(j)**2._wp)/maxval(1/(rho*Re_l)) end if end if - if (any(Re_size > 0)) then - max_dt(j, k, l) = min(icfl_dt, vcfl_dt) + if (viscous) then + max_dt = min(icfl_dt, vcfl_dt) else - max_dt(j, k, l) = icfl_dt + max_dt = icfl_dt end if end subroutine s_compute_dt_from_cfl diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 925ad55db0..ae6755db6d 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -56,6 +56,8 @@ module m_start_up use m_bubbles_EL !< Lagrange bubble dynamics routines + use m_particles_EL !< Lagrange particle dynamics routines + use ieee_arithmetic use m_helper_basic !< Functions to compare floating point numbers @@ -140,8 +142,6 @@ contains rdma_mpi, teno_CT, mp_weno, weno_avg, & riemann_solver, low_Mach, wave_speeds, avg_state, & bc_x, bc_y, bc_z, & - x_a, y_a, z_a, x_b, y_b, z_b, & - x_domain, y_domain, z_domain, & hypoelasticity, & ib, num_ibs, patch_ib, & fluid_pp, bub_pp, probe_wrt, prim_vars_wrt, & @@ -175,7 +175,8 @@ contains hyper_cleaning, hyper_cleaning_speed, hyper_cleaning_tau, & alf_factor, num_igr_iters, num_igr_warm_start_iters, & int_comp, ic_eps, ic_beta, nv_uvm_out_of_core, & - nv_uvm_igr_temps_on_gpu, nv_uvm_pref_gpu, down_sample, fft_wrt + nv_uvm_igr_temps_on_gpu, nv_uvm_pref_gpu, down_sample, fft_wrt, & + particles_lagrange, particle_pp ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. 
@@ -211,11 +212,15 @@ contains if (cfl_adap_dt .or. cfl_const_dt) cfl_dt = .true. - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_DIRICHLET) .or. & num_bc_patches > 0) then bc_io = .true. end if + if (bc_x%beg == BC_PERIODIC .and. bc_x%end == BC_PERIODIC) periodic_bc(1) = .true. + if (bc_y%beg == BC_PERIODIC .and. bc_y%end == BC_PERIODIC) periodic_bc(2) = .true. + if (bc_z%beg == BC_PERIODIC .and. bc_z%end == BC_PERIODIC) periodic_bc(3) = .true. + else call s_mpi_abort(trim(file_path)//' is missing. Exiting.') end if @@ -987,7 +992,7 @@ contains $:GPU_UPDATE(host='[lag_id, mtn_pos, mtn_posPrev, mtn_vel, intfc_rad, & & intfc_vel, bub_R0, Rmax_stats, Rmin_stats, bub_dphidt, gas_p, & & gas_mv, gas_mg, gas_betaT, gas_betaC]') - do i = 1, nBubs + do i = 1, n_el_bubs_loc if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") end if @@ -998,6 +1003,23 @@ contains $:GPU_UPDATE(host='[Rmax_stats,Rmin_stats,gas_p,gas_mv,intfc_vel]') call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() + + elseif (particles_lagrange) then + $:GPU_UPDATE(host='[lag_part_id, particle_pos, particle_posPrev, particle_vel, particle_rad, & + & particle_R0, Rmax_stats_part, Rmin_stats_part, & + & particle_mass]') + ! do i = 1, n_el_particles_loc + ! if (ieee_is_nan(particle_rad(i, 1)) .or. particle_rad(i, 1) <= 0._wp) then + ! call s_mpi_abort("Particle radius is negative or NaN, please reduce dt.") + ! end if + ! 
end do + + $:GPU_UPDATE(host='[q_particles(1)%sf]') + call s_write_data_files(q_cons_ts(stor)%vf, q_T_sf, q_prim_vf, save_count, bc_type, q_particles(1)) + $:GPU_UPDATE(host='[Rmax_stats_part,Rmin_stats_part]') + call s_write_restart_lag_particles(save_count) !parallel + if (lag_params%write_bubbles_stats) call s_write_lag_particle_stats() + else call s_write_data_files(q_cons_ts(stor)%vf, q_T_sf, q_prim_vf, save_count, bc_type) end if @@ -1035,6 +1057,11 @@ contains if (bubbles_euler .or. bubbles_lagrange) then call s_initialize_bubbles_model() end if + + if (particles_lagrange) then + call s_initialize_particles_model() + end if + call s_initialize_mpi_common_module() call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() @@ -1121,7 +1148,9 @@ contains end if call s_initialize_derived_variables() - if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf) + + if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf, bc_type) + if (particles_lagrange) call s_initialize_particles_EL_module(q_cons_ts(1)%vf, bc_type) if (hypoelasticity) call s_initialize_hypoelastic_module() if (hyperelasticity) call s_initialize_hyperelastic_module() @@ -1254,6 +1283,10 @@ contains $:GPU_UPDATE(device='[bc_y%vb1,bc_y%vb2,bc_y%vb3,bc_y%ve1,bc_y%ve2,bc_y%ve3]') $:GPU_UPDATE(device='[bc_z%vb1,bc_z%vb2,bc_z%vb3,bc_z%ve1,bc_z%ve2,bc_z%ve3]') + $:GPU_UPDATE(device='[bc_x%beg, bc_x%end]') + $:GPU_UPDATE(device='[bc_y%beg, bc_y%end]') + $:GPU_UPDATE(device='[bc_z%beg, bc_z%end]') + $:GPU_UPDATE(device='[bc_x%grcbc_in,bc_x%grcbc_out,bc_x%grcbc_vel_out]') $:GPU_UPDATE(device='[bc_y%grcbc_in,bc_y%grcbc_out,bc_y%grcbc_vel_out]') $:GPU_UPDATE(device='[bc_z%grcbc_in,bc_z%grcbc_out,bc_z%grcbc_vel_out]') @@ -1307,6 +1340,7 @@ contains call s_finalize_boundary_common_module() if (relax) call s_finalize_relaxation_solver_module() if (bubbles_lagrange) call s_finalize_lagrangian_solver() + if (particles_lagrange) call 
s_finalize_particle_lagrangian_solver() if (viscous .and. (.not. igr)) then call s_finalize_viscous_module() end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 2835055667..62f3821b2d 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -22,6 +22,8 @@ module m_time_steppers use m_bubbles_EL !< Lagrange bubble dynamics routines + use m_particles_EL !< Lagrange particle dynamics routines + use m_ibm use m_hyperelastic @@ -68,8 +70,6 @@ module m_time_steppers real(wp), allocatable, dimension(:, :, :, :, :) :: rhs_mv - real(wp), allocatable, dimension(:, :, :) :: max_dt - integer, private :: num_ts !< !! Number of time stages in the time-stepping scheme @@ -77,7 +77,7 @@ module m_time_steppers real(wp), allocatable, dimension(:, :) :: rk_coef integer, private :: num_probe_ts - $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts1,q_prim_ts2,rhs_mv,rhs_pb,max_dt,rk_coef,stor,bc_type]') + $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts1,q_prim_ts2,rhs_mv,rhs_pb,rk_coef,stor,bc_type]') !> @cond #if defined(__NVCOMPILER_GPU_UNIFIED_MEM) @@ -463,10 +463,6 @@ contains call s_open_run_time_information_file() end if - if (cfl_dt) then - @:ALLOCATE(max_dt(0:m, 0:n, 0:p)) - end if - ! Allocating arrays to store the bc types @:ALLOCATE(bc_type(1:num_dims,1:2)) @@ -560,7 +556,12 @@ contains end if end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, bc_type, stage=s) + + if (particles_lagrange) then + call s_update_lagrange_particles_tdv_rk(q_prim_vf, bc_type, stage=s) + end if + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p @@ -586,6 +587,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() + !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. 
polytropic)) then $:GPU_PARALLEL_LOOP(collapse=5) @@ -672,7 +674,7 @@ contains q_cons_ts(1)%vf, & q_T_sf, & q_prim_vf, & - idwint) + idwbuff) if (bubbles_euler) then @@ -682,16 +684,15 @@ contains elseif (bubbles_lagrange) then call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - call s_compute_bubble_EL_dynamics(q_prim_vf, stage) - call s_transfer_data_to_tmp() - call s_smear_voidfraction() + call s_compute_bubble_EL_dynamics(q_prim_vf, bc_type, stage) + if (stage == 3) then if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if - call s_write_void_evol(mytime) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) end if end if @@ -718,7 +719,7 @@ contains real(wp) :: H !< Cell-avg. enthalpy real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers type(vector_field) :: gm_alpha_qp - + real(wp) :: max_dt real(wp) :: dt_local integer :: j, k, l !< Generic loop iterators @@ -730,7 +731,9 @@ contains idwint) end if - $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv]') + dt_local = huge(1.0_wp) + $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv]', & + & reduction='[[dt_local]]', reductionOp='[min]') do l = 0, p do k = 0, n do j = 0, m @@ -744,15 +747,12 @@ contains call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c, qv) call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) + dt_local = min(dt_local, max_dt) end do end do end do $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') - dt_local = minval(max_dt) - #:endcall GPU_PARALLEL - if (num_procs == 1) then dt = dt_local else diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py 
index 7c456b36fc..5236cb27de 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -639,8 +639,8 @@ def get_value_label(param_name: str, value: int) -> str: # Bubbles "bubble_model": { - "choices": [1, 2, 3], - "value_labels": {1: "Gilmore", 2: "Keller-Miksis", 3: "Rayleigh-Plesset"}, + "choices": [0, 1, 2, 3], + "value_labels": {0: "Particle", 1: "Gilmore", 2: "Keller-Miksis", 3: "Rayleigh-Plesset"}, }, # Output @@ -898,6 +898,10 @@ def _load(): # pylint: disable=too-many-locals,too-many-statements for n in ["polytropic", "bubbles_euler", "polydisperse", "qbmm", "bubbles_lagrange"]: _r(n, LOG, {"bubbles"}) + # --- Particles --- + for n in ["particles_lagrange"]: + _r(n, LOG, {"particles"}) + # --- Viscosity --- _r("viscous", LOG, {"viscosity"}) @@ -992,6 +996,13 @@ def _load(): # pylint: disable=too-many-locals,too-many-statements _r(f"p_{d}", REAL, math=r"\f$\phi_" + d + r"\f$") _r(f"bf_{d}", LOG) + # Interfacial flow inputs + _r("normMag", REAL) + _r("p0_ic", REAL) + _r("g0_ic", REAL) + _r("normFac", REAL) + _r("interface_file", STR) + # ========================================================================== # INDEXED PARAMETERS # ========================================================================== @@ -1072,6 +1083,10 @@ def _load(): # pylint: disable=too-many-locals,too-many-statements ("R_v", r"\f$R_v\f$"), ("R_g", r"\f$R_g\f$")]: _r(f"bub_pp%{a}", REAL, {"bubbles"}, math=sym) + # --- particle_pp (particle properties) --- + for a in ["rho0ref_particle", "cp_particle"]: + _r(f"particle_pp%{a}", REAL, {"particles"}) + # --- patch_ib (10 immersed boundaries) --- for i in range(1, NI + 1): px = f"patch_ib({i})%" @@ -1160,12 +1175,23 @@ def _load(): # pylint: disable=too-many-locals,too-many-statements # --- lag_params (Lagrangian bubbles) --- for a in ["heatTransfer_model", "massTransfer_model", "pressure_corrector", - "write_bubbles", "write_bubbles_stats"]: + "write_bubbles", "write_bubbles_stats", 
"pressure_force", "gravity_force", + "write_void_evol" ]: _r(f"lag_params%{a}", LOG, {"bubbles"}) - for a in ["solver_approach", "cluster_type", "smooth_type", "nBubs_glb"]: + for a in ["solver_approach", "cluster_type", "smooth_type", "nBubs_glb", "drag_model", + "vel_model", "charNz"]: _r(f"lag_params%{a}", INT, {"bubbles"}) for a in ["epsilonb", "valmaxvoid", "charwidth", "c0", "rho0", "T0", "x0", "Thost"]: _r(f"lag_params%{a}", REAL, {"bubbles"}) + _r(f"lag_params%input_path", STR, {"bubbles"}) + + # --- lag_params (Lagrangian particles) --- + for a in ["nParticles_glb", "stokes_drag", "qs_drag_model", "added_mass_model", + "interpolation_order"]: + _r(f"lag_params%{a}", INT, {'particles'}) + + for a in ["collision_force"]: + _r(f"lag_params%{a}", LOG, {'particles'}) # --- chem_params --- for a in ["diffusion", "reactions"]: