From b3bc23dd469db9902b1b253460b033810ad249e9 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 29 Apr 2025 10:46:10 -0600 Subject: [PATCH 1/2] Initial OpenACC port of mpas_atm_update_bdy_tend --- .../dynamics/mpas_atm_boundaries.F | 154 +++++++++++++++--- 1 file changed, 134 insertions(+), 20 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index fca1734138..a947b1bfae 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -99,9 +99,12 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (mpas_pool_type), pointer :: lbc real (kind=RKIND) :: dt - integer, pointer :: nCells - integer, pointer :: nEdges - integer, pointer :: index_qv + integer, pointer :: nCells_ptr + integer, pointer :: nEdges_ptr + integer, pointer :: nVertLevels_ptr + integer, pointer :: index_qv_ptr + integer, pointer :: nScalars_ptr + integer :: nCells, nEdges, nVertLevels, index_qv, nScalars real (kind=RKIND), dimension(:,:), pointer :: u real (kind=RKIND), dimension(:,:), pointer :: ru @@ -129,7 +132,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (MPAS_Time_Type) :: currTime type (MPAS_TimeInterval_Type) :: lbc_interval character(len=StrKIND) :: read_time - integer :: iEdge + integer :: iEdge, iCell, k, j integer :: cell1, cell2 @@ -167,6 +170,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr ! Compute any derived fields from those that were read from the lbc_in stream ! call mpas_pool_get_array(lbc, 'lbc_u', u, 2) + call mpas_pool_get_array(lbc, 'lbc_w', w, 2) call mpas_pool_get_array(lbc, 'lbc_ru', ru, 2) call mpas_pool_get_array(lbc, 'lbc_rho_edge', rho_edge, 2) call mpas_pool_get_array(lbc, 'lbc_theta', theta, 2) @@ -176,26 +180,84 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_array(lbc, 'lbc_scalars', scalars, 2) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - call mpas_pool_get_dimension(mesh, 'nCells', nCells) - call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) - call mpas_pool_get_dimension(lbc, 'index_qv', index_qv) + call mpas_pool_get_dimension(mesh, 'nCells', nCells_ptr) + call mpas_pool_get_dimension(mesh, 'nEdges', nEdges_ptr) + call mpas_pool_get_dimension(mesh, 'nVertLevels', nVertLevels_ptr) + call mpas_pool_get_dimension(state, 'num_scalars', nScalars_ptr) + call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) call mpas_pool_get_array(mesh, 'zz', zz) + if (.not. firstCall) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + + !$acc enter data copyin(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & + !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & + !$acc lbc_tend_rho, lbc_tend_scalars) + end if + !$acc enter data copyin(u, w, theta, rho, scalars) + !$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) + + ! Dereference the pointers to avoid non-array pointer for OpenACC + nCells = nCells_ptr + nEdges = nEdges_ptr + nVertLevels = nVertLevels_ptr + nScalars = nScalars_ptr + index_qv = index_qv_ptr + ! Compute lbc_rho_zz + + !$acc kernels zz(:,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line - rho_zz(:,:) = rho(:,:) / zz(:,:) + !$acc end kernels + + !$acc parallel + ! Compute lbc_rho_zz + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + rho_zz(k,iCell) = rho(k,iCell) / zz(k,iCell) + end do + end do + !$acc end parallel ! Average lbc_rho_zz to edges + !$acc parallel + !$acc loop gang worker do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) if (cell1 > 0 .and. cell2 > 0) then - rho_edge(:,iEdge) = 0.5_RKIND * (rho_zz(:,cell1) + rho_zz(:,cell2)) + !$acc loop vector + do k = 1, nVertLevels + rho_edge(k,iEdge) = 0.5_RKIND * (rho_zz(k,cell1) + rho_zz(k,cell2)) + end do end if end do + !$acc end parallel + + !$acc parallel + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + ru(k,iEdge) = u(k,iEdge) * rho_edge(k,iEdge) + end do + end do - ru(:,:) = u(:,:) * rho_edge(:,:) - rtheta_m(:,:) = theta(:,:) * rho_zz(:,:) * (1.0_RKIND + rvord * scalars(index_qv,:,:)) + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + rtheta_m(k,iCell) = theta(k,iCell) * rho_zz(k,iCell) * (1.0_RKIND + rvord * scalars(index_qv,k,iCell)) + end do + end do + !$acc end parallel if (.not. firstCall) then lbc_interval = currTime - LBC_intv_end @@ -225,15 +287,58 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr dt = 1.0_RKIND / dt - lbc_tend_u(:,:) = (u(:,:) - lbc_tend_u(:,:)) * dt - lbc_tend_ru(:,:) = (ru(:,:) - lbc_tend_ru(:,:)) * dt - lbc_tend_rho_edge(:,:) = (rho_edge(:,:) - lbc_tend_rho_edge(:,:)) * dt - lbc_tend_w(:,:) = (w(:,:) - lbc_tend_w(:,:)) * dt - lbc_tend_theta(:,:) = (theta(:,:) - lbc_tend_theta(:,:)) * dt - lbc_tend_rtheta_m(:,:) = (rtheta_m(:,:) - lbc_tend_rtheta_m(:,:)) * dt - lbc_tend_rho_zz(:,:) = (rho_zz(:,:) - lbc_tend_rho_zz(:,:)) * dt - lbc_tend_rho(:,:) = (rho(:,:) - lbc_tend_rho(:,:)) * dt - lbc_tend_scalars(:,:,:) = (scalars(:,:,:) - lbc_tend_scalars(:,:,:)) * dt + + !$acc parallel default(present) + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + lbc_tend_u(k,iEdge) = (u(k,iEdge) - lbc_tend_u(k,iEdge)) * dt + lbc_tend_ru(k,iEdge) = (ru(k,iEdge) - lbc_tend_ru(k,iEdge)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + lbc_tend_rho_edge(k,iEdge) = (rho_edge(k,iEdge) - lbc_tend_rho_edge(k,iEdge)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels+1 + lbc_tend_w(k,iCell) = (w(k,iCell) - lbc_tend_w(k,iCell)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + lbc_tend_theta(k,iCell) = (theta(k,iCell) - lbc_tend_theta(k,iCell)) * dt + lbc_tend_rtheta_m(k,iCell) = (rtheta_m(k,iCell) - lbc_tend_rtheta_m(k,iCell)) * dt + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + lbc_tend_rho_zz(k,iCell) = (rho_zz(k,iCell) - lbc_tend_rho_zz(k,iCell)) * dt + lbc_tend_rho(k,iCell) = (rho(k,iCell) - lbc_tend_rho(k,iCell)) * dt + end do + end do + !$acc end parallel + + !$acc parallel default(present) + !$acc loop gang + do iCell=1,nCells+1 + !$acc loop vector collapse(2) + do k=1,nVertLevels + do j = 1,nScalars + lbc_tend_scalars(j,k,iCell) = (scalars(j,k,iCell) - lbc_tend_scalars(j,k,iCell)) * dt + end do + end do + end do + !$acc end parallel ! ! Logging the lbc start and end times appears to be backwards, but @@ -249,6 +354,15 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if + if (.not. firstCall) then + !$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & + !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & + !$acc lbc_tend_rho, lbc_tend_scalars) + end if + + !$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) + !$acc exit data delete(u, w, theta, rho, scalars) + LBC_intv_end = currTime end subroutine mpas_atm_update_bdy_tend From 8bc1e966fa2c783a12913fe0607b9dae514c3ef1 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 29 Apr 2025 10:56:53 -0600 Subject: [PATCH 2/2] Adding timers mpas_atm_update_bdy_tend [ACC_data_xfer] --- src/core_atmosphere/dynamics/mpas_atm_boundaries.F | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index a947b1bfae..0af66b8a20 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -187,6 +187,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) call mpas_pool_get_array(mesh, 'zz', zz) + MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') if (.not. firstCall) then call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) @@ -204,6 +205,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if !$acc enter data copyin(u, w, theta, rho, scalars) !$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) + MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') ! Dereference the pointers to avoid non-array pointer for OpenACC nCells = nCells_ptr @@ -354,6 +356,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if + MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') if (.not. firstCall) then !$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & @@ -362,6 +365,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr !$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) !$acc exit data delete(u, w, theta, rho, scalars) + MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') LBC_intv_end = currTime