Skip to content

Commit 4ea6abb

Browse files
committed
Merge branch 'atmosphere/port_atm_rk_integration_setup' into develop (PR #1223)
This merge enables GPU execution of atm_rk_integration_setup through the addition of OpenACC directives. The changes in this merge result in bitwise identical results to pre-merge baselines. A new timer, 'atm_rk_integration_setup [ACC_data_xfer]', has been added for data transfers in the atm_rk_integration_setup routine. * atmosphere/port_atm_rk_integration_setup: Adding default(present) to the parallel clause in atm_rk_integration_setup Initializing the garbage cells for theta_m_2 in atm_rk_integration_setup Initial OpenACC port of atm_rk_integration_setup subroutine
2 parents 9a073c2 + 3180c09 commit 4ea6abb

File tree

1 file changed

+64
-17
lines changed

1 file changed

+64
-17
lines changed

src/core_atmosphere/dynamics/mpas_atm_time_integration.F

Lines changed: 64 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -761,9 +761,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
761761
#ifdef DO_PHYSICS
762762
call mpas_pool_get_dimension(state, 'index_qv', index_qv)
763763
#endif
764-
if (config_apply_lbcs) then
765-
call mpas_pool_get_dimension(state, 'num_scalars', num_scalars)
766-
endif
764+
call mpas_pool_get_dimension(state, 'num_scalars', num_scalars)
767765

768766
!
769767
! allocate storage for physics tendency save
@@ -845,7 +843,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
845843

846844
!$OMP PARALLEL DO
847845
do thread=1,nThreads
848-
call atm_rk_integration_setup(state, diag, &
846+
call atm_rk_integration_setup(state, diag, nVertLevels, num_scalars, &
849847
cellThreadStart(thread), cellThreadEnd(thread), &
850848
vertexThreadStart(thread), vertexThreadEnd(thread), &
851849
edgeThreadStart(thread), edgeThreadEnd(thread), &
@@ -1668,16 +1666,17 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono
16681666
end subroutine advance_scalars
16691667

16701668

1671-
subroutine atm_rk_integration_setup( state, diag, &
1669+
subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, &
16721670
cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, &
16731671
cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd)
16741672

16751673
implicit none
16761674

16771675
type (mpas_pool_type), intent(inout) :: state
16781676
type (mpas_pool_type), intent(inout) :: diag
1679-
integer, intent(in) :: cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd
1677+
integer, intent(in) :: nVertLevels, num_scalars, cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd
16801678
integer, intent(in) :: cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd
1679+
integer :: iCell, iEdge, j, k
16811680

16821681
real (kind=RKIND), dimension(:,:), pointer :: ru
16831682
real (kind=RKIND), dimension(:,:), pointer :: ru_save
@@ -1716,17 +1715,65 @@ subroutine atm_rk_integration_setup( state, diag, &
17161715
call mpas_pool_get_array(state, 'scalars', scalars_1, 1)
17171716
call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
17181717

1719-
ru_save(:,edgeStart:edgeEnd) = ru(:,edgeStart:edgeEnd)
1720-
rw_save(:,cellStart:cellEnd) = rw(:,cellStart:cellEnd)
1721-
rtheta_p_save(:,cellStart:cellEnd) = rtheta_p(:,cellStart:cellEnd)
1722-
rho_p_save(:,cellStart:cellEnd) = rho_p(:,cellStart:cellEnd)
1723-
1724-
u_2(:,edgeStart:edgeEnd) = u_1(:,edgeStart:edgeEnd)
1725-
w_2(:,cellStart:cellEnd) = w_1(:,cellStart:cellEnd)
1726-
theta_m_2(:,cellStart:cellEnd) = theta_m_1(:,cellStart:cellEnd)
1727-
rho_zz_2(:,cellStart:cellEnd) = rho_zz_1(:,cellStart:cellEnd)
1728-
rho_zz_old_split(:,cellStart:cellEnd) = rho_zz_1(:,cellStart:cellEnd)
1729-
scalars_2(:,:,cellStart:cellEnd) = scalars_1(:,:,cellStart:cellEnd)
1718+
MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]')
1719+
!$acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, &
1720+
!$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) &
1721+
!$acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, &
1722+
!$acc rho_zz_1, scalars_1)
1723+
MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]')
1724+
1725+
!$acc kernels
1726+
theta_m_2(:,cellEnd+1) = 0.0_RKIND
1727+
!$acc end kernels
1728+
1729+
!$acc parallel default(present)
1730+
!$acc loop gang worker
1731+
do iEdge = edgeStart,edgeEnd
1732+
!$acc loop vector
1733+
do k = 1,nVertLevels
1734+
ru_save(k,iEdge) = ru(k,iEdge)
1735+
u_2(k,iEdge) = u_1(k,iEdge)
1736+
end do
1737+
end do
1738+
1739+
!$acc loop gang worker
1740+
do iCell = cellStart,cellEnd
1741+
!$acc loop vector
1742+
do k = 1,nVertLevels
1743+
rtheta_p_save(k,iCell) = rtheta_p(k,iCell)
1744+
rho_p_save(k,iCell) = rho_p(k,iCell)
1745+
theta_m_2(k,iCell) = theta_m_1(k,iCell)
1746+
rho_zz_2(k,iCell) = rho_zz_1(k,iCell)
1747+
rho_zz_old_split(k,iCell) = rho_zz_1(k,iCell)
1748+
end do
1749+
end do
1750+
1751+
!$acc loop gang worker
1752+
do iCell = cellStart,cellEnd
1753+
!$acc loop vector
1754+
do k = 1,nVertLevels+1
1755+
rw_save(k,iCell) = rw(k,iCell)
1756+
w_2(k,iCell) = w_1(k,iCell)
1757+
end do
1758+
end do
1759+
1760+
!$acc loop gang worker
1761+
do iCell = cellStart,cellEnd
1762+
!$acc loop vector collapse(2)
1763+
do k = 1,nVertLevels
1764+
do j = 1,num_scalars
1765+
scalars_2(j,k,iCell) = scalars_1(j,k,iCell)
1766+
end do
1767+
end do
1768+
end do
1769+
!$acc end parallel
1770+
1771+
MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]')
1772+
!$acc exit data copyout(ru_save, rw_save, rtheta_p_save, rho_p_save, u_2, &
1773+
!$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) &
1774+
!$acc delete(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, &
1775+
!$acc rho_zz_1, scalars_1)
1776+
MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]')
17301777

17311778
end subroutine atm_rk_integration_setup
17321779

0 commit comments

Comments
 (0)