Skip to content

Commit a3046e6

Browse files
committed
Merge branch 'atmosphere/acc_recover_large_step' into develop (PR #1220)
This merge enables GPU execution of the atm_recover_large_step_variables_work routine by adding OpenACC directives to the routine. Timing information for the OpenACC data transfers in this routine is captured in the log file by a new timer, 'atm_recover_large_step_variables [ACC_data_xfer]'. The changes in this merge lead to differences in simulation results using a regional test case, but bit-identical results can be recovered by adding the following code below line 3056 in mpas_atm_time_integration.F: ! Calculate exner and pressure_p on CPU !$acc update host(rtheta_p) do iCell=cellStart,cellEnd do k = 1, nVertLevels exner(k,iCell) = (zz(k,iCell)*(rgas/p0)*(rtheta_p(k,iCell)+rtheta_base(k,iCell)))**rcv ! pressure_p is perturbation pressure pressure_p(k,iCell) = zz(k,iCell) * rgas * (exner(k,iCell)*rtheta_p(k,iCell)+rtheta_base(k,iCell) & * (exner(k,iCell)-exner_base(k,iCell))) end do end do !$acc update device(exner,pressure_p) * atmosphere/acc_recover_large_step: Initial OpenACC port of atm_recover_large_step_variables_work
2 parents 530a5bd + be338c9 commit a3046e6

File tree

1 file changed

+96
-27
lines changed

1 file changed

+96
-27
lines changed

src/core_atmosphere/dynamics/mpas_atm_time_integration.F

Lines changed: 96 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ subroutine mpas_atm_dynamics_init(domain)
236236
real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell
237237
real (kind=RKIND), dimension(:), pointer :: fzm
238238
real (kind=RKIND), dimension(:), pointer :: fzp
239+
real (kind=RKIND), dimension(:,:,:), pointer :: zb
240+
real (kind=RKIND), dimension(:,:,:), pointer :: zb3
239241
#endif
240242

241243

@@ -368,6 +370,12 @@ subroutine mpas_atm_dynamics_init(domain)
368370
call mpas_pool_get_array(mesh, 'fzp', fzp)
369371
!$acc enter data copyin(fzp)
370372

373+
call mpas_pool_get_array(mesh, 'zb', zb)
374+
!$acc enter data copyin(zb)
375+
376+
call mpas_pool_get_array(mesh, 'zb3', zb3)
377+
!$acc enter data copyin(zb3)
378+
371379
#endif
372380

373381
end subroutine mpas_atm_dynamics_init
@@ -440,6 +448,8 @@ subroutine mpas_atm_dynamics_finalize(domain)
440448
real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell
441449
real (kind=RKIND), dimension(:), pointer :: fzm
442450
real (kind=RKIND), dimension(:), pointer :: fzp
451+
real (kind=RKIND), dimension(:,:,:), pointer :: zb
452+
real (kind=RKIND), dimension(:,:,:), pointer :: zb3
443453
#endif
444454

445455

@@ -571,6 +581,13 @@ subroutine mpas_atm_dynamics_finalize(domain)
571581

572582
call mpas_pool_get_array(mesh, 'fzp', fzp)
573583
!$acc exit data delete(fzp)
584+
585+
call mpas_pool_get_array(mesh, 'zb', zb)
586+
!$acc exit data delete(zb)
587+
588+
call mpas_pool_get_array(mesh, 'zb3', zb3)
589+
!$acc exit data delete(zb3)
590+
574591
#endif
575592

576593
end subroutine mpas_atm_dynamics_finalize
@@ -2780,7 +2797,7 @@ subroutine atm_recover_large_step_variables( state, diag, tend, mesh, configs, d
27802797
cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, &
27812798
cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd)
27822799
2783-
! reconstitute state variables from acoustic-step perturbation variables
2800+
! reconstitute state variables from acoustic-step perturbation variables
27842801
! after the acoustic steps. The perturbation variables were originally set in
27852802
! subroutine atm_set_smlstep_pert_variables prior to their acoustic-steps update.
27862803
! we are also computing a few other state-derived variables here.
@@ -2910,7 +2927,7 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
29102927
real (kind=RKIND), intent(in) :: dt
29112928
29122929
integer, dimension(nCells+1), intent(in) :: bdyMaskCell
2913-
2930+
29142931
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: wwAvg
29152932
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw_save
29162933
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: w
@@ -2961,45 +2978,70 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
29612978
integer :: i, iCell, iEdge, k, cell1, cell2
29622979
real (kind=RKIND) :: invNs, rcv, p0, flux
29632980
2981+
MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]')
2982+
!$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, &
2983+
!$acc rtheta_p_save,rtheta_pp,rtheta_base, &
2984+
!$acc ru_save,ru_p,wwAvg,ruAvg) &
2985+
!$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, &
2986+
!$acc ru,u)
2987+
if (rk_step == 3) then
2988+
!$acc enter data copyin(rt_diabatic_tend,exner_base) &
2989+
!$acc create(exner,pressure_p)
2990+
end if
2991+
MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]')
29642992
29652993
rcv = rgas/(cp-rgas)
29662994
p0 = 1.0e+05 ! this should come from somewhere else...
29672995
2968-
! Avoid FP errors caused by a potential division by zero below by
2996+
! Avoid FP errors caused by a potential division by zero below by
29692997
! initializing the "garbage cell" of rho_zz to a non-zero value
2998+
!$acc parallel default(present)
2999+
!$acc loop gang vector
29703000
do k=1,nVertLevels
29713001
rho_zz(k,nCells+1) = 1.0
29723002
end do
3003+
!$acc end parallel
29733004
29743005
! compute new density everywhere so we can compute u from ru.
29753006
! we will also need it to compute theta_m below
29763007
29773008
invNs = 1 / real(ns,RKIND)
29783009
3010+
!$acc parallel default(present)
3011+
!$acc loop gang worker
29793012
do iCell=cellStart,cellEnd
29803013
29813014
!DIR$ IVDEP
3015+
!$acc loop vector
29823016
do k = 1, nVertLevels
29833017
rho_p(k,iCell) = rho_p_save(k,iCell) + rho_pp(k,iCell)
29843018
29853019
rho_zz(k,iCell) = rho_p(k,iCell) + rho_base(k,iCell)
29863020
end do
29873021
3022+
rw(1,iCell) = 0.0
29883023
w(1,iCell) = 0.0
29893024
29903025
!DIR$ IVDEP
3026+
!$acc loop vector
29913027
do k = 2, nVertLevels
29923028
wwAvg(k,iCell) = rw_save(k,iCell) + (wwAvg(k,iCell) * invNs)
29933029
rw(k,iCell) = rw_save(k,iCell) + rw_p(k,iCell)
29943030
29953031
! pick up part of diagnosed w from omega - divide by density later
29963032
w(k,iCell) = rw(k,iCell)/(fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell))
2997-
3033+
29983034
end do
29993035
3036+
rw(nVertLevels+1,iCell) = 0.0
30003037
w(nVertLevels+1,iCell) = 0.0
3038+
end do
3039+
!$acc end parallel
30013040
3002-
if (rk_step == 3) then
3041+
if (rk_step == 3) then
3042+
!$acc parallel default(present)
3043+
!$acc loop collapse(2)
3044+
do iCell=cellStart,cellEnd
30033045
!DIR$ IVDEP
30043046
do k = 1, nVertLevels
30053047
rtheta_p(k,iCell) = rtheta_p_save(k,iCell) + rtheta_pp(k,iCell) &
@@ -3010,37 +3052,48 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
30103052
pressure_p(k,iCell) = zz(k,iCell) * rgas * (exner(k,iCell)*rtheta_p(k,iCell)+rtheta_base(k,iCell) &
30113053
* (exner(k,iCell)-exner_base(k,iCell)))
30123054
end do
3013-
else
3055+
end do
3056+
!$acc end parallel
3057+
else
3058+
!$acc parallel default(present)
3059+
!$acc loop collapse(2)
3060+
do iCell=cellStart,cellEnd
30143061
!DIR$ IVDEP
30153062
do k = 1, nVertLevels
30163063
rtheta_p(k,iCell) = rtheta_p_save(k,iCell) + rtheta_pp(k,iCell)
30173064
theta_m(k,iCell) = (rtheta_p(k,iCell) + rtheta_base(k,iCell))/rho_zz(k,iCell)
30183065
end do
3019-
end if
3020-
3021-
end do
3066+
end do
3067+
!$acc end parallel
3068+
end if
30223069
3023-
! recover time-averaged ruAvg on all edges of owned cells (for upcoming scalar transport).
3024-
! we solved for these in the acoustic-step loop.
3070+
! recover time-averaged ruAvg on all edges of owned cells (for upcoming scalar transport).
3071+
! we solved for these in the acoustic-step loop.
30253072
! we will compute ru and u here also, given we are here, even though we only need them on nEdgesSolve
30263073
30273074
!$OMP BARRIER
30283075
3076+
!$acc parallel default(present)
3077+
!$acc loop gang worker
30293078
do iEdge=edgeStart,edgeEnd
30303079
30313080
cell1 = cellsOnEdge(1,iEdge)
30323081
cell2 = cellsOnEdge(2,iEdge)
30333082
30343083
!DIR$ IVDEP
3084+
!$acc loop vector
30353085
do k = 1, nVertLevels
30363086
ruAvg(k,iEdge) = ru_save(k,iEdge) + (ruAvg(k,iEdge) * invNs)
30373087
ru(k,iEdge) = ru_save(k,iEdge) + ru_p(k,iEdge)
30383088
u(k,iEdge) = 2.*ru(k,iEdge)/(rho_zz(k,cell1)+rho_zz(k,cell2))
30393089
end do
30403090
end do
3091+
!$acc end parallel
30413092
30423093
!$OMP BARRIER
30433094
3095+
!$acc parallel default(present)
3096+
!$acc loop gang worker
30443097
do iCell=cellStart,cellEnd
30453098
30463099
! finish recovering w from (rho*omega)_p. as when we formed (rho*omega)_p from u and w, we need
@@ -3049,33 +3102,49 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
30493102
30503103
if (bdyMaskCell(iCell) <= nRelaxZone) then ! addition for regional_MPAS, no spec zone update
30513104
3052-
do i=1,nEdgesOnCell(iCell)
3053-
iEdge=edgesOnCell(i,iCell)
3105+
!$acc loop seq
3106+
do i=1,nEdgesOnCell(iCell)
3107+
iEdge=edgesOnCell(i,iCell)
30543108
3055-
flux = (cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge))
3056-
w(1,iCell) = w(1,iCell) + edgesOnCell_sign(i,iCell) * &
3057-
(zb_cell(1,i,iCell) + sign(1.0_RKIND,flux)*zb3_cell(1,i,iCell))*flux
3109+
flux = (cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge))
3110+
w(1,iCell) = w(1,iCell) + edgesOnCell_sign(i,iCell) * &
3111+
(zb_cell(1,i,iCell) + sign(1.0_RKIND,flux)*zb3_cell(1,i,iCell))*flux
30583112
30593113
!DIR$ IVDEP
3060-
do k = 2, nVertLevels
3061-
flux = (fzm(k)*ru(k,iEdge)+fzp(k)*ru(k-1,iEdge))
3062-
w(k,iCell) = w(k,iCell) + edgesOnCell_sign(i,iCell) * &
3063-
(zb_cell(k,i,iCell)+sign(1.0_RKIND,flux)*zb3_cell(k,i,iCell))*flux
3064-
end do
3114+
!$acc loop vector
3115+
do k = 2, nVertLevels
3116+
flux = (fzm(k)*ru(k,iEdge)+fzp(k)*ru(k-1,iEdge))
3117+
w(k,iCell) = w(k,iCell) + edgesOnCell_sign(i,iCell) * &
3118+
(zb_cell(k,i,iCell)+sign(1.0_RKIND,flux)*zb3_cell(k,i,iCell))*flux
3119+
end do
30653120
3066-
end do
3121+
end do
30673122
3068-
w(1,iCell) = w(1,iCell)/(cf1*rho_zz(1,iCell)+cf2*rho_zz(2,iCell)+cf3*rho_zz(3,iCell))
3123+
w(1,iCell) = w(1,iCell)/(cf1*rho_zz(1,iCell)+cf2*rho_zz(2,iCell)+cf3*rho_zz(3,iCell))
30693124
30703125
3071-
!DIR$ IVDEP
3072-
do k = 2, nVertLevels
3073-
w(k,iCell) = w(k,iCell)/(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell))
3074-
end do
3126+
!DIR$ IVDEP
3127+
!$acc loop vector
3128+
do k = 2, nVertLevels
3129+
w(k,iCell) = w(k,iCell)/(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell))
3130+
end do
30753131
30763132
end if ! addition for regional_MPAS, no spec zone update
30773133
30783134
end do
3135+
!$acc end parallel
3136+
3137+
MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]')
3138+
!$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, &
3139+
!$acc rtheta_p_save,rtheta_pp,rtheta_base, &
3140+
!$acc ru_save,ru_p) &
3141+
!$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, &
3142+
!$acc ru,u,wwAvg,ruAvg)
3143+
if (rk_step == 3) then
3144+
!$acc exit data delete(rt_diabatic_tend,exner_base) &
3145+
!$acc copyout(exner,pressure_p)
3146+
end if
3147+
MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]')
30793148
30803149
end subroutine atm_recover_large_step_variables_work
30813150

0 commit comments

Comments
 (0)