Skip to content

Commit be338c9

Browse files
committed
Initial OpenACC port of atm_recover_large_step_variables_work
These changes enable the GPU execution of the atm_recover_large_step_variables_work subroutine by adding OpenACC directives. In order to factor out the time to transfer data between CPU and GPU within this routine, the new timer 'atm_recover_large_step_variables [ACC_data_xfer]' has been added to the log file. Changes include: - Preparing the routine for porting. Modifying whitespace to make regions clear, changing implicit loop assignments to be explicit, and fusing some loops. - Adding OpenACC parallel and loop directives to the do-loops. - Managing the invariant fields needed for this routine in mpas_atm_dynamics_{init,finalize} so they are available across timesteps. - Managing the other fields needed in the routine with OpenACC directives and using default(present) to ensure data isn't missed. default(present) clauses cause a run-time error if data isn't present and will help as we fuse data regions.
1 parent 4ea6abb commit be338c9

File tree

1 file changed

+96
-27
lines changed

1 file changed

+96
-27
lines changed

src/core_atmosphere/dynamics/mpas_atm_time_integration.F

Lines changed: 96 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ subroutine mpas_atm_dynamics_init(domain)
233233
real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell
234234
real (kind=RKIND), dimension(:), pointer :: fzm
235235
real (kind=RKIND), dimension(:), pointer :: fzp
236+
real (kind=RKIND), dimension(:,:,:), pointer :: zb
237+
real (kind=RKIND), dimension(:,:,:), pointer :: zb3
236238
#endif
237239

238240

@@ -356,6 +358,12 @@ subroutine mpas_atm_dynamics_init(domain)
356358
call mpas_pool_get_array(mesh, 'fzp', fzp)
357359
!$acc enter data copyin(fzp)
358360

361+
call mpas_pool_get_array(mesh, 'zb', zb)
362+
!$acc enter data copyin(zb)
363+
364+
call mpas_pool_get_array(mesh, 'zb3', zb3)
365+
!$acc enter data copyin(zb3)
366+
359367
#endif
360368

361369
end subroutine mpas_atm_dynamics_init
@@ -425,6 +433,8 @@ subroutine mpas_atm_dynamics_finalize(domain)
425433
real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell
426434
real (kind=RKIND), dimension(:), pointer :: fzm
427435
real (kind=RKIND), dimension(:), pointer :: fzp
436+
real (kind=RKIND), dimension(:,:,:), pointer :: zb
437+
real (kind=RKIND), dimension(:,:,:), pointer :: zb3
428438
#endif
429439

430440

@@ -547,6 +557,13 @@ subroutine mpas_atm_dynamics_finalize(domain)
547557

548558
call mpas_pool_get_array(mesh, 'fzp', fzp)
549559
!$acc exit data delete(fzp)
560+
561+
call mpas_pool_get_array(mesh, 'zb', zb)
562+
!$acc exit data delete(zb)
563+
564+
call mpas_pool_get_array(mesh, 'zb3', zb3)
565+
!$acc exit data delete(zb3)
566+
550567
#endif
551568

552569
end subroutine mpas_atm_dynamics_finalize
@@ -2682,7 +2699,7 @@ subroutine atm_recover_large_step_variables( state, diag, tend, mesh, configs, d
26822699
cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, &
26832700
cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd)
26842701
2685-
! reconstitute state variables from acoustic-step perturbation variables
2702+
! reconstitute state variables from acoustic-step perturbation variables
26862703
! after the acoustic steps. The perturbation variables were originally set in
26872704
! subroutine atm_set_smlstep_pert_variables prior to their acoustic-steps update.
26882705
! we are also computing a few other state-derived variables here.
@@ -2812,7 +2829,7 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
28122829
real (kind=RKIND), intent(in) :: dt
28132830
28142831
integer, dimension(nCells+1), intent(in) :: bdyMaskCell
2815-
2832+
28162833
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: wwAvg
28172834
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw_save
28182835
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: w
@@ -2863,45 +2880,70 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
28632880
integer :: i, iCell, iEdge, k, cell1, cell2
28642881
real (kind=RKIND) :: invNs, rcv, p0, flux
28652882
2883+
MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]')
2884+
!$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, &
2885+
!$acc rtheta_p_save,rtheta_pp,rtheta_base, &
2886+
!$acc ru_save,ru_p,wwAvg,ruAvg) &
2887+
!$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, &
2888+
!$acc ru,u)
2889+
if (rk_step == 3) then
2890+
!$acc enter data copyin(rt_diabatic_tend,exner_base) &
2891+
!$acc create(exner,pressure_p)
2892+
end if
2893+
MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]')
28662894
28672895
rcv = rgas/(cp-rgas)
28682896
p0 = 1.0e+05 ! this should come from somewhere else...
28692897
2870-
! Avoid FP errors caused by a potential division by zero below by
2898+
! Avoid FP errors caused by a potential division by zero below by
28712899
! initializing the "garbage cell" of rho_zz to a non-zero value
2900+
!$acc parallel default(present)
2901+
!$acc loop gang vector
28722902
do k=1,nVertLevels
28732903
rho_zz(k,nCells+1) = 1.0
28742904
end do
2905+
!$acc end parallel
28752906
28762907
! compute new density everywhere so we can compute u from ru.
28772908
! we will also need it to compute theta_m below
28782909
28792910
invNs = 1 / real(ns,RKIND)
28802911
2912+
!$acc parallel default(present)
2913+
!$acc loop gang worker
28812914
do iCell=cellStart,cellEnd
28822915
28832916
!DIR$ IVDEP
2917+
!$acc loop vector
28842918
do k = 1, nVertLevels
28852919
rho_p(k,iCell) = rho_p_save(k,iCell) + rho_pp(k,iCell)
28862920
28872921
rho_zz(k,iCell) = rho_p(k,iCell) + rho_base(k,iCell)
28882922
end do
28892923
2924+
rw(1,iCell) = 0.0
28902925
w(1,iCell) = 0.0
28912926
28922927
!DIR$ IVDEP
2928+
!$acc loop vector
28932929
do k = 2, nVertLevels
28942930
wwAvg(k,iCell) = rw_save(k,iCell) + (wwAvg(k,iCell) * invNs)
28952931
rw(k,iCell) = rw_save(k,iCell) + rw_p(k,iCell)
28962932
28972933
! pick up part of diagnosed w from omega - divide by density later
28982934
w(k,iCell) = rw(k,iCell)/(fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell))
2899-
2935+
29002936
end do
29012937
2938+
rw(nVertLevels+1,iCell) = 0.0
29022939
w(nVertLevels+1,iCell) = 0.0
2940+
end do
2941+
!$acc end parallel
29032942
2904-
if (rk_step == 3) then
2943+
if (rk_step == 3) then
2944+
!$acc parallel default(present)
2945+
!$acc loop collapse(2)
2946+
do iCell=cellStart,cellEnd
29052947
!DIR$ IVDEP
29062948
do k = 1, nVertLevels
29072949
rtheta_p(k,iCell) = rtheta_p_save(k,iCell) + rtheta_pp(k,iCell) &
@@ -2912,37 +2954,48 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
29122954
pressure_p(k,iCell) = zz(k,iCell) * rgas * (exner(k,iCell)*rtheta_p(k,iCell)+rtheta_base(k,iCell) &
29132955
* (exner(k,iCell)-exner_base(k,iCell)))
29142956
end do
2915-
else
2957+
end do
2958+
!$acc end parallel
2959+
else
2960+
!$acc parallel default(present)
2961+
!$acc loop collapse(2)
2962+
do iCell=cellStart,cellEnd
29162963
!DIR$ IVDEP
29172964
do k = 1, nVertLevels
29182965
rtheta_p(k,iCell) = rtheta_p_save(k,iCell) + rtheta_pp(k,iCell)
29192966
theta_m(k,iCell) = (rtheta_p(k,iCell) + rtheta_base(k,iCell))/rho_zz(k,iCell)
29202967
end do
2921-
end if
2922-
2923-
end do
2968+
end do
2969+
!$acc end parallel
2970+
end if
29242971
2925-
! recover time-averaged ruAvg on all edges of owned cells (for upcoming scalar transport).
2926-
! we solved for these in the acoustic-step loop.
2972+
! recover time-averaged ruAvg on all edges of owned cells (for upcoming scalar transport).
2973+
! we solved for these in the acoustic-step loop.
29272974
! we will compute ru and u here also, given we are here, even though we only need them on nEdgesSolve
29282975
29292976
!$OMP BARRIER
29302977
2978+
!$acc parallel default(present)
2979+
!$acc loop gang worker
29312980
do iEdge=edgeStart,edgeEnd
29322981
29332982
cell1 = cellsOnEdge(1,iEdge)
29342983
cell2 = cellsOnEdge(2,iEdge)
29352984
29362985
!DIR$ IVDEP
2986+
!$acc loop vector
29372987
do k = 1, nVertLevels
29382988
ruAvg(k,iEdge) = ru_save(k,iEdge) + (ruAvg(k,iEdge) * invNs)
29392989
ru(k,iEdge) = ru_save(k,iEdge) + ru_p(k,iEdge)
29402990
u(k,iEdge) = 2.*ru(k,iEdge)/(rho_zz(k,cell1)+rho_zz(k,cell2))
29412991
end do
29422992
end do
2993+
!$acc end parallel
29432994
29442995
!$OMP BARRIER
29452996
2997+
!$acc parallel default(present)
2998+
!$acc loop gang worker
29462999
do iCell=cellStart,cellEnd
29473000
29483001
! finish recovering w from (rho*omega)_p. as when we formed (rho*omega)_p from u and w, we need
@@ -2951,33 +3004,49 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
29513004
29523005
if (bdyMaskCell(iCell) <= nRelaxZone) then ! addition for regional_MPAS, no spec zone update
29533006
2954-
do i=1,nEdgesOnCell(iCell)
2955-
iEdge=edgesOnCell(i,iCell)
3007+
!$acc loop seq
3008+
do i=1,nEdgesOnCell(iCell)
3009+
iEdge=edgesOnCell(i,iCell)
29563010
2957-
flux = (cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge))
2958-
w(1,iCell) = w(1,iCell) + edgesOnCell_sign(i,iCell) * &
2959-
(zb_cell(1,i,iCell) + sign(1.0_RKIND,flux)*zb3_cell(1,i,iCell))*flux
3011+
flux = (cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge))
3012+
w(1,iCell) = w(1,iCell) + edgesOnCell_sign(i,iCell) * &
3013+
(zb_cell(1,i,iCell) + sign(1.0_RKIND,flux)*zb3_cell(1,i,iCell))*flux
29603014
29613015
!DIR$ IVDEP
2962-
do k = 2, nVertLevels
2963-
flux = (fzm(k)*ru(k,iEdge)+fzp(k)*ru(k-1,iEdge))
2964-
w(k,iCell) = w(k,iCell) + edgesOnCell_sign(i,iCell) * &
2965-
(zb_cell(k,i,iCell)+sign(1.0_RKIND,flux)*zb3_cell(k,i,iCell))*flux
2966-
end do
3016+
!$acc loop vector
3017+
do k = 2, nVertLevels
3018+
flux = (fzm(k)*ru(k,iEdge)+fzp(k)*ru(k-1,iEdge))
3019+
w(k,iCell) = w(k,iCell) + edgesOnCell_sign(i,iCell) * &
3020+
(zb_cell(k,i,iCell)+sign(1.0_RKIND,flux)*zb3_cell(k,i,iCell))*flux
3021+
end do
29673022
2968-
end do
3023+
end do
29693024
2970-
w(1,iCell) = w(1,iCell)/(cf1*rho_zz(1,iCell)+cf2*rho_zz(2,iCell)+cf3*rho_zz(3,iCell))
3025+
w(1,iCell) = w(1,iCell)/(cf1*rho_zz(1,iCell)+cf2*rho_zz(2,iCell)+cf3*rho_zz(3,iCell))
29713026
29723027
2973-
!DIR$ IVDEP
2974-
do k = 2, nVertLevels
2975-
w(k,iCell) = w(k,iCell)/(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell))
2976-
end do
3028+
!DIR$ IVDEP
3029+
!$acc loop vector
3030+
do k = 2, nVertLevels
3031+
w(k,iCell) = w(k,iCell)/(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell))
3032+
end do
29773033
29783034
end if ! addition for regional_MPAS, no spec zone update
29793035
29803036
end do
3037+
!$acc end parallel
3038+
3039+
MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]')
3040+
!$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, &
3041+
!$acc rtheta_p_save,rtheta_pp,rtheta_base, &
3042+
!$acc ru_save,ru_p) &
3043+
!$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, &
3044+
!$acc ru,u,wwAvg,ruAvg)
3045+
if (rk_step == 3) then
3046+
!$acc exit data delete(rt_diabatic_tend,exner_base) &
3047+
!$acc copyout(exner,pressure_p)
3048+
end if
3049+
MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]')
29813050
29823051
end subroutine atm_recover_large_step_variables_work
29833052

0 commit comments

Comments
 (0)