-
Notifications
You must be signed in to change notification settings - Fork 354
Initial OpenACC port of mpas_atm_update_bdy_tend #1301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,9 +99,12 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr | |
type (mpas_pool_type), pointer :: lbc | ||
real (kind=RKIND) :: dt | ||
|
||
integer, pointer :: nCells | ||
integer, pointer :: nEdges | ||
integer, pointer :: index_qv | ||
integer, pointer :: nCells_ptr | ||
integer, pointer :: nEdges_ptr | ||
integer, pointer :: nVertLevels_ptr | ||
integer, pointer :: index_qv_ptr | ||
integer, pointer :: nScalars_ptr | ||
integer :: nCells, nEdges, nVertLevels, index_qv, nScalars | ||
|
||
real (kind=RKIND), dimension(:,:), pointer :: u | ||
real (kind=RKIND), dimension(:,:), pointer :: ru | ||
|
@@ -129,7 +132,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr | |
type (MPAS_Time_Type) :: currTime | ||
type (MPAS_TimeInterval_Type) :: lbc_interval | ||
character(len=StrKIND) :: read_time | ||
integer :: iEdge | ||
integer :: iEdge, iCell, k, j | ||
integer :: cell1, cell2 | ||
|
||
|
||
|
@@ -167,6 +170,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr | |
! Compute any derived fields from those that were read from the lbc_in stream | ||
! | ||
call mpas_pool_get_array(lbc, 'lbc_u', u, 2) | ||
call mpas_pool_get_array(lbc, 'lbc_w', w, 2) | ||
call mpas_pool_get_array(lbc, 'lbc_ru', ru, 2) | ||
call mpas_pool_get_array(lbc, 'lbc_rho_edge', rho_edge, 2) | ||
call mpas_pool_get_array(lbc, 'lbc_theta', theta, 2) | ||
|
@@ -176,26 +180,86 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr | |
call mpas_pool_get_array(lbc, 'lbc_scalars', scalars, 2) | ||
|
||
call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) | ||
call mpas_pool_get_dimension(mesh, 'nCells', nCells) | ||
call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) | ||
call mpas_pool_get_dimension(lbc, 'index_qv', index_qv) | ||
call mpas_pool_get_dimension(mesh, 'nCells', nCells_ptr) | ||
call mpas_pool_get_dimension(mesh, 'nEdges', nEdges_ptr) | ||
call mpas_pool_get_dimension(mesh, 'nVertLevels', nVertLevels_ptr) | ||
call mpas_pool_get_dimension(state, 'num_scalars', nScalars_ptr) | ||
call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) | ||
call mpas_pool_get_array(mesh, 'zz', zz) | ||
|
||
MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') | ||
if (.not. firstCall) then | ||
call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) | ||
call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we now get pointers to arrays here, I think it would be fine to include in this PR the removal of redundant calls to |
||
|
||
!$acc enter data copyin(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & | ||
!$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & | ||
!$acc lbc_tend_rho, lbc_tend_scalars) | ||
end if | ||
!$acc enter data copyin(u, w, theta, rho, scalars) | ||
!$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) | ||
MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') | ||
|
||
! Dereference the pointers to avoid non-array pointer for OpenACC | ||
nCells = nCells_ptr | ||
nEdges = nEdges_ptr | ||
nVertLevels = nVertLevels_ptr | ||
nScalars = nScalars_ptr | ||
index_qv = index_qv_ptr | ||
|
||
! Compute lbc_rho_zz | ||
|
||
!$acc kernels | ||
zz(:,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line | ||
rho_zz(:,:) = rho(:,:) / zz(:,:) | ||
!$acc end kernels | ||
|
||
!$acc parallel | ||
! Compute lbc_rho_zz | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could either remove this comment, or remove the same comment on line 217. I'd suggest just removing this line. |
||
!$acc loop gang vector collapse(2) | ||
do iCell=1,nCells+1 | ||
do k=1,nVertLevels | ||
rho_zz(k,iCell) = rho(k,iCell) / zz(k,iCell) | ||
end do | ||
end do | ||
!$acc end parallel | ||
|
||
! Average lbc_rho_zz to edges | ||
!$acc parallel | ||
!$acc loop gang worker | ||
do iEdge=1,nEdges | ||
cell1 = cellsOnEdge(1,iEdge) | ||
cell2 = cellsOnEdge(2,iEdge) | ||
if (cell1 > 0 .and. cell2 > 0) then | ||
rho_edge(:,iEdge) = 0.5_RKIND * (rho_zz(:,cell1) + rho_zz(:,cell2)) | ||
!$acc loop vector | ||
do k = 1, nVertLevels | ||
rho_edge(k,iEdge) = 0.5_RKIND * (rho_zz(k,cell1) + rho_zz(k,cell2)) | ||
end do | ||
end if | ||
end do | ||
!$acc end parallel | ||
|
||
!$acc parallel | ||
!$acc loop gang vector collapse(2) | ||
do iEdge=1,nEdges+1 | ||
do k=1,nVertLevels | ||
ru(k,iEdge) = u(k,iEdge) * rho_edge(k,iEdge) | ||
end do | ||
end do | ||
|
||
ru(:,:) = u(:,:) * rho_edge(:,:) | ||
rtheta_m(:,:) = theta(:,:) * rho_zz(:,:) * (1.0_RKIND + rvord * scalars(index_qv,:,:)) | ||
!$acc loop gang vector collapse(2) | ||
do iCell=1,nCells+1 | ||
do k=1,nVertLevels | ||
rtheta_m(k,iCell) = theta(k,iCell) * rho_zz(k,iCell) * (1.0_RKIND + rvord * scalars(index_qv,k,iCell)) | ||
end do | ||
end do | ||
!$acc end parallel | ||
|
||
if (.not. firstCall) then | ||
lbc_interval = currTime - LBC_intv_end | ||
|
@@ -225,15 +289,58 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr | |
|
||
|
||
dt = 1.0_RKIND / dt | ||
lbc_tend_u(:,:) = (u(:,:) - lbc_tend_u(:,:)) * dt | ||
lbc_tend_ru(:,:) = (ru(:,:) - lbc_tend_ru(:,:)) * dt | ||
lbc_tend_rho_edge(:,:) = (rho_edge(:,:) - lbc_tend_rho_edge(:,:)) * dt | ||
lbc_tend_w(:,:) = (w(:,:) - lbc_tend_w(:,:)) * dt | ||
lbc_tend_theta(:,:) = (theta(:,:) - lbc_tend_theta(:,:)) * dt | ||
lbc_tend_rtheta_m(:,:) = (rtheta_m(:,:) - lbc_tend_rtheta_m(:,:)) * dt | ||
lbc_tend_rho_zz(:,:) = (rho_zz(:,:) - lbc_tend_rho_zz(:,:)) * dt | ||
lbc_tend_rho(:,:) = (rho(:,:) - lbc_tend_rho(:,:)) * dt | ||
lbc_tend_scalars(:,:,:) = (scalars(:,:,:) - lbc_tend_scalars(:,:,:)) * dt | ||
|
||
!$acc parallel default(present) | ||
!$acc loop gang vector collapse(2) | ||
do iEdge=1,nEdges+1 | ||
do k=1,nVertLevels | ||
lbc_tend_u(k,iEdge) = (u(k,iEdge) - lbc_tend_u(k,iEdge)) * dt | ||
lbc_tend_ru(k,iEdge) = (ru(k,iEdge) - lbc_tend_ru(k,iEdge)) * dt | ||
end do | ||
end do | ||
|
||
!$acc loop gang vector collapse(2) | ||
do iEdge=1,nEdges+1 | ||
do k=1,nVertLevels | ||
lbc_tend_rho_edge(k,iEdge) = (rho_edge(k,iEdge) - lbc_tend_rho_edge(k,iEdge)) * dt | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was there a specific reason for computing |
||
end do | ||
end do | ||
|
||
!$acc loop gang vector collapse(2) | ||
do iCell=1,nCells+1 | ||
do k=1,nVertLevels+1 | ||
lbc_tend_w(k,iCell) = (w(k,iCell) - lbc_tend_w(k,iCell)) * dt | ||
end do | ||
end do | ||
|
||
!$acc loop gang vector collapse(2) | ||
do iCell=1,nCells+1 | ||
do k=1,nVertLevels | ||
lbc_tend_theta(k,iCell) = (theta(k,iCell) - lbc_tend_theta(k,iCell)) * dt | ||
lbc_tend_rtheta_m(k,iCell) = (rtheta_m(k,iCell) - lbc_tend_rtheta_m(k,iCell)) * dt | ||
end do | ||
end do | ||
|
||
!$acc loop gang vector collapse(2) | ||
do iCell=1,nCells+1 | ||
do k=1,nVertLevels | ||
lbc_tend_rho_zz(k,iCell) = (rho_zz(k,iCell) - lbc_tend_rho_zz(k,iCell)) * dt | ||
lbc_tend_rho(k,iCell) = (rho(k,iCell) - lbc_tend_rho(k,iCell)) * dt | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to the comment above, would there be an advantage, or is there a disadvantage, to combining the computation of |
||
end do | ||
end do | ||
!$acc end parallel | ||
|
||
!$acc parallel default(present) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this |
||
!$acc loop gang | ||
do iCell=1,nCells+1 | ||
!$acc loop vector collapse(2) | ||
do k=1,nVertLevels | ||
do j = 1,nScalars | ||
lbc_tend_scalars(j,k,iCell) = (scalars(j,k,iCell) - lbc_tend_scalars(j,k,iCell)) * dt | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's indent the body of this loop. |
||
end do | ||
end do | ||
end do | ||
!$acc end parallel | ||
|
||
! | ||
! Logging the lbc start and end times appears to be backwards, but | ||
|
@@ -249,6 +356,17 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr | |
|
||
end if | ||
|
||
MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') | ||
if (.not. firstCall) then | ||
!$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & | ||
!$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & | ||
!$acc lbc_tend_rho, lbc_tend_scalars) | ||
end if | ||
|
||
!$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) | ||
!$acc exit data delete(u, w, theta, rho, scalars) | ||
MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') | ||
|
||
LBC_intv_end = currTime | ||
|
||
end subroutine mpas_atm_update_bdy_tend | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a minor detail, but can we move this below the call to
mpas_pool_get_array
forlbc_rho_edge
so that the order of variables is consistent in this block and in the block beginning at line 192?