From e234633ef0a51faf892d10219c345d710f21abbd Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 21 May 2024 14:32:23 +0000 Subject: [PATCH] Remove unnecessary data transfers --- src/ecwam/cireduce_loki_gpu.F90 | 4 +++- src/ecwam/ctuwupdt.F90 | 6 +----- src/ecwam/initdpthflds.F90 | 4 ++++ src/ecwam/mpdecomp.F90 | 2 ++ src/ecwam/proenvhalo.F90 | 6 +----- src/ecwam/readmdlconf.F90 | 1 + src/ecwam/wamintgr_loki_gpu.F90 | 17 ++++++++--------- src/ecwam/yowparam.F90 | 1 - 8 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/ecwam/cireduce_loki_gpu.F90 b/src/ecwam/cireduce_loki_gpu.F90 index 59cea7f6a..cfcfc002f 100644 --- a/src/ecwam/cireduce_loki_gpu.F90 +++ b/src/ecwam/cireduce_loki_gpu.F90 @@ -53,6 +53,7 @@ SUBROUTINE CIREDUCE_LOKI_GPU (WVPRPT, FF_NOW) USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK USE YOWDRVTYPE ,ONLY: FREQUENCY, FORCING_FIELDS + USE YOWSTAT, ONLY: LUPDATE_GPU_GLOBALS ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -93,7 +94,9 @@ SUBROUTINE CIREDUCE_LOKI_GPU (WVPRPT, FF_NOW) ELSE +IF(LUPDATE_GPU_GLOBALS)THEN !$loki update_device +ENDIF CALL GSTATS(1493,0) ! DETERMINE THE WAVE ATTENUATION FACTOR !$acc parallel loop gang present(FF_NOW, WVPRPT) vector_length(NPROMA_WAM) @@ -103,7 +106,6 @@ SUBROUTINE CIREDUCE_LOKI_GPU (WVPRPT, FF_NOW) ENDDO !$acc end parallel loop CALL GSTATS(1493,1) -!$loki update_host ENDIF IF (LHOOK) CALL DR_HOOK('CIREDUCE',1,ZHOOK_HANDLE) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index 4a7e37243..5ddaba449 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -87,8 +87,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (LHOOK) CALL DR_HOOK('CTUWUPDT',0,ZHOOK_HANDLE) -!$acc update device(sinth,costh) -!$acc update device(COSPH, nang, nfre_red) ! DEFINE JXO, JYO, KCR IF (LFRSTCTU) THEN @@ -106,7 +104,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(JYO)) ALLOCATE(JYO(NANG,2)) IF (.NOT. ALLOCATED(KCR)) ALLOCATE(KCR(NANG,4)) -!$acc update device(KLON, KLAT, KCOR, JXO, JYO, KCR, KPM) +!$acc update device(JXO, JYO, KCR, KPM) !$acc kernels DO K=1,NANG @@ -197,8 +195,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & NPROMA=(IJL-IJS+1)/MTHREADS + 1 #ifdef _OPENACC -!$acc update device(WLAT,WCOR) -!$acc update device(NFRE_RED,ZPI,FR,DELTH,NANG) !$acc data present(KLAT,WLAT,KCOR,WCOR,WLATN,WLONN,WCORN) #else !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL) diff --git a/src/ecwam/initdpthflds.F90 b/src/ecwam/initdpthflds.F90 index 41eea9ab7..a2fbe92a3 100644 --- a/src/ecwam/initdpthflds.F90 +++ b/src/ecwam/initdpthflds.F90 @@ -87,6 +87,10 @@ SUBROUTINE INITDPTHFLDS(WVENVI, WVPRPT, WVPRPT_LAND) WVPRPT_LAND%CIWA(:) = 1.0_JWRB +#ifdef WAM_GPU + CALL WVPRPT_LAND%SYNC_DEVICE_RDONLY() +#endif + IF (LHOOK) CALL DR_HOOK('INITDPTHFLDS',1,ZHOOK_HANDLE) END SUBROUTINE INITDPTHFLDS diff --git a/src/ecwam/mpdecomp.F90 b/src/ecwam/mpdecomp.F90 index a3c3c27b9..a571ba83b 100644 --- a/src/ecwam/mpdecomp.F90 +++ b/src/ecwam/mpdecomp.F90 @@ -1935,6 +1935,8 @@ SUBROUTINE MPDECOMP(NPR, MAXLEN, LLIRANK, LLWVENVI) WRITE(IU06,*) '' CALL FLUSH(IU06) +!$acc update device(KLON, KLAT, KCOR, WLAT, WCOR) + IF (LHOOK) CALL DR_HOOK('MPDECOMP',1,ZHOOK_HANDLE) END SUBROUTINE MPDECOMP diff --git a/src/ecwam/proenvhalo.F90 b/src/ecwam/proenvhalo.F90 index 0414edacc..a5a6b0705 100644 --- a/src/ecwam/proenvhalo.F90 +++ b/src/ecwam/proenvhalo.F90 @@ -89,11 +89,8 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & !$OMP END PARALLEL DO #endif /*_OPENACC*/ -!$acc data copyin(WVPRPT_LAND) & -!$acc copyin(WVPRPT_LAND%WAVNUM,WVPRPT_LAND%CGROUP,WVPRPT_LAND%OMOSNH2KD) - CALL MPEXCHNG(BUFFER_EXT, 3*NFRE_RED+5, 1, 1) - !$acc kernels + !$acc kernels present(WVPRPT_LAND) BUFFER_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%WAVNUM(1:NFRE_RED) BUFFER_EXT(NSUP+1,NFRE_RED+1:2*NFRE_RED) = WVPRPT_LAND%CGROUP(1:NFRE_RED) BUFFER_EXT(NSUP+1,2*NFRE_RED+1:3*NFRE_RED) = WVPRPT_LAND%OMOSNH2KD(1:NFRE_RED) @@ -105,7 +102,6 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & !$acc end kernels !$acc end data -!$acc end data IF (LHOOK) CALL DR_HOOK('PROENVHALO',1,ZHOOK_HANDLE) diff --git a/src/ecwam/readmdlconf.F90 b/src/ecwam/readmdlconf.F90 index c3837bac7..c45d24e84 100644 --- a/src/ecwam/readmdlconf.F90 +++ b/src/ecwam/readmdlconf.F90 @@ -142,6 +142,7 @@ SUBROUTINE READMDLCONF (IU07) IJS = 1 IJL = NIBLO +!$acc update device(COSPH) IF (LHOOK) CALL DR_HOOK('READMDLCONF',1,ZHOOK_HANDLE) diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index d2e41d388..ba2659be8 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -40,7 +40,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & USE YOWPARAM , ONLY : NANG, NFRE USE YOWPCONS , ONLY : EPSMIN USE YOWSTAT , ONLY : CDTPRO, IDELPRO, IDELT, IDELWI, LLSOURCE, TIME_PROPAG, TIME_PHYS, & - & TIME_OFFLOAD + & TIME_OFFLOAD, LUPDATE_GPU_GLOBALS USE YOWWIND , ONLY : CDAWIFL, CDATEWO, CDATEFL USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -92,6 +92,11 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & ! ---------------- TIME0=-WAM_USER_CLOCK() + +IF(LUPDATE_GPU_GLOBALS)THEN +!$loki update_device +ENDIF + CALL VARS_4D%SYNC_DEVICE_RDWR() CALL BLK2GLO%SYNC_DEVICE_RDONLY() CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & @@ -128,13 +133,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & WVPRPT, FF_NOW, FF_NEXT) -TIME0=-WAM_USER_CLOCK() - -!$loki update_device - - -TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 - ! IT IS TIME TO INTEGRATE THE SOURCE TERMS ! ---------------------------------------- IF (CDATE >= CDTIMPNEXT) THEN @@ -211,6 +209,8 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & ENDIF +LUPDATE_GPU_GLOBALS = .FALSE. + TIME0=-WAM_USER_CLOCK() CALL WVPRPT%SYNC_HOST_RDWR() CALL WVENVI%SYNC_HOST_RDWR() @@ -222,7 +222,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL MIJ%SYNC_HOST_RDWR() CALL BLK2GLO%SYNC_HOST_RDWR() -!$loki update_host TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 IF (LHOOK) CALL DR_HOOK('WAMINTGR',1,ZHOOK_HANDLE) diff --git a/src/ecwam/yowparam.F90 b/src/ecwam/yowparam.F90 index 47e140f73..f8488a29e 100644 --- a/src/ecwam/yowparam.F90 +++ b/src/ecwam/yowparam.F90 @@ -64,6 +64,5 @@ MODULE YOWPARAM ! DONE IN LATITUNAL BANDS ! (like it used to be done). ! ---------------------------------------------------------------------- -!$acc declare create( nang ) !$acc declare create( nfre_red ) END MODULE YOWPARAM