From df39cf2014675b224fb8f3a640f70dea264a3f18 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 1 Apr 2024 16:09:30 +0000 Subject: [PATCH] Simplify GPU variant timings --- src/ecwam/runwam.F90 | 14 +++++++------- src/ecwam/wamintgr_loki_gpu.F90 | 15 +++++++++------ src/ecwam/yowstat.F90 | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/ecwam/runwam.F90 b/src/ecwam/runwam.F90 index cbdecb53..326f9c78 100644 --- a/src/ecwam/runwam.F90 +++ b/src/ecwam/runwam.F90 @@ -102,7 +102,7 @@ SUBROUTINE RUNWAM USE YOWMPP , ONLY : IRANK ,NPROC USE YOWSTAT , ONLY : CDATEE ,CDTPRO , & & IPROPAGS ,LSUBGRID ,IREFRA ,IDELPRO, TIME_PHYS, & - & TIME_PROPAG, TIME_PHYS_KERNEL + & TIME_PROPAG, TIME_OFFLOAD USE YOWWAMI , ONLY : CBPLTDT ,CEPLTDT USE YOWALTAS , ONLY : LODBRALT USE MPL_MODULE, ONLY : MPL_INIT, MPL_END, MPL_COMM @@ -387,10 +387,10 @@ SUBROUTINE RUNWAM WRITE (IU06,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' #if defined(WAM_GPU) - WRITE (IU06,'(A)') ' + SOURCE TERM TOTAL TIME +' + WRITE (IU06,'(A)') ' + SOURCE TERM TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' - WRITE (IU06,'(A)') ' + SOURCE TERM KERNEL TIME +' - WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS_KERNEL, ' +' + WRITE (IU06,'(A)') ' + DATA OFFLOAD TIME +' + WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_OFFLOAD, ' +' #else WRITE (IU06,'(A)') ' + SOURCE TERM TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' @@ -405,10 +405,10 @@ SUBROUTINE RUNWAM WRITE (6,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' #if defined(WAM_GPU) - WRITE (6,'(A)') ' + SOURCE TERM TOTAL TIME +' + WRITE (6,'(A)') ' + SOURCE TERM TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' - WRITE (6,'(A)') ' + SOURCE TERM KERNEL TIME +' - WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS_KERNEL, ' +' + WRITE (6,'(A)') ' + DATA OFFLOAD TIME +' + WRITE (6,'(A,F18.2,A)') ' + ', TIME_OFFLOAD, ' +' #else WRITE (6,'(A)') ' + SOURCE TERM TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index a15f2964..876087f4 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -40,7 +40,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & USE YOWPARAM , ONLY : NANG, NFRE USE YOWPCONS , ONLY : EPSMIN USE YOWSTAT , ONLY : CDTPRO, IDELPRO, IDELT, IDELWI, LLSOURCE, TIME_PROPAG, TIME_PHYS, & - & TIME_PHYS_KERNEL + & TIME_OFFLOAD USE YOWWIND , ONLY : CDAWIFL, CDATEWO, CDATEFL USE YOWFIELD_MOD, ONLY : FREQUENCY_FIELD, ENVIRONMENT_FIELD, FORCING_FIELDS_FIELD, & & WAVE2OCEAN_FIELD, INTGT_PARAM_FIELDS_FIELD, SOURCE_CONTRIBS_FIELD @@ -73,7 +73,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: FL1 REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: XLLWS ! TOTAL WINDSEA MASK FROM INPUT SOURCE TERM -REAL(KIND=JWRB) :: TIME0, TIME2 +REAL(KIND=JWRB) :: TIME0 INTEGER(KIND=JWIM) :: IJ, K, M @@ -160,12 +160,14 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* PROPAGATION TIME ! ---------------- +TIME0=-WAM_USER_CLOCK() CALL SRC_CONTRIBS%INIT(FL1=FL1) CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR) CALL WVPRPT_FIELD%INIT(WAVNUM=WVPRPT%WAVNUM, CGROUP=WVPRPT%CGROUP, OMOSNH2KD=WVPRPT%OMOSNH2KD) CALL WVPRPT_FIELD%UPDATE_DEVICE(WAVNUM=WAVNUM_DPTR, CGROUP=CGROUP_DPTR, OMOSNH2KD=OMOSNH2KD_DPTR) CALL WVENVI_FIELD%INIT(DEPTH=WVENVI%DEPTH, DELLAM1=WVENVI%DELLAM1, COSPHM1=WVENVI%COSPHM1, UCUR=WVENVI%UCUR, VCUR=WVENVI%VCUR) CALL WVENVI_FIELD%UPDATE_DEVICE(DEPTH=DEPTH_DPTR, DELLAM1=DELLAM1_DPTR, COSPHM1=COSPHM1_DPTR, UCUR=UCUR_DPTR, VCUR=VCUR_DPTR) +TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 !$acc data present(FL1_DPTR, WAVNUM_DPTR, CGROUP_DPTR, OMOSNH2KD_DPTR, DEPTH_DPTR, DELLAM1_DPTR, COSPHM1_DPTR, UCUR_DPTR, VCUR_DPTR) IF (CDATE == CDTPRA) THEN @@ -189,7 +191,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL GSTATS(1431,0) IF (LLSOURCE) THEN - TIME2=-WAM_USER_CLOCK() + TIME0=-WAM_USER_CLOCK() CALL WVPRPT_FIELD%INIT(CIWA=WVPRPT%CIWA, CINV=WVPRPT%CINV, XK2CG=WVPRPT%XK2CG, & & STOKFAC=WVPRPT%STOKFAC) CALL WVENVI_FIELD%INIT(EMAXDPT=WVENVI%EMAXDPT, IOBND=WVENVI%IOBND, IODP=WVENVI%IODP) @@ -206,6 +208,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & TAUOCYD=INTFLDS%TAUOCYD, TAUOC=INTFLDS%TAUOC, PHIOCD=INTFLDS%PHIOCD, PHIEPS=INTFLDS%PHIEPS, & & PHIAW=INTFLDS%PHIAW) CALL SRC_CONTRIBS%INIT(XLLWS=XLLWS, MIJ=MIJ) + TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 !$loki update_device @@ -248,16 +251,18 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & PHIAW_DPTR(:,ICHNK), MIJ_DPTR(:,ICHNK), XLLWS_DPTR(:,:,:,ICHNK)) END DO - TIME_PHYS_KERNEL = TIME_PHYS_KERNEL + (TIME0+WAM_USER_CLOCK())*1.E-06 + TIME_PHYS = TIME_PHYS + (TIME0+WAM_USER_CLOCK())*1.E-06 !$loki end data + TIME0=-WAM_USER_CLOCK() CALL WVPRPT_FIELD%ENSURE_HOST() CALL WVENVI_FIELD%ENSURE_HOST() CALL FF_NOW_FIELD%ENSURE_HOST() CALL WAM2NEMO_FIELD%ENSURE_HOST() CALL INTFLDS_FIELD%ENSURE_HOST() CALL SRC_CONTRIBS%ENSURE_HOST() + TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 !$loki update_host CALL WVPRPT_FIELD%FINAL() @@ -267,8 +272,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL INTFLDS_FIELD%FINAL() CALL SRC_CONTRIBS%FINAL() - TIME_PHYS = TIME_PHYS + (TIME2+WAM_USER_CLOCK())*1.E-06 - IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 ELSE diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index ba9104e9..1dc39c56 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -92,7 +92,7 @@ MODULE YOWSTAT REAL(KIND=JWRB) :: TIME_PROPAG = 0._JWRB REAL(KIND=JWRB) :: TIME_PHYS = 0._JWRB - REAL(KIND=JWRB) :: TIME_PHYS_KERNEL = 0._JWRB + REAL(KIND=JWRB) :: TIME_OFFLOAD = 0._JWRB !* VARIABLE. TYPE. PURPOSE. ! --------- ------- --------