From ae322e007811487ef5f7c6ab7688a0cba1303949 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Fri, 17 Nov 2023 14:58:36 +0100 Subject: [PATCH 01/30] ACCV6 --- src/ecwam/ctuw.F90 | 297 ++++++++++++++++++++++---------- src/ecwam/ctuwdrv.F90 | 16 +- src/ecwam/ctuwini.F90 | 36 +++- src/ecwam/ctuwupdt.F90 | 62 ++++++- src/ecwam/mpexchng.F90 | 65 +++++-- src/ecwam/mubuf.F90 | 2 +- src/ecwam/propag_wam.F90 | 112 +++++++++++- src/ecwam/propags2.F90 | 121 +++++++++---- src/ecwam/wamintgr_loki_gpu.F90 | 26 ++- src/ecwam/yowgrid.F90 | 1 + src/ecwam/yowmap.F90 | 3 +- src/ecwam/yowparam.F90 | 5 +- src/ecwam/yowstat.F90 | 1 + src/ecwam/yowubuf.F90 | 11 +- 14 files changed, 586 insertions(+), 172 deletions(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index d84223788..ad4cdb36c 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -13,6 +13,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & & WLATM1, WCORM1, DP, & & CGROUP_EXT, OMOSNH2KD_EXT, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) +use nvtx ! ---------------------------------------------------------------------- !**** *CTUW* - COMPUTATION OF THE CONER TRANSPORT SCHEME WEIGHTS. @@ -99,7 +100,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: DRCP,DRCM REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: CURMASK REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,2) :: CGX, CGY - + ! ---------------------------------------------------------------------- @@ -141,86 +142,173 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* LOOP OVER FREQUENCIES. ! ---------------------- +! call nvtxStartRange("ctuw: Loop 1") +!$acc kernels !loop private(CGYP,KIJS,KIJL,CGX,IX,KY,UU,UREL,ISSU,VV,VREL,ISSV,DXP,DYP,ADXP,ADYP,DXUP,DXDW,DYUP,DYDW,DXX,DYY,GRIDAREAM1,WEIGHT) DO M = MSTART, MEND !* LOOP OVER DIRECTIONS. ! --------------------- +!$acc loop private(CGX,CGY) + DO K=1,NANG ! FIND MEAN GROUP VELOCITY COMPONENTS FOR DIRECTION TH(K)+180 ! 
----------------------------------------------------------- - DO IC=1,2 +!$acc loop private(CGYP,IX,KY,UU,UREL,ISSU,VV,VREL,ISSV,DXP,DYP,ADXP,ADYP,DXUP,DXDW,DYUP,DYDW,DXX,DYY,GRIDAREAM1,WEIGHT) DO IJ=KIJS,KIJL - CGX(IJ,IC)= & - & 0.5_JWRB*(CGROUP_EXT(IJ,M)+CGROUP_EXT(KLON(IJ,IC),M)) & +! IC = 1 + + CGX(IJ,1)= & + & 0.5_JWRB*(CGROUP_EXT(IJ,M)+CGROUP_EXT(KLON(IJ,1),M)) & & *SINTH(K)*COSPHM1_EXT(IJ) ! IRREGULAR GRID IF (IRGG == 1) THEN - CGYP=WLAT(IJ,IC)*CGROUP_EXT(KLAT(IJ,IC,1),M)+ & - & (1.0_JWRB-WLAT(IJ,IC))*CGROUP_EXT(KLAT(IJ,IC,2),M) + CGYP=WLAT(IJ,1)*CGROUP_EXT(KLAT(IJ,1,1),M)+ & + & (1.0_JWRB-WLAT(IJ,1))*CGROUP_EXT(KLAT(IJ,1,2),M) ELSE ! REGULAR GRID - CGYP=CGROUP_EXT(KLAT(IJ,IC,1),M) + CGYP=CGROUP_EXT(KLAT(IJ,1,1),M) + ENDIF + CGY(IJ,1)=0.5_JWRB*(CGROUP_EXT(IJ,M)+DP(IJ,1)*CGYP)*COSTH(K) + + IX=BLK2GLO%IXLG(IJ) + KY=BLK2GLO%KXLT(IJ) + IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN + UU=U_EXT(IJ)*COSPHM1_EXT(IJ) + UREL=CGX(IJ,1)+UU + ISSU(1)=ISAMESIGN(UREL,CGX(IJ,1)) + VV=V_EXT(IJ)*0.5_JWRB*(1.0_JWRB+DP(IJ,1)) + VREL=CGY(IJ,1)+VV + ISSV(1)=ISAMESIGN(VREL,CGY(IJ,1)) + ELSE + UREL=CGX(IJ,1) + ISSU(1)=1 + VREL=CGY(IJ,1) + ISSV(1)=1 ENDIF - CGY(IJ,IC)=0.5_JWRB*(CGROUP_EXT(IJ,M)+DP(IJ,IC)*CGYP)*COSTH(K) - ENDDO - ENDDO + DXP=-DELPRO*UREL*CMTODEG + DYP=-DELPRO*VREL*CMTODEG + + ADXP(1)=ABS(DXP) + ADYP(1)=ABS(DYP) + DXUP(1)=ADXP(1)*ISSU(1) + DXDW(1)=ADXP(1)*(1-ISSU(1)) + DYUP(1)=ADYP(1)*ISSV(1) + DYDW(1)=ADYP(1)*(1-ISSV(1)) +! GET ADVECTION WEIGHT FOR ALL NEIGHBOURING GRID POINTS +! IC = 2 + CGX(IJ,2)= & + & 0.5_JWRB*(CGROUP_EXT(IJ,M)+CGROUP_EXT(KLON(IJ,2),M)) & + & *SINTH(K)*COSPHM1_EXT(IJ) +! IRREGULAR GRID + IF (IRGG == 1) THEN + CGYP=WLAT(IJ,2)*CGROUP_EXT(KLAT(IJ,2,1),M)+ & + & (1.0_JWRB-WLAT(IJ,2))*CGROUP_EXT(KLAT(IJ,2,2),M) + ELSE +! REGULAR GRID + CGYP=CGROUP_EXT(KLAT(IJ,2,1),M) + ENDIF + CGY(IJ,2)=0.5_JWRB*(CGROUP_EXT(IJ,M)+DP(IJ,2)*CGYP)*COSTH(K) + IF (IREFRA == 2 .OR. 
IREFRA == 3 ) THEN + UU=U_EXT(IJ)*COSPHM1_EXT(IJ) + UREL=CGX(IJ,2)+UU + ISSU(2)=ISAMESIGN(UREL,CGX(IJ,2)) + VV=V_EXT(IJ)*0.5_JWRB*(1.0_JWRB+DP(IJ,2)) + VREL=CGY(IJ,2)+VV + ISSV(2)=ISAMESIGN(VREL,CGY(IJ,2)) + ELSE + UREL=CGX(IJ,2) + ISSU(2)=1 + VREL=CGY(IJ,2) + ISSV(2)=1 + ENDIF + DXP=-DELPRO*UREL*CMTODEG + DYP=-DELPRO*VREL*CMTODEG + ADXP(2)=ABS(DXP) + ADYP(2)=ABS(DYP) + DXUP(2)=ADXP(2)*ISSU(2) + DXDW(2)=ADXP(2)*(1-ISSU(2)) + DYUP(2)=ADYP(2)*ISSV(2) + DYDW(2)=ADYP(2)*(1-ISSV(2)) -! LOOP OVER GRID POINTS -! --------------------- - DO IJ=KIJS,KIJL - IX=BLK2GLO%IXLG(IJ) - KY=BLK2GLO%KXLT(IJ) +! GET ADVECTION WEIGHT FOR ALL NEIGHBOURING GRID POINTS -! FLUX VELOCITIES AT THE GRID BOX INTERFACE + DXX=ZDELLO(KY)-DXUP(JXO(K,2))-DXDW(JXO(K,1)) + DYY=XDELLA-DYUP(JYO(K,2))-DYDW(JYO(K,1)) - DO IC=1,2 - - IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN - UU=U_EXT(IJ)*COSPHM1_EXT(IJ) - UREL=CGX(IJ,IC)+UU - ISSU(IC)=ISAMESIGN(UREL,CGX(IJ,IC)) - VV=V_EXT(IJ)*0.5_JWRB*(1.0_JWRB+DP(IJ,IC)) - VREL=CGY(IJ,IC)+VV - ISSV(IC)=ISAMESIGN(VREL,CGY(IJ,IC)) - ELSE - UREL=CGX(IJ,IC) - ISSU(IC)=1 - VREL=CGY(IJ,IC) - ISSV(IC)=1 - ENDIF - DXP=-DELPRO*UREL*CMTODEG - DYP=-DELPRO*VREL*CMTODEG - ADXP(IC)=ABS(DXP) - ADYP(IC)=ABS(DYP) + GRIDAREAM1 = 1.0_JWRB/(ZDELLO(KY)*XDELLA) + +! WEIGHTED CONTRIBUTION FROM NORTH-SOUTH DIRECTION (WLATN) + + WEIGHT(JYO(K,1))=DXX*DYUP(JYO(K,1))*GRIDAREAM1 + WEIGHT(JYO(K,2))=DXX*DYDW(JYO(K,2))*GRIDAREAM1 + + WLATN(IJ,K,M,1,1)=WLAT(IJ,1)*WEIGHT(1) + WLATN(IJ,K,M,1,2)=WLATM1(IJ,1)*WEIGHT(1) + WLATN(IJ,K,M,2,1)=WLAT(IJ,2)*WEIGHT(2) + WLATN(IJ,K,M,2,2)=WLATM1(IJ,2)*WEIGHT(2) + +! WEIGHTED CONTRIBUTION FROM EAST-WEST DIRECTION (WLONN) + + WLONN(IJ,K,M,JXO(K,1))=DYY*DXUP(JXO(K,1))*GRIDAREAM1 + WLONN(IJ,K,M,JXO(K,2))=DYY*DXDW(JXO(K,2))*GRIDAREAM1 + + +! 
CONTRIBUTION FROM CORNERS (KCOR) + WEIGHT(1)=DXUP(JXO(K,1))*DYUP(JYO(K,1))*GRIDAREAM1 + WEIGHT(2)=DXDW(JXO(K,2))*DYUP(JYO(K,1))*GRIDAREAM1 + WEIGHT(3)=DXUP(JXO(K,1))*DYDW(JYO(K,2))*GRIDAREAM1 + WEIGHT(4)=DXDW(JXO(K,2))*DYDW(JYO(K,2))*GRIDAREAM1 + DO ICR=1,4 + WCORN(IJ,K,M,ICR,1)=WCOR(IJ,KCR(K,ICR))*WEIGHT(ICR) + WCORN(IJ,K,M,ICR,2)=WCORM1(IJ,KCR(K,ICR))*WEIGHT(ICR) + ENDDO + +! CONTRIBUTIONS FOR IJ + SUMWN(IJ,K,M)=(ZDELLO(KY)* & + & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) + & + & XDELLA* & + & (DXUP(JXO(K,2))+DXDW(JXO(K,1))) - & + & (DXDW(JXO(K,1))+DXUP(JXO(K,2)))* & + & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) ) & + & *GRIDAREAM1 + + + +! LOOP OVER GRID POINTS +! --------------------- + +#IFNDEF _OPENACC + +! FLUX VELOCITIES AT THE GRID BOX INTERFACE ! BASIC CFL CHECKS (IN EACH DIRECTION) ! ---------------- - IF (ADXP(IC) > ZDELLO(KY))THEN + IF (ADXP(1) > ZDELLO(KY))THEN WRITE (IU06,*) '********************************' WRITE (IU06,*) '* CTUW: *' WRITE (IU06,*) '* CFL VIOLATED IN X DIRECTION. *' WRITE (IU06,*) '* ADXP SHOULD BE < ZDELLO, BUT *' - WRITE (IU06,*) '* ADXP = ',ADXP(IC),IC + WRITE (IU06,*) '* ADXP = ',ADXP(1),1 WRITE (IU06,*) '* ZDELLO = ',ZDELLO(KY) - DTNEW=ZDELLO(KY)*DELPRO/ADXP(IC) + DTNEW=ZDELLO(KY)*DELPRO/ADXP(1) WRITE (IU06,*) '* TIME STEP ',DELPRO WRITE (IU06,*) '* SHOULD BE REDUCED TO ', DTNEW WRITE (IU06,*) '* *' WRITE (IU06,*) '********************************' LCFLFAIL(IJ)=.TRUE. ENDIF - IF (ADYP(IC) > XDELLA)THEN + IF (ADYP(1) > XDELLA)THEN XLON=AMOWEP+(IX-1)*ZDELLO(KY) XLAT=AMOSOP+(KY-1)*XDELLA - DTNEW=XDELLA*DELPRO/ADYP(IC) + DTNEW=XDELLA*DELPRO/ADYP(1) WRITE (IU06,*) '********************************' WRITE (IU06,*) '* CTUW: *' WRITE (IU06,*) '* CFL VIOLATED IN Y DIRECTION. 
*' WRITE (IU06,*) '* ADYP SHOULD BE < XDELLA, BUT *' - WRITE (IU06,*) '* ADYP = ',ADYP(IC),IC + WRITE (IU06,*) '* ADYP = ',ADYP(1),1 WRITE (IU06,*) '* XDELLA = ',XDELLA WRITE (IU06,*) '* XLAT= ',XLAT,' XLON= ',XLON WRITE (IU06,*) '* DEPTH= ',DEPTH_EXT(IJ) @@ -230,62 +318,58 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WRITE (IU06,*) '********************************' LCFLFAIL(IJ)=.TRUE. ENDIF - - DXUP(IC)=ADXP(IC)*ISSU(IC) - DXDW(IC)=ADXP(IC)*(1-ISSU(IC)) - DYUP(IC)=ADYP(IC)*ISSV(IC) - DYDW(IC)=ADYP(IC)*(1-ISSV(IC)) +#ENDIF - ENDDO - -! GET ADVECTION WEIGHT FOR ALL NEIGHBOURING GRID POINTS - - DXX=ZDELLO(KY)-DXUP(JXO(K,2))-DXDW(JXO(K,1)) - DYY=XDELLA-DYUP(JYO(K,2))-DYDW(JYO(K,1)) - - GRIDAREAM1 = 1.0_JWRB/(ZDELLO(KY)*XDELLA) - -! WEIGHTED CONTRIBUTION FROM NORTH-SOUTH DIRECTION (WLATN) - - WEIGHT(JYO(K,1))=DXX*DYUP(JYO(K,1))*GRIDAREAM1 - WEIGHT(JYO(K,2))=DXX*DYDW(JYO(K,2))*GRIDAREAM1 - DO IC=1,2 - WLATN(IJ,K,M,IC,1)=WLAT(IJ,IC)*WEIGHT(IC) - WLATN(IJ,K,M,IC,2)=WLATM1(IJ,IC)*WEIGHT(IC) - ENDDO - -! WEIGHTED CONTRIBUTION FROM EAST-WEST DIRECTION (WLONN) - - WLONN(IJ,K,M,JXO(K,1))=DYY*DXUP(JXO(K,1))*GRIDAREAM1 - WLONN(IJ,K,M,JXO(K,2))=DYY*DXDW(JXO(K,2))*GRIDAREAM1 - -! CONTRIBUTION FROM CORNERS (KCOR) - WEIGHT(1)=DXUP(JXO(K,1))*DYUP(JYO(K,1))*GRIDAREAM1 - WEIGHT(2)=DXDW(JXO(K,2))*DYUP(JYO(K,1))*GRIDAREAM1 - WEIGHT(3)=DXUP(JXO(K,1))*DYDW(JYO(K,2))*GRIDAREAM1 - WEIGHT(4)=DXDW(JXO(K,2))*DYDW(JYO(K,2))*GRIDAREAM1 - DO ICR=1,4 - WCORN(IJ,K,M,ICR,1)=WCOR(IJ,KCR(K,ICR))*WEIGHT(ICR) - WCORN(IJ,K,M,ICR,2)=WCORM1(IJ,KCR(K,ICR))*WEIGHT(ICR) - ENDDO - -! CONTRIBUTIONS FOR IJ - SUMWN(IJ,K,M)=(ZDELLO(KY)* & - & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) + & - & XDELLA* & - & (DXUP(JXO(K,2))+DXDW(JXO(K,1))) - & - & (DXDW(JXO(K,1))+DXUP(JXO(K,2)))* & - & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) ) & - & *GRIDAREAM1 +! BASIC CFL CHECKS (IN EACH DIRECTION) +! 
---------------- +#IFNDEF _OPENACC + IF (ADXP(2) > ZDELLO(KY))THEN + WRITE (IU06,*) '********************************' + WRITE (IU06,*) '* CTUW: *' + WRITE (IU06,*) '* CFL VIOLATED IN X DIRECTION. *' + WRITE (IU06,*) '* ADXP SHOULD BE < ZDELLO, BUT *' + WRITE (IU06,*) '* ADXP = ',ADXP(2),2 + WRITE (IU06,*) '* ZDELLO = ',ZDELLO(KY) + DTNEW=ZDELLO(KY)*DELPRO/ADXP(2) + WRITE (IU06,*) '* TIME STEP ',DELPRO + WRITE (IU06,*) '* SHOULD BE REDUCED TO ', DTNEW + WRITE (IU06,*) '* *' + WRITE (IU06,*) '********************************' + LCFLFAIL(IJ)=.TRUE. + ENDIF + IF (ADYP(2) > XDELLA)THEN + XLON=AMOWEP+(IX-1)*ZDELLO(KY) + XLAT=AMOSOP+(KY-1)*XDELLA + DTNEW=XDELLA*DELPRO/ADYP(2) + WRITE (IU06,*) '********************************' + WRITE (IU06,*) '* CTUW: *' + WRITE (IU06,*) '* CFL VIOLATED IN Y DIRECTION. *' + WRITE (IU06,*) '* ADYP SHOULD BE < XDELLA, BUT *' + WRITE (IU06,*) '* ADYP = ',ADYP(2),2 + WRITE (IU06,*) '* XDELLA = ',XDELLA + WRITE (IU06,*) '* XLAT= ',XLAT,' XLON= ',XLON + WRITE (IU06,*) '* DEPTH= ',DEPTH_EXT(IJ) + WRITE (IU06,*) '* TIME STEP ',DELPRO + WRITE (IU06,*) '* SHOULD BE REDUCED TO ', DTNEW + WRITE (IU06,*) '* *' + WRITE (IU06,*) '********************************' + LCFLFAIL(IJ)=.TRUE. + ENDIF +#ENDIF - ENDDO ! END LOOP OVER GRID POINTS + ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP OVER DIRECTIONS ENDDO ! END LOOP OVER FREQUENCIES +!$acc end kernels +! call nvtxEndRange + + + ELSE !* CARTESIAN GRID. @@ -327,9 +411,12 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & DELTH0 = 0.25*DELPRO/DELTH + !* LOOP OVER DIRECTIONS. ! --------------------- +! 
call nvtxStartRange("ctuw: Loop 2") +!$acc parallel loop private(km1,kp1,sp,sm,DELFR0,DRGP,DRGM,DRDP,DRDM,DRCP,DRCM) DO K=1,NANG KP1 = K+1 IF (KP1 > NANG) KP1 = 1 @@ -341,6 +428,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & SP = DELTH0*(SINTH(K)+SINTH(KP1))/R SM = DELTH0*(SINTH(K)+SINTH(KM1))/R +!$acc loop private(jh,tanph) DO IJ = KIJS,KIJL JH=BLK2GLO%KXLT(IJ) TANPH = SINPH(JH)/COSPH(JH) @@ -351,11 +439,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* COMPUTE DEPTH REFRACTION. ! ------------------------- IF (IREFRA == 1) THEN +!$acc loop DO IJ = KIJS,KIJL DRDP(IJ) = (THDD(IJ,K) + THDD(IJ,KP1))*DELTH0 DRDM(IJ) = (THDD(IJ,K) + THDD(IJ,KM1))*DELTH0 ENDDO ELSE +!$acc loop DO IJ = KIJS,KIJL DRDP(IJ) = 0.0_JWRB DRDM(IJ) = 0.0_JWRB @@ -366,11 +456,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ! --------------------------- IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN +!$acc loop DO IJ = KIJS,KIJL DRCP(IJ) = CURMASK(IJ)*(THDC(IJ,K) + THDC(IJ,KP1))*DELTH0 DRCM(IJ) = CURMASK(IJ)*(THDC(IJ,K) + THDC(IJ,KM1))*DELTH0 ENDDO ELSE +!$acc loop DO IJ = KIJS,KIJL DRCP(IJ) = 0.0_JWRB DRCM(IJ) = 0.0_JWRB @@ -384,6 +476,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* NO DEPTH REFRACTION. ! ------------------- IF (IREFRA == 0) THEN +!$acc loop collapse(2) private(DTHP,DTHM) DO M = MSTART, MEND DO IJ=KIJS,KIJL DTHP = DRGP(IJ)*CGROUP_EXT(IJ,M) + DRCP(IJ) @@ -391,11 +484,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WKPMN(IJ,K,M,0)=(DTHP+ABS(DTHP))+(ABS(DTHM)-DTHM) WKPMN(IJ,K,M,1)=-DTHP+ABS(DTHP) WKPMN(IJ,K,M,-1)=DTHM+ABS(DTHM) + SUMWN(IJ,K,M)=SUMWN(IJ,K,M)+WKPMN(IJ,K,M,0) ENDDO ENDDO ELSE !* SHALLOW WATER AND DEPTH REFRACTION. ! 
----------------------------------- +!$acc loop collapse(2) private(DTHP,DTHM) DO M = MSTART, MEND DO IJ=KIJS,KIJL DTHP = DRGP(IJ)*CGROUP_EXT(IJ,M)+OMOSNH2KD_EXT(IJ,M)*DRDP(IJ)+DRCP(IJ) @@ -403,6 +498,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WKPMN(IJ,K,M,0)=(DTHP+ABS(DTHP))+(ABS(DTHM)-DTHM) WKPMN(IJ,K,M,1)=-DTHP+ABS(DTHP) WKPMN(IJ,K,M,-1)=DTHM+ABS(DTHM) + SUMWN(IJ,K,M)=SUMWN(IJ,K,M)+WKPMN(IJ,K,M,0) ENDDO ENDDO ENDIF @@ -414,12 +510,12 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & DELFR0 = 0.25_JWRB*DELPRO/((FRATIO-1)*ZPI) +!$acc loop private(MP1,MM1,DFP,DFM) private(DTHP,DTHM) DO M = MSTART, MEND MP1 = MIN(NFRE_RED,M+1) MM1 = MAX(1,M-1) DFP = DELFR0/FR(M) DFM = DELFR0/FR(MM1) - DO IJ=KIJS,KIJL DTHP = CURMASK(IJ) * (SDOT(IJ,K,M) + SDOT(IJ,K,MP1))*DFP DTHM = CURMASK(IJ) * (SDOT(IJ,K,M) + SDOT(IJ,K,MM1))*DFM @@ -431,12 +527,16 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDIF ENDDO ! END LOOP ON DIRECTIONS - +!$acc end parallel +! call nvtxEndRange ! CHECK THAT WEIGHTS ARE LESS THAN 1 ! AND COMPUTE THEIR SUM AND CHECK IT IS LESS THAN 1 AS WELL !!! THE SUM IS NEEDED LATER ON !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! call nvtxStartRange("ctuw: Loop 3") +!!$acc kernels loop seq +#IFNDEF _OPENACC DO K=1,NANG DO M = MSTART, MEND DO IJ=KIJS,KIJL @@ -591,6 +691,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP OVER FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS +#ENDIF +!!$acc end kernels +! call nvtxEndRange + +!!WORKAROUNDDDDDDD +!LCFLFAIL=.FALSE. +!!WORKAROUNDDDDDDD DO IJ=KIJS,KIJL IF (LCFLFAIL(IJ)) THEN @@ -603,11 +710,14 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !!!!!!INCLUDE THE BLOCKING COEFFICIENTS INTO THE WEIGHTS OF THE ! SURROUNDING POINTS. +! call nvtxStartRange("ctuw: Loop 4") +!$acc parallel loop collapse(3) DO K=1,NANG DO M = MSTART, MEND DO IJ=KIJS,KIJL ! 
POINTS ON SURROUNDING LATITUDES +!$acc loop collapse(2) DO IC=1,2 DO ICL=1,2 WLATN(IJ,K,M,IC,ICL) = WLATN(IJ,K,M,IC,ICL)*OBSLAT(IJ,M,IC) @@ -615,11 +725,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! POINTS ON SURROUNDING LONGITUDE +!$acc loop DO IC=1,2 WLONN(IJ,K,M,IC) = WLONN(IJ,K,M,IC)*OBSLON(IJ,M,IC) ENDDO ! SURROUNDING CORNER POINTS +!$acc loop collapse(2) DO ICR=1,4 DO ICL=1,2 WCORN(IJ,K,M,ICR,ICL) = WCORN(IJ,K,M,ICR,ICL)*OBSCOR(IJ,M,KCR(K,ICR)) @@ -629,9 +741,14 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP ON FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS +!$acc end parallel + ! call nvtxEndRange IF (LHOOK) CALL DR_HOOK('CTUW',1,ZHOOK_HANDLE) + + + RETURN CONTAINS diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 646a1d539..369808538 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -29,12 +29,18 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO + USE YOWCURR , ONLY : LLCFLCUROFF USE YOWGRID , ONLY : NPROMA_WAM USE YOWMPP , ONLY : IRANK USE YOWPARAM , ONLY : NIBLO ,NANG ,NFRE_RED USE YOWSTAT , ONLY : IREFRA USE YOWTEST , ONLY : IU06 +USE YOWUBUF , ONLY : WLATN ,WLONN ,WCORN +USE YOWFRED , ONLY : FR ,DELTH, COSTH ,SINTH +USE YOWPCONS , ONLY : ZPI + + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -73,12 +79,15 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & IF (LHOOK) CALL DR_HOOK('CTUWDRV',0,ZHOOK_HANDLE) -!! NPROMA=NPROMA_WAM +!! =NPROMA_WAM MTHREADS=1 !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 + +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL, ICALL, IJ, LL2NDCALL) +#endif /*_OPENACC*/ DO JKGLO = IJS, IJL, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1,IJL) @@ -91,6 +100,7 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) + ! WHEN SURFACE CURRENTS ARE USED AND LLCFLCUROFF IS TRUE ! 
THEN TRY TO SATISFY THE CFL CONDITION WITHOUT THE CURRENTS ! IF IT WAS VIOLATED IN THE FIRST PLACE @@ -112,8 +122,12 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) ENDIF ENDIF + + ENDDO +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ DO IJ=IJS,IJL IF (LCFLFAIL(IJ)) THEN diff --git a/src/ecwam/ctuwini.F90 b/src/ecwam/ctuwini.F90 index 802a5c13d..475274f9c 100644 --- a/src/ecwam/ctuwini.F90 +++ b/src/ecwam/ctuwini.F90 @@ -6,6 +6,8 @@ ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! +!MODULE CTUWINI_MOD +! CONTAINS SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & & WLATM1, WCORM1, DP) @@ -44,18 +46,20 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & REAL(KIND=JWRB), DIMENSION(NINF:NSUP,4), INTENT(OUT) :: WCORM1 ! 1 - WCOR REAL(KIND=JWRB), DIMENSION(NINF:NSUP,2), INTENT(OUT) :: DP ! COS PHI FACTOR - INTEGER(KIND=JWIM) :: IJ, K, M, IC, ICR, ICL, KY, KK, KKM INTEGER(KIND=JWIM) :: NLAND -REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +!!$acc routine vector ! ---------------------------------------------------------------------- -IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) +!IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) NLAND = NSUP+1 - + + !$acc parallel loop independent collapse(2) DO IC=1,2 DO IJ = KIJS,KIJL IF (KLAT(IJ,IC,1) < NLAND .AND. KLAT(IJ,IC,2) < NLAND) THEN @@ -74,7 +78,9 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDIF ENDDO ENDDO - + !$acc end parallel + + !$acc parallel loop independent collapse(2) DO ICR=1,4 DO IJ = KIJS,KIJL IF (KCOR(IJ,ICR,1) < NLAND .AND. KCOR(IJ,ICR,2) < NLAND) THEN @@ -88,15 +94,17 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ELSE ! ADAPT CORNER POINT INTERPOLATION WEIGHT IF LAND IS PRESENT ! 
SECOND CLOSEST CORNER POINT IS OVER LAND - IF (WCOR(IJ,ICR) > 0.5_JWRB) WCOR(IJ,ICR)=1.0_JWRB + IF (WCOR(IJ,ICR) > 0.5_JWRB) WCOR(IJ,ICR)=1.0_JWRB WCORM1(IJ,ICR) = 1.0_JWRB - WCOR(IJ,ICR) ENDIF ENDDO ENDDO + !$acc end parallel ! INITIALISATION + !$acc parallel loop independent collapse(5) DO ICL=1,2 DO IC=1,2 DO M=1,NFRE_RED @@ -108,7 +116,10 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO ENDDO ENDDO + !$acc end parallel + + !$acc parallel loop independent collapse(4) DO IC=1,2 DO M=1,NFRE_RED DO K=1,NANG @@ -118,7 +129,10 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO ENDDO ENDDO + !$acc end parallel + + !$acc parallel loop independent collapse(5) DO ICL=1,2 DO ICR=1,4 DO M=1,NFRE_RED @@ -130,6 +144,7 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO ENDDO ENDDO + !$acc end parallel @@ -137,10 +152,12 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & !* SPHERICAL GRID. ! --------------- - +! !* COMPUTE COS PHI FACTOR FOR ADJOINING GRID POINT. ! (for all grid points) + !$acc parallel loop independent collapse(2) private(KY,KK,KKM) DO IC=1,2 +! !!!$acc loop private(KY,KK,KKM) DO IJ = KIJS,KIJL KY=BLK2GLO%KXLT(IJ) KK=KY+2*IC-3 @@ -148,8 +165,11 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & DP(IJ,IC) = COSPH(KKM)*COSPHM1_EXT(IJ) ENDDO ENDDO + !$acc end parallel ENDIF -IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) + +!IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) END SUBROUTINE CTUWINI +!END MODULE CTUWINI_MOD diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index e60b93e02..a524dd0e6 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -24,23 +24,29 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ! 
------------------------------------------------------------------- + USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO USE YOWCURR , ONLY : LLCFLCUROFF USE YOWFRED , ONLY : COSTH ,SINTH -USE YOWGRID , ONLY : NPROMA_WAM +USE YOWGRID , ONLY : NPROMA_WAM, COSPH USE YOWREFD , ONLY : THDD ,THDC ,SDOT USE YOWMPP , ONLY : IRANK ,NPROC -USE YOWPARAM , ONLY : NIBLO ,NANG ,NFRE_RED -USE YOWSTAT , ONLY : IFRELFMAX, DELPRO_LF, IDELPRO, IREFRA +USE YOWPARAM , ONLY : NIBLO ,NANG ,NFRE_RED, ngy +USE YOWSTAT , ONLY : IFRELFMAX, DELPRO_LF, IDELPRO, IREFRA, ICASE USE YOWTEST , ONLY : IU06 USE YOWUBUF , ONLY : SUMWN , & & JXO ,JYO ,KCR ,KPM ,MPM, & & WLATN ,WLONN ,WCORN ,WKPMN ,WMPMN , & -& LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN +& LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN , & +& KLON, KLAT, WLAT, KCOR, WCOR +USE YOWFRED , ONLY : FR ,DELTH, COSTH ,SINTH +USE YOWPCONS , ONLY : ZPI + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +!USE CTUWINI_MOD , ONLY : CTUWINI ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -49,6 +55,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & #include "ctuw.intfb.h" #include "ctuwdrv.intfb.h" #include "ctuwini.intfb.h" +!!$acc routine(ctuwini) vector INTEGER(KIND=JWIM), INTENT(IN) :: IJS, IJL ! GRID POINTS WITHIN A BLOCK INTEGER(KIND=JWIM), INTENT(IN) :: NINF, NSUP ! GRID POINT WITH HALO EXTEND NINF:NSUP+1 @@ -78,26 +85,31 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & LOGICAL, SAVE :: LFRSTCTU DATA LFRSTCTU /.TRUE./ - ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('CTUWUPDT',0,ZHOOK_HANDLE) +!$acc update device(sinth,costh) +!$acc update device(icase, COSPH, nang, nfre_red, ngy, niblo) !F ! DEFINE JXO, JYO, KCR IF (LFRSTCTU) THEN IF (.NOT. ALLOCATED(MPM)) ALLOCATE(MPM(NFRE_RED,-1:1)) + !$acc kernels DO M=1,NFRE_RED MPM(M,-1)= MAX(1,M-1) MPM(M,0) = M MPM(M,1) = MIN(NFRE_RED,M+1) ENDDO + !$acc end kernels IF (.NOT. 
ALLOCATED(KPM)) ALLOCATE(KPM(NANG,-1:1)) IF (.NOT. ALLOCATED(JXO)) ALLOCATE(JXO(NANG,2)) IF (.NOT. ALLOCATED(JYO)) ALLOCATE(JYO(NANG,2)) IF (.NOT. ALLOCATED(KCR)) ALLOCATE(KCR(NANG,4)) +!$ACC ENTER DATA COPYIN(KLON, KLAT, KCOR, JXO, JYO, KCR) + !$acc kernels DO K=1,NANG KM1 = K-1 @@ -149,12 +161,12 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDIF ENDIF ENDDO + !$acc end kernels LFRSTCTU = .FALSE. ENDIF - ! THE CTU IS USED, COMPUTE THE WEIGHTS IF (.NOT. ALLOCATED(SUMWN)) ALLOCATE(SUMWN(IJS:IJL,NANG,NFRE_RED)) @@ -175,7 +187,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(LLWMPMN)) ALLOCATE(LLWMPMN(NANG,NFRE_RED,-1:1)) ENDIF - +!$acc enter data copyin(sumwn,LLWKPMN, WLATN,WLONN,WCORN,WKPMN) ! SOME INITIALISATION FOR *CTUW* !! NPROMA=NPROMA_WAM @@ -183,14 +195,29 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 + +!F!$acc update device(KLAT,WLAT,KCOR,WCOR,WLATN,WLONN,WCORN) + +!$acc enter data copyin(BLK2GLO) +!$acc enter data copyin(BLK2GLO%KXLT) + +!$acc update device(KLAT,WLAT,KCOR,WCOR) !F +!$acc update device(NFRE_RED,ZPI,FR,DELTH,NANG) +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL) +#endif /*_OPENACC*/ +!$acc data present(KLAT,WLAT,KCOR,WCOR,WLATN,WLONN,WCORN) DO JKGLO = IJS, IJL, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1,IJL) - CALL CTUWINI(KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & -& WLATM1, WCORM1, DP ) +! CALL CTUWINI(KIJS, KIJL,WLATM1, NINF, NSUP,WCORM1) + CALL CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & + & WLATM1, WCORM1, DP) ENDDO +!$acc end data +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ ! COMPUTES THE WEIGHTS @@ -208,6 +235,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & MEND = IFRELFMAX ENDIF + CALL CTUWDRV (DELPRO, MSTART, MEND, & & IJS, IJL, NINF, NSUP, & & BLK2GLO, & @@ -234,14 +262,17 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDIF +!!$acc update host(WLATN,WCORN,WLONN) ! 
FIND THE LOGICAL FLAGS THAT WILL LIMIT THE EXTEND OF THE CALCULATION IN PROPAGS2 +!$acc parallel loop independent collapse(4) DO IC=1,2 DO ICL=1,2 DO K=1,NANG DO M=1,NFRE_RED LLWLATN(K,M,IC,ICL)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WLATN(IJ,K,M,IC,ICL) > 0.0_JWRB) THEN LLWLATN(K,M,IC,ICL)=.TRUE. @@ -252,11 +283,14 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel +!$acc parallel loop independent collapse(3) DO IC=1,2 DO M=1,NFRE_RED DO K=1,NANG LLWLONN(K,M,IC)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WLONN(IJ,K,M,IC) > 0.0_JWRB) THEN LLWLONN(K,M,IC)=.TRUE. @@ -266,12 +300,15 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel +!$acc parallel loop independent collapse(4) DO ICL=1,2 DO ICR=1,4 DO M=1,NFRE_RED DO K=1,NANG LLWCORN(K,M,ICR,ICL)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WCORN(IJ,K,M,ICR,ICL) > 0.0_JWRB) THEN LLWCORN(K,M,ICR,ICL)=.TRUE. @@ -282,11 +319,14 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel +!$acc parallel loop independent collapse(3) DO IC=-1,1 DO M=1,NFRE_RED DO K=1,NANG LLWKPMN(K,M,IC)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WKPMN(IJ,K,M,IC) > 0.0_JWRB) THEN LLWKPMN(K,M,IC)=.TRUE. @@ -296,12 +336,15 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel IF (IREFRA == 2 .OR. IREFRA == 3) THEN +!$acc parallel loop independent collapse(3) DO IC=-1,1 DO M=1,NFRE_RED DO K=1,NANG LLWMPMN(K,M,IC)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WMPMN(IJ,K,M,IC) > 0.0_JWRB) THEN LLWMPMN(K,M,IC)=.TRUE. @@ -311,6 +354,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel ENDIF diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index d30e9eab7..2f6766f06 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -102,59 +102,96 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ALLOCATE(ZCOMBUFS(NBUFMAX,NGBTOPE)) ALLOCATE(ZCOMBUFR(NBUFMAX,NGBFROMPE)) - ! 
PACK SEND BUFFERS FOR NGBTOPE NEIGHBOURING PE's ! ------------------------------------------------- CALL GSTATS(1892,0) -!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) - DO INGB=1,NGBTOPE - IPROC=NTOPELST(INGB) - KCOUNT=0 - DO M = ND3S, ND3E - DO K = 1, NDIM2 - DO IH = 1, NTOPE(IPROC) - IJ=IJTOPE(IH,IPROC) - KCOUNT=KCOUNT+1 - ZCOMBUFS(KCOUNT,INGB)=FLD(IJ,K,M) + #ifdef _OPENACC + !$acc kernels loop independent private(KCOUNT,IJ) copyout(ZCOMBUFS) copyin(fld) + DO INGB=1,NGBTOPE !Total number of PE's to which information will be sent + IPROC=NTOPELST(INGB) !To which PE to send informations + !$acc loop independent collapse(3) + DO M = ND3S, ND3E + DO K = 1, NDIM2 + DO IH = 1, NTOPE(IPROC) !How many halo points to be sent + IJ=IJTOPE(IH,IPROC) !The index of which points to send + KCOUNT = (M - 1) * (NDIM2 * NTOPE(IPROC)) + (K - 1) * NTOPE(IPROC) + IH + ZCOMBUFS(KCOUNT,INGB)=FLD(IJ,K,M) ENDDO ENDDO ENDDO ENDDO + !$acc end kernels +#else +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) + DO INGB=1,NGBTOPE + IPROC=NTOPELST(INGB) + KCOUNT=0 + DO M = ND3S, ND3E + DO K = 1, NDIM2 + DO IH = 1, NTOPE(IPROC) + IJ=IJTOPE(IH,IPROC) + KCOUNT=KCOUNT+1 + ZCOMBUFS(KCOUNT,INGB)=FLD(IJ,K,M) + ENDDO + ENDDO + ENDDO + ENDDO !$OMP END PARALLEL DO + #endif /*_OPENACC*/ + CALL GSTATS(1892,1) ! DO NON BLOCKING SENDS AND RECVS IR=0 CALL GSTATS(676,0) - DO INGB=1,NGBFROMPE IR=IR+1 IPROC=NFROMPELST(INGB) KCOUNT=NDIM3*NDIM2*NFROMPE(IPROC) +!!$acc host_data use_device(ZCOMBUFR) CALL MPL_RECV(ZCOMBUFR(1:KCOUNT,INGB),KSOURCE=IPROC,KTAG=KTAG, & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & & CDSTRING='MPEXCHNG:') +!!$acc end host_data ENDDO DO INGB=1,NGBTOPE IR=IR+1 IPROC=NTOPELST(INGB) KCOUNT=NDIM3*NDIM2*NTOPE(IPROC) +!!$acc host_data use_device(ZCOMBUFR) CALL MPL_SEND(ZCOMBUFS(1:KCOUNT,INGB),KDEST=IPROC,KTAG=KTAG, & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & & CDSTRING='MPEXCHNG:') +!!$acc end host_data ENDDO ! 
NOW WAIT FOR ALL TO COMPLETE CALL MPL_WAIT(KREQUEST=IREQ(1:IR),CDSTRING='MPEXCHNG:') - CALL GSTATS(676,1) ! DECODE THE RECEIVED BUFFERS CALL GSTATS(1893,0) + #ifdef _OPENACC + !$acc kernels loop independent private(KCOUNT,IJ) copyin(ZCOMBUFR) + DO INGB=1,NGBFROMPE + IPROC=NFROMPELST(INGB) + !$acc loop vector independent collapse(3) + DO M = ND3S, ND3E + DO K = 1, NDIM2 + DO IH = 1, NFROMPE(IPROC) + IJ=NIJSTART(IPROC)+IH-1 + KCOUNT = (M - 1) * (NDIM2 * NFROMPE(IPROC)) + (K - 1) * NFROMPE(IPROC) + IH + FLD(IJ,K,M)=ZCOMBUFR(KCOUNT,INGB) + ENDDO + ENDDO + ENDDO + ENDDO + !$acc end kernels + #else !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) DO INGB=1,NGBFROMPE IPROC=NFROMPELST(INGB) @@ -170,6 +207,8 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO ENDDO !$OMP END PARALLEL DO + #endif /*_OPENACC*/ + CALL GSTATS(1893,1) KTAG=KTAG+1 diff --git a/src/ecwam/mubuf.F90 b/src/ecwam/mubuf.F90 index 6ef274472..8cb1674fe 100644 --- a/src/ecwam/mubuf.F90 +++ b/src/ecwam/mubuf.F90 @@ -197,6 +197,7 @@ SUBROUTINE MUBUF (IU01, BATHY, IU08, NPROPAGS) DEALLOCATE(KLAT) + !* 2.2 LONGITUDE NEIGHBOURS (KLON) ! --------------------------- @@ -1163,5 +1164,4 @@ SUBROUTINE MUBUF (IU01, BATHY, IU08, NPROPAGS) ENDDO ! end loop over frequencies DEALLOCATE(KDUM) - END SUBROUTINE MUBUF diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 34c095920..c765b5d11 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -37,6 +37,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ! 
------------------------------------------------------------------- + use openacc USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO, ENVIRONMENT, FREQUENCY @@ -46,13 +47,19 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) USE YOWPARAM , ONLY : NANG ,NFRE ,NFRE_RED ,NIBLO , LLUNSTR USE YOWREFD , ONLY : LLUPDTTD ,THDD ,THDC ,SDOT USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO - USE YOWUBUF , ONLY : LUPDTWGHT + USE YOWUBUF , ONLY : LUPDTWGHT, KLAT ,KLON ,KCOR , & + & WLATN ,WLONN ,WCORN ,WKPMN ,WMPMN , & + & LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN , & + & SUMWN , & + & JXO ,JYO ,KCR ,KPM ,MPM #ifdef WAM_HAVE_UNWAM USE UNWAM , ONLY : PROPAG_UNWAM #endif USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + USE NVTX + ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -72,7 +79,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: FL1 - INTEGER(KIND=JWIM) :: IJ, K, M, J + INTEGER(KIND=JWIM) :: IJ, K, M, J, II INTEGER(KIND=JWIM) :: JKGLO, NPROMA, MTHREADS INTEGER(KIND=JWIM) :: NSTEP_LF, ISUBST !$ INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS @@ -97,6 +104,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IF (LHOOK) CALL DR_HOOK('PROPAG_WAM',0,ZHOOK_HANDLE) +!$acc data present(FL1) +!$acc data CREATE(FL1_EXT,FL3_EXT) IF (NIBLO > 1) THEN IJSG = IJFROMCHNK(1,1) @@ -107,24 +116,38 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) NPROMA=(IJLG-IJSG+1)/MTHREADS + 1 +! !$acc data COPYIN(FL1_EXT) !!! the advection schemes are still written in block structure !!! mapping chuncks to block ONLY for actual grid points !!!! +! 
call nvtxStartRange("PROPAG: Loop 1") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) +#endif /*_OPENACC*/ + !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) DO ICHNK = 1, NCHNK KIJS = 1 IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) +! !$acc loop private(FL1_EXT) + !$acc loop independent collapse(2) DO M = 1, NFRE_RED DO K = 1, NANG - FL1_EXT(IJSB:IJLB, K, M) = FL1(KIJS:KIJL, K, M, ICHNK) +! FL1_EXT(IJFROMCHNK(1, ICHNK):IJFROMCHNK(KIJL4CHNK(ICHNK), ICHNK), K, M) = FL1(1:KIJL4CHNK(ICHNK), K, M, ICHNK) + FL1_EXT(IJSB:IJLB, K, M) = FL1(1:KIJL, K, M, ICHNK) ENDDO ENDDO ENDDO + !$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange ! SET THE DUMMY LAND POINT TO 0. + !$acc kernels FL1_EXT(NSUP+1,:,:) = 0.0_JWRB + !$acc end kernels IF (LLUNSTR) THEN @@ -206,26 +229,37 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) LUPDTWGHT=.FALSE. ENDIF +! call nvtxStartRange("PROPAG: First preloop") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) +#endif /*_OPENACC*/ DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) CALL PROPAGS2(FL1_EXT, FL3_EXT, NINF, NSUP, KIJS, KIJL, NANG, 1, NFRE_RED) ENDDO +#ifndef _OPENACC !$OMP END PARALLEL DO - +#endif /*_OPENACC*/ +! call nvtxEndRange ! SUB TIME STEPPING FOR FAST WAVES (only if IFRELFMAX > 0) IF (IFRELFMAX > 0 ) THEN NSTEP_LF = NINT(REAL(IDELPRO, JWRB)/DELPRO_LF) ISUBST = 2 ! The first step was done as part of the previous call to PROPAGS2 +! call nvtxStartRange("PROPAG: While loop") DO WHILE (ISUBST <= NSTEP_LF) +! 
call nvtxStartRange("PROPAG: Loop 2") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL, M, K, IJ) +#endif /*_OPENACC*/ +!$acc kernels loop private(KIJS, KIJL, FL1_EXT) DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) + !$acc loop independent collapse(3) DO M = 1, IFRELFMAX DO K = 1, NANG DO IJ = KIJS, KIJL @@ -234,22 +268,38 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ENDDO ENDDO ENDDO +!$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange CALL MPEXCHNG(FL1_EXT(:,:,1:IFRELFMAX), NANG, 1, IFRELFMAX) +!call nvtxStartRange("PROPAG: Inner propags2 loop") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) +#endif /*_OPENACC*/ +! !$ACC DATA COPYIN(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN, JXO, JYO, KCR) DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) + + CALL PROPAGS2(FL1_EXT(:,:,1:IFRELFMAX), FL3_EXT(:,:,1:IFRELFMAX), NINF, NSUP, KIJS, KIJL, NANG, 1, IFRELFMAX) ENDDO +! !$ACC END DATA +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange ISUBST = ISUBST + 1 ENDDO - ENDIF ! end sub time steps (if needed) +! call nvtxEndRange + +ENDIF ! end sub time steps (if needed) CASE(1) IF (L1STCALL .OR. LLCHKCFLA) LLCHKCFL=.TRUE. @@ -305,36 +355,84 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) !!! the advection schemes are still written in block structure !!! So need to convert back to the nproma_wam chuncks +! call nvtxStartRange("PROPAG: Loop 3") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) +#endif /*_OPENACC*/ + !!$acc kernels loop private(KIJS, IJSB, KIJL, IJLB, M, K) + !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) DO ICHNK = 1, NCHNK KIJS = 1 IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) +! 
!$acc loop vector independent collapse(3) + !$acc loop independent collapse(3) DO M = 1, NFRE_RED DO K = 1, NANG - FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) + DO J = KIJS, KIJL + II = IJSB + J - KIJS + FL1(J, K, M, ICHNK) = FL3_EXT(II, K, M) +! FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) + ENDDO ENDDO ENDDO IF (KIJL < NPROMA_WAM) THEN !!! make sure fictious points keep values of the first point in the chunk + !$acc loop independent collapse(3) DO M = 1, NFRE_RED DO K = 1, NANG - FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) + DO J = KIJL+1,NPROMA_WAM + FL1(J, K, M, ICHNK) = FL1(1, K, M, ICHNK) + !FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) + ENDDO ENDDO ENDDO ENDIF ENDDO + !$acc end kernels + +!F !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) +!F DO ICHNK = 1, NCHNK +!F KIJS = 1 +!F IJSB = IJFROMCHNK(KIJS, ICHNK) +!F KIJL = KIJL4CHNK(ICHNK) +!F IJLB = IJFROMCHNK(KIJL, ICHNK) +!F !$acc loop seq collapse(2) +!F DO M = 1, NFRE_RED +!F DO K = 1, NANG +!F FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) +!F ENDDO +!F ENDDO +!F +!F IF (KIJL < NPROMA_WAM) THEN +!F !!! make sure fictious points keep values of the first point in the chunk +!F !$acc loop independent collapse(2) +!F DO M = 1, NFRE_RED +!F DO K = 1, NANG +!F FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) +!F ENDDO +!F ENDDO +!F ENDIF +!F +!F ENDDO +!F !$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange CALL GSTATS(1430,1) ENDIF ! end propagation +! !$acc end data ENDIF ! more than one grid point +!$ACC END DATA +!$ACC END DATA L1STCALL=.FALSE. LLCHKCFL=.FALSE. diff --git a/src/ecwam/propags2.F90 b/src/ecwam/propags2.F90 index 039987dc5..149d12ea1 100644 --- a/src/ecwam/propags2.F90 +++ b/src/ecwam/propags2.F90 @@ -9,6 +9,7 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) +use nvtx ! 
---------------------------------------------------------------------- !**** *PROPAGS2* - ADVECTION USING THE CORNER TRANSPORT SCHEME IN SPACE @@ -95,54 +96,112 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) IF (IREFRA /= 2 .AND. IREFRA /= 3 ) THEN !* WITHOUT DEPTH OR/AND CURRENT REFRACTION. ! ---------------------------------------- - - DO K = 1, NANG - JJX=JXO(K,1) - JJY=JYO(K,1) - JJY=JYO(K,1) - JJK=KCR(K,1) + +!call nvtxStartRange("PROPAGS2: Begin loop NANG") + +!!$acc enter data create(FJ1, FJ2, FJ3, FJ4, FJ5) copyin(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN) +!!$acc enter data present(F1, F3) +!! create(FJ1, FJ2, FJ3, FJ4, FJ5) copyin(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN) + +!$acc kernels loop present(F1,F3) create(FJ1, FJ2, FJ3, FJ4, FJ5) PRESENT(KLON,KLAT,KCOR,WKPMN,LLWKPMN, SUMWN, WLONN, WLATN, WCORN) PRESENT(JXO,JYO,KCR) + DO K = 1, NANG +! JJX=JXO(K,1) +! JJY=JYO(K,1) +! JJY=JYO(K,1) +! JJK=KCR(K,1) + + !!$acc loop independent DO M = ND3S, ND3E - DO IJ = KIJS, KIJL - FJ1(IJ)= F1(KLON(IJ,JJX) ,K ,M) - FJ2(IJ)= F1(KLAT(IJ,JJY,1),K ,M) - FJ3(IJ)= F1(KLAT(IJ,JJY,2),K ,M) - FJ4(IJ)= F1(KCOR(IJ,JJK,1),K ,M) - FJ5(IJ)= F1(KCOR(IJ,JJK,2),K ,M) - ENDDO +! DO IJ = KIJS, KIJL +! FJ1(IJ)= F1(KLON(IJ,JJX) ,K ,M) +! FJ2(IJ)= F1(KLAT(IJ,JJY,1),K ,M) +! FJ3(IJ)= F1(KLAT(IJ,JJY,2),K ,M) +! FJ4(IJ)= F1(KCOR(IJ,JJK,1),K ,M) +! FJ5(IJ)= F1(KCOR(IJ,JJK,2),K ,M) +! ENDDO !JFH Loop split to enhance vectorisation + !DIR$ IVDEP !DIR$ PREFERVECTOR + !!$acc loop vector + IF (LLWKPMN(K,M,-1).AND.(.NOT.LLWKPMN(K,M,1))) THEN DO IJ = KIJS, KIJL F3(IJ,K,M) = & & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & -! & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & -! & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & -! & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & -! & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & -! 
& + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) + F3(IJ,K,M) = F3(IJ,K,M) & + & + WKPMN(IJ,K,M,-1)* F1(IJ,KPM(K,-1),M) ! & + WLONN(IJ,K,M,JJX) * F1(KLON(IJ,JJX) ,K ,M) & ! & +WLATN(IJ,K,M,JJY,1)* F1(KLAT(IJ,JJY,1),K ,M) & ! & +WLATN(IJ,K,M,JJY,2)* F1(KLAT(IJ,JJY,2),K ,M) & ! & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,JJK,1),K ,M) & ! & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,JJK,2),K ,M) & - & + WLONN(IJ,K,M,JJX) * FJ1(IJ) & - & +WLATN(IJ,K,M,JJY,1)* FJ2(IJ) & - & +WLATN(IJ,K,M,JJY,2)* FJ3(IJ) & - & + WCORN(IJ,K,M,1,1)* FJ4(IJ) & - & + WCORN(IJ,K,M,1,2)* FJ5(IJ) +! & + WLONN(IJ,K,M,JJX) * FJ1(IJ) & +! & +WLATN(IJ,K,M,JJY,1)* FJ2(IJ) & +! & +WLATN(IJ,K,M,JJY,2)* FJ3(IJ) & +! & + WCORN(IJ,K,M,1,1)* FJ4(IJ) & +! 
& + WCORN(IJ,K,M,1,2)* FJ5(IJ) ENDDO - - DO IC=-1,1,2 - IF (LLWKPMN(K,M,IC)) THEN - DO IJ = KIJS, KIJL + ELSE IF (LLWKPMN(K,M,-1).AND.LLWKPMN(K,M,1)) THEN + DO IJ = KIJS, KIJL + F3(IJ,K,M) = & + & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) F3(IJ,K,M) = F3(IJ,K,M) & - & + WKPMN(IJ,K,M,IC)* F1(IJ,KPM(K,IC),M) - ENDDO - ENDIF + & + WKPMN(IJ,K,M,-1)* F1(IJ,KPM(K,-1),M) + F3(IJ,K,M) = F3(IJ,K,M) & + & + WKPMN(IJ,K,M,1)* F1(IJ,KPM(K,1),M) ENDDO - + ELSE IF (LLWKPMN(K,M,1).AND.(.NOT.LLWKPMN(K,M,-1))) THEN + DO IJ = KIJS, KIJL + F3(IJ,K,M) = & + & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) + F3(IJ,K,M) = F3(IJ,K,M) & + & + WKPMN(IJ,K,M,1)* F1(IJ,KPM(K,1),M) + ENDDO + ELSE IF ((.not.LLWKPMN(K,M,-1)).and.(.not.LLWKPMN(K,M,1))) THEN + DO IJ = KIJS, KIJL + F3(IJ,K,M) = & + & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) + ENDDO + END IF + + !!$acc loop vector +! DO IC=-1,1,2 +! IF (LLWKPMN(K,M,IC)) THEN +! !!$acc loop vector +! DO IJ = KIJS, KIJL +! F3(IJ,K,M) = F3(IJ,K,M) & +! & + WKPMN(IJ,K,M,IC)* F1(IJ,KPM(K,IC),M) +! ENDDO +! ENDIF +! 
ENDDO ENDDO ENDDO + !$acc end kernels + +!!$acc exit data copyout(F3) +!!$acc exit data delete(FJ1, FJ2, FJ3, FJ4, FJ5) + +! call nvtxEndRange ELSE !* DEPTH AND CURRENT REFRACTION. diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 132b2a57a..a1b66ca2e 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -147,6 +147,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & DATA LLNEWFILE / .FALSE. / + ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('WAMINTGR',0,ZHOOK_HANDLE) @@ -154,13 +155,26 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* PROPAGATION TIME ! ---------------- +!!$acc enter data copyin(BLK2GLO, WVENVI, WVPRPT, FL1) copyout(FL1) + +CALL SRC_CONTRIBS%INIT(FL1=FL1) +CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR) +!$acc data present(FL1_DPTR) + +!!$acc data copyin(BLK2GLO, WVENVI, WVPRPT, FL1) copyout(FL1) IF (CDATE == CDTPRA) THEN TIME0=-WAM_USER_CLOCK() - CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1) + +!!$acc data present(BLK2GLO, WVENVI, WVPRPT, FL1) +!!$acc data present(FL1) + CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1_DPTR) +!!$acc end data TIME1(1) = TIME1(1) + (TIME0+WAM_USER_CLOCK())*1.E-06 CDATE = CDTPRO ENDIF +!$acc end data +!!$acc exit data delete(BLK2GLO, WVENVI, WVPRPT) !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! 
---------------------------------------- @@ -189,7 +203,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & STRNMS=INTFLDS%STRNMS, TAUXD=INTFLDS%TAUXD, TAUYD=INTFLDS%TAUYD, TAUOCXD=INTFLDS%TAUOCXD, & & TAUOCYD=INTFLDS%TAUOCYD, TAUOC=INTFLDS%TAUOC, PHIOCD=INTFLDS%PHIOCD, PHIEPS=INTFLDS%PHIEPS, & & PHIAW=INTFLDS%PHIAW) - CALL SRC_CONTRIBS%INIT(FL1=FL1, XLLWS=XLLWS, MIJ=MIJ) + CALL SRC_CONTRIBS%INIT(XLLWS=XLLWS, MIJ=MIJ) !$loki update_device @@ -205,7 +219,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL INTFLDS_FIELD%UPDATE_DEVICE(WSEMEAN=WSEMEAN_DPTR, WSFMEAN=WSFMEAN_DPTR, USTOKES=USTOKES_DPTR, & & VSTOKES=VSTOKES_DPTR, STRNMS=STRNMS_DPTR, TAUXD=TAUXD_DPTR, TAUYD=TAUYD_DPTR, TAUOCXD=TAUOCXD_DPTR, & & TAUOCYD=TAUOCYD_DPTR, TAUOC=TAUOC_DPTR, PHIOCD=PHIOCD_DPTR, PHIEPS=PHIEPS_DPTR, PHIAW=PHIAW_DPTR) - CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR, XLLWS=XLLWS_DPTR, MIJ=MIJ_DPTR) + CALL SRC_CONTRIBS%UPDATE_DEVICE(XLLWS=XLLWS_DPTR, MIJ=MIJ_DPTR) !$acc data present(FL1_DPTR,XLLWS_DPTR,MIJ_DPTR,WAVNUM_DPTR,CGROUP_DPTR,CIWA_DPTR,CINV_DPTR,XK2CG_DPTR,STOKFAC_DPTR,& !$acc & EMAXDPT_DPTR,INDEP_DPTR,DEPTH_DPTR,IOBND_DPTR,IODP_DPTR,CICOVER_DPTR,WSWAVE_DPTR,WDWAVE_DPTR,AIRD_DPTR,& @@ -245,12 +259,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL SRC_CONTRIBS%ENSURE_HOST() !$loki update_host - CALL WVPRPT_FIELD%FINAL() - CALL WVENVI_FIELD%FINAL() - CALL FF_NOW_FIELD%FINAL() - CALL WAM2NEMO_FIELD%FINAL() - CALL INTFLDS_FIELD%FINAL() - CALL SRC_CONTRIBS%FINAL() TIME1(3) = TIME1(3) + (TIME2+WAM_USER_CLOCK())*1.E-06 IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 diff --git a/src/ecwam/yowgrid.F90 b/src/ecwam/yowgrid.F90 index 313c20214..b2b31688c 100644 --- a/src/ecwam/yowgrid.F90 +++ b/src/ecwam/yowgrid.F90 @@ -87,4 +87,5 @@ MODULE YOWGRID ! 
---------------------------------------------------------------------- + !$acc declare create( COSPH ) END MODULE YOWGRID diff --git a/src/ecwam/yowmap.F90 b/src/ecwam/yowmap.F90 index 7099ecc28..b670deed4 100644 --- a/src/ecwam/yowmap.F90 +++ b/src/ecwam/yowmap.F90 @@ -78,4 +78,5 @@ MODULE YOWMAP ! (i.e. NO LAND AND DEEP WATER). ! ---------------------------------------------------------------------- - END MODULE YOWMAP + +END MODULE YOWMAP diff --git a/src/ecwam/yowparam.F90 b/src/ecwam/yowparam.F90 index 0f713bdf6..0bd0552f6 100644 --- a/src/ecwam/yowparam.F90 +++ b/src/ecwam/yowparam.F90 @@ -91,5 +91,8 @@ MODULE YOWPARAM ! DONE IN LATITUNAL BANDS ! (like it used to be done). ! ---------------------------------------------------------------------- - +!$acc declare create( nang ) +!$acc declare create( nfre_red ) +!$acc declare create( ngy ) +!$acc declare create( niblo ) END MODULE YOWPARAM diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index e3555b656..0714a9949 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -251,4 +251,5 @@ MODULE YOWSTAT ! *CMETER* CHARACTER SMS or ECFLOW meter command (ECMWF supervisor) ! *CEVENT* CHARACTER SMS or ECFLOW event command (ECMWF supervisor) ! ---------------------------------------------------------------------- + !$acc declare create( icase ) END MODULE YOWSTAT diff --git a/src/ecwam/yowubuf.F90 b/src/ecwam/yowubuf.F90 index 29b69d6ee..2fd621c7c 100644 --- a/src/ecwam/yowubuf.F90 +++ b/src/ecwam/yowubuf.F90 @@ -154,4 +154,13 @@ MODULE YOWUBUF ! *LLWMPMN* LOGICAL ARRAY, TRUE IF WMPMN > 0. AT ALL GRID POINTS. ! 
---------------------------------------------------------------------- - END MODULE YOWUBUF + +!$acc declare create(WLAT) +!$acc declare create(KLAT) +!$acc declare create(WCOR) +!$acc declare create(KCOR) +!$acc declare create(WLATN) +!$acc declare create(WCORN) +!$acc declare create(WLONN) + + END MODULE YOWUBUF From bc824f15d4041d44121f0c8745e4ebfd045098a9 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 20 Nov 2023 10:34:14 +0100 Subject: [PATCH 02/30] Update intel-oneapi install script --- .github/tools/install-intel-oneapi.sh | 14 +++++++------- .github/workflows/build.yml | 9 ++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/tools/install-intel-oneapi.sh b/.github/tools/install-intel-oneapi.sh index 78af1aec0..b6db3853b 100755 --- a/.github/tools/install-intel-oneapi.sh +++ b/.github/tools/install-intel-oneapi.sh @@ -1,14 +1,14 @@ -#!/bin/sh +#!/usr/bin/env bash -KEY=GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB +version=2023.2.0 +KEY=GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB wget https://apt.repos.intel.com/intel-gpg-keys/$KEY sudo apt-key add $KEY rm $KEY echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list sudo apt-get update sudo apt-get install \ - intel-oneapi-compiler-fortran \ - intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic \ - intel-oneapi-mpi \ - intel-oneapi-mpi-devel \ - intel-oneapi-mkl + intel-oneapi-compiler-fortran-$version \ + intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-$version \ + intel-oneapi-mpi-devel-2021.10.0 \ + intel-oneapi-mkl-$version diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 089d70db8..1c7a389e4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -34,8 +34,7 @@ jobs: name: - linux gnu-10 - linux clang-12 - - linux nvhpc-21.9 - - linux intel + - linux intel-classic - macos include: @@ -74,9 +73,9 @@ jobs: cmake_options: -DCMAKE_CXX_FLAGS=--diag_suppress177 caching: true - - name : linux 
intel + - name : linux intel-classic os: ubuntu-20.04 - compiler: intel-oneapi + compiler: intel-classic compiler_cc: icc compiler_cxx: icpc compiler_fc: ifort @@ -152,7 +151,7 @@ jobs: ${ECWAM_TOOLS}/install-intel-oneapi.sh source /opt/intel/oneapi/setvars.sh printenv >> $GITHUB_ENV - echo "CACHE_SUFFIX=$(icc -dumpversion)" >> $GITHUB_ENV + echo "CACHE_SUFFIX=$CC-$($CC -dumpversion)" >> $GITHUB_ENV - name: Install MPI shell: bash -eux {0} From 756a7544c17e67707e0038eea091f00e5daab04a Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 7 Dec 2023 14:53:49 +0100 Subject: [PATCH 03/30] Some comments and nvtx removed, field_api destructor restored --- src/ecwam/ctuw.F90 | 15 ------------ src/ecwam/propag_wam.F90 | 41 --------------------------------- src/ecwam/propags2.F90 | 14 ----------- src/ecwam/wamintgr_loki_gpu.F90 | 1 + src/ecwam/yowmap.F90 | 1 - 5 files changed, 1 insertion(+), 71 deletions(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index ad4cdb36c..3192fbc97 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -13,7 +13,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & & WLATM1, WCORM1, DP, & & CGROUP_EXT, OMOSNH2KD_EXT, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) -use nvtx ! ---------------------------------------------------------------------- !**** *CTUW* - COMPUTATION OF THE CONER TRANSPORT SCHEME WEIGHTS. @@ -142,7 +141,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* LOOP OVER FREQUENCIES. ! ---------------------- -! call nvtxStartRange("ctuw: Loop 1") !$acc kernels !loop private(CGYP,KIJS,KIJL,CGX,IX,KY,UU,UREL,ISSU,VV,VREL,ISSV,DXP,DYP,ADXP,ADYP,DXUP,DXDW,DYUP,DYDW,DXX,DYY,GRIDAREAM1,WEIGHT) DO M = MSTART, MEND @@ -366,7 +364,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER FREQUENCIES !$acc end kernels -! call nvtxEndRange @@ -415,7 +412,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* LOOP OVER DIRECTIONS. ! --------------------- -! 
call nvtxStartRange("ctuw: Loop 2") !$acc parallel loop private(km1,kp1,sp,sm,DELFR0,DRGP,DRGM,DRDP,DRDM,DRCP,DRCM) DO K=1,NANG KP1 = K+1 @@ -528,14 +524,11 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP ON DIRECTIONS !$acc end parallel -! call nvtxEndRange ! CHECK THAT WEIGHTS ARE LESS THAN 1 ! AND COMPUTE THEIR SUM AND CHECK IT IS LESS THAN 1 AS WELL !!! THE SUM IS NEEDED LATER ON !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! call nvtxStartRange("ctuw: Loop 3") -!!$acc kernels loop seq #IFNDEF _OPENACC DO K=1,NANG DO M = MSTART, MEND @@ -692,12 +685,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS #ENDIF -!!$acc end kernels -! call nvtxEndRange - -!!WORKAROUNDDDDDDD -!LCFLFAIL=.FALSE. -!!WORKAROUNDDDDDDD DO IJ=KIJS,KIJL IF (LCFLFAIL(IJ)) THEN @@ -710,7 +697,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !!!!!!INCLUDE THE BLOCKING COEFFICIENTS INTO THE WEIGHTS OF THE ! SURROUNDING POINTS. -! call nvtxStartRange("ctuw: Loop 4") !$acc parallel loop collapse(3) DO K=1,NANG DO M = MSTART, MEND @@ -742,7 +728,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP ON FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS !$acc end parallel - ! call nvtxEndRange IF (LHOOK) CALL DR_HOOK('CTUW',1,ZHOOK_HANDLE) diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index c765b5d11..df6801bf9 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -58,8 +58,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK - USE NVTX - ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -116,10 +114,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) NPROMA=(IJLG-IJSG+1)/MTHREADS + 1 -! !$acc data COPYIN(FL1_EXT) !!! the advection schemes are still written in block structure !!! mapping chuncks to block ONLY for actual grid points !!!! -! 
call nvtxStartRange("PROPAG: Loop 1") #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) #endif /*_OPENACC*/ @@ -142,7 +138,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #ifndef _OPENACC !$OMP END PARALLEL DO #endif /*_OPENACC*/ -! call nvtxEndRange ! SET THE DUMMY LAND POINT TO 0. !$acc kernels @@ -229,7 +224,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) LUPDTWGHT=.FALSE. ENDIF -! call nvtxStartRange("PROPAG: First preloop") #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) #endif /*_OPENACC*/ @@ -241,17 +235,14 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #ifndef _OPENACC !$OMP END PARALLEL DO #endif /*_OPENACC*/ -! call nvtxEndRange ! SUB TIME STEPPING FOR FAST WAVES (only if IFRELFMAX > 0) IF (IFRELFMAX > 0 ) THEN NSTEP_LF = NINT(REAL(IDELPRO, JWRB)/DELPRO_LF) ISUBST = 2 ! The first step was done as part of the previous call to PROPAGS2 -! call nvtxStartRange("PROPAG: While loop") DO WHILE (ISUBST <= NSTEP_LF) -! call nvtxStartRange("PROPAG: Loop 2") #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL, M, K, IJ) #endif /*_OPENACC*/ @@ -272,11 +263,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #ifndef _OPENACC !$OMP END PARALLEL DO #endif /*_OPENACC*/ -! call nvtxEndRange CALL MPEXCHNG(FL1_EXT(:,:,1:IFRELFMAX), NANG, 1, IFRELFMAX) -!call nvtxStartRange("PROPAG: Inner propags2 loop") #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) #endif /*_OPENACC*/ @@ -292,12 +281,10 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #ifndef _OPENACC !$OMP END PARALLEL DO #endif /*_OPENACC*/ -! call nvtxEndRange ISUBST = ISUBST + 1 ENDDO -! call nvtxEndRange ENDIF ! end sub time steps (if needed) @@ -355,7 +342,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) !!! the advection schemes are still written in block structure !!! So need to convert back to the nproma_wam chuncks -! 
call nvtxStartRange("PROPAG: Loop 3") #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) #endif /*_OPENACC*/ @@ -394,36 +380,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ENDDO !$acc end kernels -!F !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) -!F DO ICHNK = 1, NCHNK -!F KIJS = 1 -!F IJSB = IJFROMCHNK(KIJS, ICHNK) -!F KIJL = KIJL4CHNK(ICHNK) -!F IJLB = IJFROMCHNK(KIJL, ICHNK) -!F !$acc loop seq collapse(2) -!F DO M = 1, NFRE_RED -!F DO K = 1, NANG -!F FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) -!F ENDDO -!F ENDDO -!F -!F IF (KIJL < NPROMA_WAM) THEN -!F !!! make sure fictious points keep values of the first point in the chunk -!F !$acc loop independent collapse(2) -!F DO M = 1, NFRE_RED -!F DO K = 1, NANG -!F FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) -!F ENDDO -!F ENDDO -!F ENDIF -!F -!F ENDDO -!F !$acc end kernels #ifndef _OPENACC !$OMP END PARALLEL DO #endif /*_OPENACC*/ -! call nvtxEndRange - CALL GSTATS(1430,1) diff --git a/src/ecwam/propags2.F90 b/src/ecwam/propags2.F90 index 149d12ea1..dd8e1f842 100644 --- a/src/ecwam/propags2.F90 +++ b/src/ecwam/propags2.F90 @@ -9,7 +9,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) -use nvtx ! ---------------------------------------------------------------------- !**** *PROPAGS2* - ADVECTION USING THE CORNER TRANSPORT SCHEME IN SPACE @@ -96,12 +95,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) IF (IREFRA /= 2 .AND. IREFRA /= 3 ) THEN !* WITHOUT DEPTH OR/AND CURRENT REFRACTION. ! ---------------------------------------- - -!call nvtxStartRange("PROPAGS2: Begin loop NANG") - -!!$acc enter data create(FJ1, FJ2, FJ3, FJ4, FJ5) copyin(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN) -!!$acc enter data present(F1, F3) -!! 
create(FJ1, FJ2, FJ3, FJ4, FJ5) copyin(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN) !$acc kernels loop present(F1,F3) create(FJ1, FJ2, FJ3, FJ4, FJ5) PRESENT(KLON,KLAT,KCOR,WKPMN,LLWKPMN, SUMWN, WLONN, WLATN, WCORN) PRESENT(JXO,JYO,KCR) DO K = 1, NANG @@ -123,7 +116,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) !DIR$ IVDEP !DIR$ PREFERVECTOR - !!$acc loop vector IF (LLWKPMN(K,M,-1).AND.(.NOT.LLWKPMN(K,M,1))) THEN DO IJ = KIJS, KIJL F3(IJ,K,M) = & @@ -184,7 +176,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) ENDDO END IF - !!$acc loop vector ! DO IC=-1,1,2 ! IF (LLWKPMN(K,M,IC)) THEN ! !!$acc loop vector @@ -198,11 +189,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) ENDDO !$acc end kernels -!!$acc exit data copyout(F3) -!!$acc exit data delete(FJ1, FJ2, FJ3, FJ4, FJ5) - -! call nvtxEndRange - ELSE !* DEPTH AND CURRENT REFRACTION. ! ----------------------------- diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index a1b66ca2e..3043d4cf8 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -259,6 +259,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL SRC_CONTRIBS%ENSURE_HOST() !$loki update_host + CALL WVPRPT_FIELD%FINAL() TIME1(3) = TIME1(3) + (TIME2+WAM_USER_CLOCK())*1.E-06 IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 diff --git a/src/ecwam/yowmap.F90 b/src/ecwam/yowmap.F90 index b670deed4..28ddf28fe 100644 --- a/src/ecwam/yowmap.F90 +++ b/src/ecwam/yowmap.F90 @@ -78,5 +78,4 @@ MODULE YOWMAP ! (i.e. NO LAND AND DEEP WATER). ! 
---------------------------------------------------------------------- - END MODULE YOWMAP From d1bd955f559ba126cb219ddda329b936faae99fd Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 20:01:27 +0100 Subject: [PATCH 04/30] removed comments in propags2 --- src/ecwam/propags2.F90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ecwam/propags2.F90 b/src/ecwam/propags2.F90 index dd8e1f842..9492331cc 100644 --- a/src/ecwam/propags2.F90 +++ b/src/ecwam/propags2.F90 @@ -103,7 +103,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) ! JJY=JYO(K,1) ! JJK=KCR(K,1) - !!$acc loop independent DO M = ND3S, ND3E ! DO IJ = KIJS, KIJL ! FJ1(IJ)= F1(KLON(IJ,JJX) ,K ,M) @@ -178,7 +177,6 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) ! DO IC=-1,1,2 ! IF (LLWKPMN(K,M,IC)) THEN -! !!$acc loop vector ! DO IJ = KIJS, KIJL ! F3(IJ,K,M) = F3(IJ,K,M) & ! & + WKPMN(IJ,K,M,IC)* F1(IJ,KPM(K,IC),M) From b555fecea0aa22cc144874bcda939fb574f59218 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:04:44 +0100 Subject: [PATCH 05/30] removed comments and guarded use openacc propag_wam - removed/added spaces in mpexchng and mubuf --- src/ecwam/mpexchng.F90 | 1 + src/ecwam/mubuf.F90 | 2 +- src/ecwam/propag_wam.F90 | 16 ++++------------ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index 2f6766f06..11c5f7116 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -170,6 +170,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ! NOW WAIT FOR ALL TO COMPLETE CALL MPL_WAIT(KREQUEST=IREQ(1:IR),CDSTRING='MPEXCHNG:') + CALL GSTATS(676,1) ! DECODE THE RECEIVED BUFFERS diff --git a/src/ecwam/mubuf.F90 b/src/ecwam/mubuf.F90 index 8cb1674fe..6ef274472 100644 --- a/src/ecwam/mubuf.F90 +++ b/src/ecwam/mubuf.F90 @@ -197,7 +197,6 @@ SUBROUTINE MUBUF (IU01, BATHY, IU08, NPROPAGS) DEALLOCATE(KLAT) - !* 2.2 LONGITUDE NEIGHBOURS (KLON) ! 
--------------------------- @@ -1164,4 +1163,5 @@ SUBROUTINE MUBUF (IU01, BATHY, IU08, NPROPAGS) ENDDO ! end loop over frequencies DEALLOCATE(KDUM) + END SUBROUTINE MUBUF diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index df6801bf9..8acb7a8ee 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -37,7 +37,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ! ------------------------------------------------------------------- +#ifndef _OPENACC use openacc +#endif /*_OPENACC*/ USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO, ENVIRONMENT, FREQUENCY @@ -47,11 +49,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) USE YOWPARAM , ONLY : NANG ,NFRE ,NFRE_RED ,NIBLO , LLUNSTR USE YOWREFD , ONLY : LLUPDTTD ,THDD ,THDC ,SDOT USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO - USE YOWUBUF , ONLY : LUPDTWGHT, KLAT ,KLON ,KCOR , & - & WLATN ,WLONN ,WCORN ,WKPMN ,WMPMN , & - & LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN , & - & SUMWN , & - & JXO ,JYO ,KCR ,KPM ,MPM + USE YOWUBUF , ONLY : LUPDTWGHT #ifdef WAM_HAVE_UNWAM USE UNWAM , ONLY : PROPAG_UNWAM #endif @@ -102,8 +100,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IF (LHOOK) CALL DR_HOOK('PROPAG_WAM',0,ZHOOK_HANDLE) -!$acc data present(FL1) -!$acc data CREATE(FL1_EXT,FL3_EXT) +!$acc data PRESENT(FL1) data CREATE(FL1_EXT,FL3_EXT) IF (NIBLO > 1) THEN IJSG = IJFROMCHNK(1,1) @@ -125,7 +122,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) -! !$acc loop private(FL1_EXT) !$acc loop independent collapse(2) DO M = 1, NFRE_RED DO K = 1, NANG @@ -269,7 +265,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) #endif /*_OPENACC*/ -! 
!$ACC DATA COPYIN(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN, JXO, JYO, KCR) DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) @@ -345,14 +340,12 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) #endif /*_OPENACC*/ - !!$acc kernels loop private(KIJS, IJSB, KIJL, IJLB, M, K) !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) DO ICHNK = 1, NCHNK KIJS = 1 IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) -! !$acc loop vector independent collapse(3) !$acc loop independent collapse(3) DO M = 1, NFRE_RED DO K = 1, NANG @@ -387,7 +380,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) CALL GSTATS(1430,1) ENDIF ! end propagation -! !$acc end data ENDIF ! more than one grid point !$ACC END DATA From 4f6d4f1286caff59ca98374865e9c2a2e57026cb Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:11:54 +0100 Subject: [PATCH 06/30] removed unused acc data from wamintgr_loki --- src/ecwam/wamintgr_loki_gpu.F90 | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 3043d4cf8..13e9e03f8 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -155,27 +155,18 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* PROPAGATION TIME ! 
---------------- -!!$acc enter data copyin(BLK2GLO, WVENVI, WVPRPT, FL1) copyout(FL1) - CALL SRC_CONTRIBS%INIT(FL1=FL1) CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR) !$acc data present(FL1_DPTR) -!!$acc data copyin(BLK2GLO, WVENVI, WVPRPT, FL1) copyout(FL1) IF (CDATE == CDTPRA) THEN TIME0=-WAM_USER_CLOCK() - -!!$acc data present(BLK2GLO, WVENVI, WVPRPT, FL1) -!!$acc data present(FL1) CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1_DPTR) -!!$acc end data TIME1(1) = TIME1(1) + (TIME0+WAM_USER_CLOCK())*1.E-06 CDATE = CDTPRO ENDIF !$acc end data -!!$acc exit data delete(BLK2GLO, WVENVI, WVPRPT) - !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! ---------------------------------------- CALL NEWWIND(CDTIMP, CDATEWH, LLNEWFILE, & From de24c4c70b09a79ef587557812bed4fe69c5bb73 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:24:19 +0100 Subject: [PATCH 07/30] ctuwdrv: restored comment --- src/ecwam/ctuwdrv.F90 | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 369808538..31288def3 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -29,7 +29,6 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO - USE YOWCURR , ONLY : LLCFLCUROFF USE YOWGRID , ONLY : NPROMA_WAM USE YOWMPP , ONLY : IRANK @@ -40,8 +39,6 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & USE YOWFRED , ONLY : FR ,DELTH, COSTH ,SINTH USE YOWPCONS , ONLY : ZPI - - USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK ! ---------------------------------------------------------------------- @@ -79,7 +76,7 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & IF (LHOOK) CALL DR_HOOK('CTUWDRV',0,ZHOOK_HANDLE) -!! =NPROMA_WAM +!! 
NPROMA=NPROMA_WAM MTHREADS=1 !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 From a6090b1e2ed81209af29b609bc34c57e1fbeaa3f Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:25:50 +0100 Subject: [PATCH 08/30] ctuwdrv: restored whitespaces --- src/ecwam/ctuwdrv.F90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 31288def3..b1ae1a365 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -81,7 +81,6 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 - #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL, ICALL, IJ, LL2NDCALL) #endif /*_OPENACC*/ @@ -97,7 +96,6 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) - ! WHEN SURFACE CURRENTS ARE USED AND LLCFLCUROFF IS TRUE ! THEN TRY TO SATISFY THE CFL CONDITION WITHOUT THE CURRENTS ! IF IT WAS VIOLATED IN THE FIRST PLACE From 35584436ee96e787a451de01e326e46f4ce4db42 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:26:39 +0100 Subject: [PATCH 09/30] ctuwdrv: restored whitespaces --- src/ecwam/ctuwdrv.F90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index b1ae1a365..259ea7291 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -117,7 +117,6 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) ENDIF ENDIF - ENDDO #ifndef _OPENACC From 31a263a58c2e2c407b1f5cebd3932b7174dbe93f Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:27:15 +0100 Subject: [PATCH 10/30] ctuwdrv: restored whitespaces --- src/ecwam/ctuwdrv.F90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 259ea7291..0bcbc6449 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -117,7 +117,6 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, 
& & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) ENDIF ENDIF - ENDDO #ifndef _OPENACC !$OMP END PARALLEL DO From a0d568a5499c08f2109021c681acfa624f07e5e3 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:30:02 +0100 Subject: [PATCH 11/30] ctuwini: removed commented module, restored dr_hook --- src/ecwam/ctuwini.F90 | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/ecwam/ctuwini.F90 b/src/ecwam/ctuwini.F90 index 475274f9c..33e07c7a0 100644 --- a/src/ecwam/ctuwini.F90 +++ b/src/ecwam/ctuwini.F90 @@ -6,8 +6,6 @@ ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! -!MODULE CTUWINI_MOD -! CONTAINS SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & & WLATM1, WCORM1, DP) @@ -51,8 +49,6 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & !REAL(KIND=JPHOOK) :: ZHOOK_HANDLE -!!$acc routine vector - ! ---------------------------------------------------------------------- !IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) @@ -169,7 +165,6 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDIF -!IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) +IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) END SUBROUTINE CTUWINI -!END MODULE CTUWINI_MOD From c655ed2d02bd3ceb42e4830107d2f8ff8797a075 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:31:53 +0100 Subject: [PATCH 12/30] ctuwupdt: removed commented openacc pragmas --- src/ecwam/ctuwupdt.F90 | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index a524dd0e6..a4d9e4649 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -55,7 +55,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & #include "ctuw.intfb.h" #include "ctuwdrv.intfb.h" #include "ctuwini.intfb.h" -!!$acc routine(ctuwini) vector INTEGER(KIND=JWIM), INTENT(IN) :: IJS, IJL ! 
GRID POINTS WITHIN A BLOCK INTEGER(KIND=JWIM), INTENT(IN) :: NINF, NSUP ! GRID POINT WITH HALO EXTEND NINF:NSUP+1 @@ -262,8 +261,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDIF -!!$acc update host(WLATN,WCORN,WLONN) - ! FIND THE LOGICAL FLAGS THAT WILL LIMIT THE EXTEND OF THE CALCULATION IN PROPAGS2 !$acc parallel loop independent collapse(4) From dd875794414687f890ef8c8e5dbdf7a1e8c77875 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:33:06 +0100 Subject: [PATCH 13/30] ctuwupdt: removed commented openacc pragmas and !F --- src/ecwam/ctuwupdt.F90 | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index a4d9e4649..aaacd4e62 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -89,7 +89,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (LHOOK) CALL DR_HOOK('CTUWUPDT',0,ZHOOK_HANDLE) !$acc update device(sinth,costh) -!$acc update device(icase, COSPH, nang, nfre_red, ngy, niblo) !F +!$acc update device(icase, COSPH, nang, nfre_red, ngy, niblo) ! 
DEFINE JXO, JYO, KCR IF (LFRSTCTU) THEN @@ -194,13 +194,10 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 - -!F!$acc update device(KLAT,WLAT,KCOR,WCOR,WLATN,WLONN,WCORN) - !$acc enter data copyin(BLK2GLO) !$acc enter data copyin(BLK2GLO%KXLT) -!$acc update device(KLAT,WLAT,KCOR,WCOR) !F +!$acc update device(KLAT,WLAT,KCOR,WCOR) !$acc update device(NFRE_RED,ZPI,FR,DELTH,NANG) #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL) From d642d66b3ea163a8f4f23d60acdd709ffb8edf72 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:37:23 +0100 Subject: [PATCH 14/30] wamintgr_loki_gpu: FIELD_API destructors restored --- src/ecwam/wamintgr_loki_gpu.F90 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 13e9e03f8..19fa80c41 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -251,6 +251,11 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !$loki update_host CALL WVPRPT_FIELD%FINAL() + CALL WVENVI_FIELD%FINAL() + CALL FF_NOW_FIELD%FINAL() + CALL WAM2NEMO_FIELD%FINAL() + CALL INTFLDS_FIELD%FINAL() + CALL SRC_CONTRIBS%FINAL() TIME1(3) = TIME1(3) + (TIME2+WAM_USER_CLOCK())*1.E-06 IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 From ed6a9d1350428d3bf3cf1a6234375a9e024dd32e Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Sun, 10 Dec 2023 21:52:21 +0100 Subject: [PATCH 15/30] wamintgr_loki_gpu: whitespace removed --- src/ecwam/wamintgr_loki_gpu.F90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 19fa80c41..009b1229d 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -147,7 +147,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & DATA LLNEWFILE / .FALSE. / - ! 
---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('WAMINTGR',0,ZHOOK_HANDLE) From 79988d1b79f158294f9c471a3735627b3b004c23 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 11 Dec 2023 12:20:38 +0100 Subject: [PATCH 16/30] ctuwini: uncommented ZHOOK_HANDLE declaration --- src/ecwam/ctuwini.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ecwam/ctuwini.F90 b/src/ecwam/ctuwini.F90 index 33e07c7a0..0a28187b0 100644 --- a/src/ecwam/ctuwini.F90 +++ b/src/ecwam/ctuwini.F90 @@ -47,7 +47,7 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & INTEGER(KIND=JWIM) :: IJ, K, M, IC, ICR, ICL, KY, KK, KKM INTEGER(KIND=JWIM) :: NLAND -!REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE ! ---------------------------------------------------------------------- From 142659b6d717142e1f384c9954cd77b100b6a386 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 11 Dec 2023 12:49:07 +0100 Subject: [PATCH 17/30] ctuwupdt: removed unused module, propag_wam: fixed openacc data pragma --- src/ecwam/ctuwupdt.F90 | 2 -- src/ecwam/mpexchng.F90 | 1 + src/ecwam/propag_wam.F90 | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index aaacd4e62..981447e53 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -24,7 +24,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ! ------------------------------------------------------------------- - USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO @@ -46,7 +45,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK -!USE CTUWINI_MOD , ONLY : CTUWINI ! 
---------------------------------------------------------------------- IMPLICIT NONE diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index 11c5f7116..b05531461 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -145,6 +145,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) IR=0 CALL GSTATS(676,0) + DO INGB=1,NGBFROMPE IR=IR+1 IPROC=NFROMPELST(INGB) diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 8acb7a8ee..941c8628f 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -100,7 +100,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IF (LHOOK) CALL DR_HOOK('PROPAG_WAM',0,ZHOOK_HANDLE) -!$acc data PRESENT(FL1) data CREATE(FL1_EXT,FL3_EXT) +!$acc data PRESENT(FL1) CREATE(FL1_EXT,FL3_EXT) IF (NIBLO > 1) THEN IJSG = IJFROMCHNK(1,1) @@ -382,7 +382,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ENDIF ! end propagation ENDIF ! more than one grid point -!$ACC END DATA !$ACC END DATA L1STCALL=.FALSE. From 6b8301aac08dbe17d0049eed423880ea130bd953 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 11 Dec 2023 12:53:09 +0100 Subject: [PATCH 18/30] ctuw: reverted spaces --- src/ecwam/ctuw.F90 | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index 3192fbc97..05b4f7d2b 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -99,7 +99,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: DRCP,DRCM REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: CURMASK REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,2) :: CGX, CGY - ! ---------------------------------------------------------------------- @@ -364,9 +363,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER FREQUENCIES !$acc end kernels - - - ELSE !* CARTESIAN GRID. @@ -408,7 +404,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & DELTH0 = 0.25*DELPRO/DELTH - !* LOOP OVER DIRECTIONS. ! 
--------------------- @@ -512,6 +507,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & MM1 = MAX(1,M-1) DFP = DELFR0/FR(M) DFM = DELFR0/FR(MM1) + DO IJ=KIJS,KIJL DTHP = CURMASK(IJ) * (SDOT(IJ,K,M) + SDOT(IJ,K,MP1))*DFP DTHM = CURMASK(IJ) * (SDOT(IJ,K,M) + SDOT(IJ,K,MM1))*DFM @@ -523,6 +519,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDIF ENDDO ! END LOOP ON DIRECTIONS + !$acc end parallel ! CHECK THAT WEIGHTS ARE LESS THAN 1 @@ -731,9 +728,6 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & IF (LHOOK) CALL DR_HOOK('CTUW',1,ZHOOK_HANDLE) - - - RETURN CONTAINS From a8f5c9b2bcc2f6e19000faa12c6c4f90f906033d Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 11 Dec 2023 12:56:54 +0100 Subject: [PATCH 19/30] ctuwini: uncommented DR_HOOK reverted spaces --- src/ecwam/ctuw.F90 | 1 + src/ecwam/ctuwini.F90 | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index 05b4f7d2b..4baab580a 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -100,6 +100,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: CURMASK REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,2) :: CGX, CGY + ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('CTUW',0,ZHOOK_HANDLE) diff --git a/src/ecwam/ctuwini.F90 b/src/ecwam/ctuwini.F90 index 0a28187b0..8ffb4e0db 100644 --- a/src/ecwam/ctuwini.F90 +++ b/src/ecwam/ctuwini.F90 @@ -44,6 +44,7 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & REAL(KIND=JWRB), DIMENSION(NINF:NSUP,4), INTENT(OUT) :: WCORM1 ! 1 - WCOR REAL(KIND=JWRB), DIMENSION(NINF:NSUP,2), INTENT(OUT) :: DP ! COS PHI FACTOR + INTEGER(KIND=JWIM) :: IJ, K, M, IC, ICR, ICL, KY, KK, KKM INTEGER(KIND=JWIM) :: NLAND @@ -51,7 +52,7 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ! 
---------------------------------------------------------------------- -!IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) +IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) NLAND = NSUP+1 @@ -153,7 +154,6 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ! (for all grid points) !$acc parallel loop independent collapse(2) private(KY,KK,KKM) DO IC=1,2 -! !!!$acc loop private(KY,KK,KKM) DO IJ = KIJS,KIJL KY=BLK2GLO%KXLT(IJ) KK=KY+2*IC-3 From a0c96073bf9995e8a41c70eb41d2b531e05c5ed4 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Wed, 17 Jan 2024 11:38:29 +0100 Subject: [PATCH 20/30] cuda aware MPI --- CMakeLists.txt | 6 ++++++ src/ecwam/CMakeLists.txt | 10 +++++++++ src/ecwam/ctuwupdt.F90 | 10 ++++++--- src/ecwam/mpexchng.F90 | 46 ++++++++++++++++++++++++++++++---------- 4 files changed, 58 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a513809d..4af79a9a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,6 +116,12 @@ ecbuild_add_option( FEATURE WAM_GPU DESCRIPTION "Offload ecWam physics to the GPU" CONDITION HAVE_FIELD_API ) +### CUDA-aware MPI +ecbuild_add_option( FEATURE GPU_AWARE_MPI + DEFAULT OFF + REQUIRED_PACKAGES "MPI COMPONENTS Fortran" + DESCRIPTION "Enable CUDA-aware MPI" ) + ### check for CUDA include(CheckLanguage) check_language(CUDA) diff --git a/src/ecwam/CMakeLists.txt b/src/ecwam/CMakeLists.txt index d7f308f8a..3c1db49b0 100644 --- a/src/ecwam/CMakeLists.txt +++ b/src/ecwam/CMakeLists.txt @@ -410,6 +410,10 @@ else() unset( ${PNAME}_OCEANMODEL_INCLUDE_DIRS ) endif() +if( HAVE_GPU_AWARE_MPI ) + list(APPEND ECWAM_DEFINITIONS WITH_GPU_AWARE_MPI ) +endif() + # Using dynamic linking creates undefined references to the device # copies of module global variables set( LIBRARY_TYPE SHARED ) @@ -432,6 +436,7 @@ ecbuild_add_library( ${MULTIO_LIBRARIES} ${OpenMP_Fortran_LIBRARIES} $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran PUBLIC_INCLUDES $ PRIVATE_INCLUDES 
${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ) @@ -493,6 +498,7 @@ if( HAVE_WAM_LOKI ) ${MULTIO_LIBRARIES} ${OpenMP_Fortran_LIBRARIES} $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran PUBLIC_INCLUDES $ PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) @@ -537,6 +543,7 @@ if( HAVE_WAM_LOKI ) ${MULTIO_LIBRARIES} ${OpenMP_Fortran_LIBRARIES} $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran PUBLIC_INCLUDES $ PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) @@ -588,6 +595,7 @@ if( HAVE_WAM_LOKI ) ${MULTIO_LIBRARIES} ${OpenMP_Fortran_LIBRARIES} $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran PUBLIC_INCLUDES $ PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) @@ -636,6 +644,7 @@ if( HAVE_WAM_LOKI ) ${MULTIO_LIBRARIES} ${OpenMP_Fortran_LIBRARIES} $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran PUBLIC_INCLUDES $ PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) @@ -684,6 +693,7 @@ if( HAVE_CUDA ) ${MULTIO_LIBRARIES} ${OpenMP_Fortran_LIBRARIES} $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran PUBLIC_INCLUDES $ PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index 981447e53..4dfdaf47a 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -45,6 +45,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +USE OPENACC ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -82,6 +83,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & LOGICAL, SAVE :: LFRSTCTU DATA LFRSTCTU /.TRUE./ + ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('CTUWUPDT',0,ZHOOK_HANDLE) @@ -105,7 +107,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(JYO)) ALLOCATE(JYO(NANG,2)) IF (.NOT. 
ALLOCATED(KCR)) ALLOCATE(KCR(NANG,4)) -!$ACC ENTER DATA COPYIN(KLON, KLAT, KCOR, JXO, JYO, KCR) +!$acc enter data copyin(KLON, KLAT, KCOR, JXO, JYO, KCR) !$acc kernels DO K=1,NANG @@ -184,7 +186,8 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(LLWMPMN)) ALLOCATE(LLWMPMN(NANG,NFRE_RED,-1:1)) ENDIF -!$acc enter data copyin(sumwn,LLWKPMN, WLATN,WLONN,WCORN,WKPMN) + +!$acc enter data copyin(SUMWN,LLWKPMN, WLATN,WLONN,WCORN,WKPMN) ! SOME INITIALISATION FOR *CTUW* !! NPROMA=NPROMA_WAM @@ -193,7 +196,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & NPROMA=(IJL-IJS+1)/MTHREADS + 1 !$acc enter data copyin(BLK2GLO) -!$acc enter data copyin(BLK2GLO%KXLT) +!!$acc enter data copyin(BLK2GLO%KXLT) !$acc update device(KLAT,WLAT,KCOR,WCOR) !$acc update device(NFRE_RED,ZPI,FR,DELTH,NANG) @@ -349,6 +352,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & !$acc end parallel ENDIF +!$acc exit data delete(BLK2GLO) IF (ALLOCATED(THDD)) DEALLOCATE(THDD) IF (ALLOCATED(THDC)) DEALLOCATE(THDC) diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index b05531461..3146e4dcb 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -65,6 +65,11 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE, ONLY : MPL_RECV, MPL_SEND, MPL_WAIT, & & JP_NON_BLOCKING_STANDARD +!#ifdef WITH_GPU_AWARE_MPI + USE MPL_DATA_MODULE ,ONLY : MPL_COMM_OML + USE OML_MOD ,ONLY : OML_MY_THREAD + USE MPI +!#endif !---------------------------------------------------------------------- @@ -84,6 +89,8 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) REAL(KIND=JWRB), ALLOCATABLE :: ZCOMBUFR(:,:) LOGICAL :: LLOK + INTEGER(KIND=JWIM) :: IERROR +! 
TYPE(MPI_REAL) :: mpi_real !---------------------------------------------------------------------- @@ -101,12 +108,15 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) NBUFMAX=MAX(NTOPEMAX,NFROMPEMAX)*NDIM2*NDIM3 ALLOCATE(ZCOMBUFS(NBUFMAX,NGBTOPE)) ALLOCATE(ZCOMBUFR(NBUFMAX,NGBFROMPE)) +#ifdef WITH_GPU_AWARE_MPI +!$acc data create(ZCOMBUFS,ZCOMBUFR) +#endif ! PACK SEND BUFFERS FOR NGBTOPE NEIGHBOURING PE's ! ------------------------------------------------- CALL GSTATS(1892,0) - #ifdef _OPENACC - !$acc kernels loop independent private(KCOUNT,IJ) copyout(ZCOMBUFS) copyin(fld) +#ifdef _OPENACC +!$acc kernels loop independent private(KCOUNT,IJ) DO INGB=1,NGBTOPE !Total number of PE's to which information will be sent IPROC=NTOPELST(INGB) !To which PE to send informations !$acc loop independent collapse(3) @@ -120,7 +130,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO ENDDO ENDDO - !$acc end kernels +!$acc end kernels #else !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) DO INGB=1,NGBTOPE @@ -137,7 +147,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO ENDDO !$OMP END PARALLEL DO - #endif /*_OPENACC*/ +#endif /*_OPENACC*/ CALL GSTATS(1892,1) @@ -145,40 +155,50 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) IR=0 CALL GSTATS(676,0) - DO INGB=1,NGBFROMPE IR=IR+1 IPROC=NFROMPELST(INGB) KCOUNT=NDIM3*NDIM2*NFROMPE(IPROC) -!!$acc host_data use_device(ZCOMBUFR) +#ifdef WITH_GPU_AWARE_MPI +!$acc host_data use_device(ZCOMBUFR) + CALL MPI_IRECV(ZCOMBUFR(1:KCOUNT,INGB),KCOUNT, & + & MPI_DOUBLE_PRECISION,IPROC-1, KTAG, & + & MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR), IERROR) +!$acc end host_data +#else CALL MPL_RECV(ZCOMBUFR(1:KCOUNT,INGB),KSOURCE=IPROC,KTAG=KTAG, & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & & CDSTRING='MPEXCHNG:') -!!$acc end host_data +#endif ENDDO DO INGB=1,NGBTOPE IR=IR+1 IPROC=NTOPELST(INGB) KCOUNT=NDIM3*NDIM2*NTOPE(IPROC) -!!$acc host_data use_device(ZCOMBUFR) +#ifdef WITH_GPU_AWARE_MPI +!$acc host_data 
use_device(ZCOMBUFS) + CALL MPI_ISEND(ZCOMBUFS(1:KCOUNT,INGB),KCOUNT, & + & MPI_DOUBLE_PRECISION,IPROC-1, KTAG, & + & MPL_COMM_OML(OML_MY_THREAD()),IREQ(IR), IERROR) +!$acc end host_data +#else CALL MPL_SEND(ZCOMBUFS(1:KCOUNT,INGB),KDEST=IPROC,KTAG=KTAG, & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & & CDSTRING='MPEXCHNG:') -!!$acc end host_data +#endif ENDDO ! NOW WAIT FOR ALL TO COMPLETE CALL MPL_WAIT(KREQUEST=IREQ(1:IR),CDSTRING='MPEXCHNG:') - CALL GSTATS(676,1) ! DECODE THE RECEIVED BUFFERS CALL GSTATS(1893,0) #ifdef _OPENACC - !$acc kernels loop independent private(KCOUNT,IJ) copyin(ZCOMBUFR) + !$acc kernels loop independent private(KCOUNT,IJ) !copyin(ZCOMBUFR) DO INGB=1,NGBFROMPE IPROC=NFROMPELST(INGB) !$acc loop vector independent collapse(3) @@ -215,9 +235,13 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) KTAG=KTAG+1 +#ifdef WITH_GPU_AWARE_MPI +!$acc end data +#endif DEALLOCATE(ZCOMBUFS) DEALLOCATE(ZCOMBUFR) + IF (LHOOK) CALL DR_HOOK('MPEXCHNG',1,ZHOOK_HANDLE) END SUBROUTINE MPEXCHNG From f78680f14b8d3c2ea99f71c6d60c511f22ce2d87 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 18 Jan 2024 16:18:27 +0100 Subject: [PATCH 21/30] CPU-only ecWAM variant fails validation solved --- src/ecwam/ctuw.F90 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index 4baab580a..673020e86 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -476,7 +476,9 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WKPMN(IJ,K,M,0)=(DTHP+ABS(DTHP))+(ABS(DTHM)-DTHM) WKPMN(IJ,K,M,1)=-DTHP+ABS(DTHP) WKPMN(IJ,K,M,-1)=DTHM+ABS(DTHM) +#ifdef _OPENACC SUMWN(IJ,K,M)=SUMWN(IJ,K,M)+WKPMN(IJ,K,M,0) +#endif ENDDO ENDDO ELSE @@ -490,7 +492,9 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WKPMN(IJ,K,M,0)=(DTHP+ABS(DTHP))+(ABS(DTHM)-DTHM) WKPMN(IJ,K,M,1)=-DTHP+ABS(DTHP) WKPMN(IJ,K,M,-1)=DTHM+ABS(DTHM) +#ifdef _OPENACC SUMWN(IJ,K,M)=SUMWN(IJ,K,M)+WKPMN(IJ,K,M,0) +#endif ENDDO ENDDO ENDIF @@ -682,7 +686,7 @@ SUBROUTINE CTUW 
(DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP OVER FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS -#ENDIF +#endif DO IJ=KIJS,KIJL IF (LCFLFAIL(IJ)) THEN From 708454032686447b6b1ed9ad02f8f0de318db7ff Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 29 Jan 2024 10:37:10 +0100 Subject: [PATCH 22/30] Using MTHREADS = OMP_GET_MAX_THREADS when openacc activated --- src/ecwam/ctuwdrv.F90 | 4 +++- src/ecwam/ctuwupdt.F90 | 4 +++- src/ecwam/propag_wam.F90 | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 0bcbc6449..03f47ffe1 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -78,7 +78,9 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & !! NPROMA=NPROMA_WAM MTHREADS=1 -!$ MTHREADS=OMP_GET_MAX_THREADS() +#ifdef _OPENACC + MTHREADS=OMP_GET_MAX_THREADS() +#endif /*_OPENACC*/ NPROMA=(IJL-IJS+1)/MTHREADS + 1 #ifndef _OPENACC diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index 4dfdaf47a..e12211bf6 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -192,7 +192,9 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ! SOME INITIALISATION FOR *CTUW* !! 
NPROMA=NPROMA_WAM MTHREADS=1 -!$ MTHREADS=OMP_GET_MAX_THREADS() +#ifdef _OPENACC + MTHREADS=OMP_GET_MAX_THREADS() +#endif /*_OPENACC*/ NPROMA=(IJL-IJS+1)/MTHREADS + 1 !$acc enter data copyin(BLK2GLO) diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 941c8628f..fc4b11ed5 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -107,7 +107,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IJLG = IJSG + SUM(KIJL4CHNK) - 1 MTHREADS=1 -!$ MTHREADS=OMP_GET_MAX_THREADS() +#ifdef _OPENACC + MTHREADS=OMP_GET_MAX_THREADS() +#endif /*_OPENACC*/ NPROMA=(IJLG-IJSG+1)/MTHREADS + 1 From b7dd9c93973a15d43641ee32f1676c556a4cfd08 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 29 Jan 2024 18:18:15 +0100 Subject: [PATCH 23/30] Removed unclosed acc enter data and converted in acc declare --- src/ecwam/ctuwupdt.F90 | 10 ++-------- src/ecwam/yowubuf.F90 | 7 +++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index e12211bf6..eadc0e7d9 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -107,7 +107,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(JYO)) ALLOCATE(JYO(NANG,2)) IF (.NOT. ALLOCATED(KCR)) ALLOCATE(KCR(NANG,4)) -!$acc enter data copyin(KLON, KLAT, KCOR, JXO, JYO, KCR) +!$acc update device(KLON, KLAT, KCOR, JXO, JYO, KCR) !$acc kernels DO K=1,NANG @@ -186,9 +186,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(LLWMPMN)) ALLOCATE(LLWMPMN(NANG,NFRE_RED,-1:1)) ENDIF - -!$acc enter data copyin(SUMWN,LLWKPMN, WLATN,WLONN,WCORN,WKPMN) - ! SOME INITIALISATION FOR *CTUW* !! 
NPROMA=NPROMA_WAM MTHREADS=1 @@ -197,10 +194,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & #endif /*_OPENACC*/ NPROMA=(IJL-IJS+1)/MTHREADS + 1 -!$acc enter data copyin(BLK2GLO) -!!$acc enter data copyin(BLK2GLO%KXLT) - -!$acc update device(KLAT,WLAT,KCOR,WCOR) +!$acc update device(WLAT,WCOR) !$acc update device(NFRE_RED,ZPI,FR,DELTH,NANG) #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL) diff --git a/src/ecwam/yowubuf.F90 b/src/ecwam/yowubuf.F90 index 2fd621c7c..fb8ca2213 100644 --- a/src/ecwam/yowubuf.F90 +++ b/src/ecwam/yowubuf.F90 @@ -157,10 +157,17 @@ MODULE YOWUBUF !$acc declare create(WLAT) !$acc declare create(KLAT) +!$acc declare create(KLON) !$acc declare create(WCOR) !$acc declare create(KCOR) +!$acc declare create(KCR) +!$acc declare create(JXO) +!$acc declare create(JYO) !$acc declare create(WLATN) !$acc declare create(WCORN) !$acc declare create(WLONN) +!$acc declare create(WKPMN) +!$acc declare create(SUMWN) +!$acc declare create(LLWKPMN) END MODULE YOWUBUF From d74d1f627626d32c8750be2f7aacb7735c03be19 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 1 Feb 2024 09:56:33 +0100 Subject: [PATCH 24/30] Cleaning code (removing spaces and comments) updated the timestep for O1280 case --- src/ecwam/ctuwini.F90 | 5 +---- src/ecwam/yowmap.F90 | 1 + tests/etopo1_oper_an_fc_O1280.yml | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/ecwam/ctuwini.F90 b/src/ecwam/ctuwini.F90 index 8ffb4e0db..cc372ca6b 100644 --- a/src/ecwam/ctuwini.F90 +++ b/src/ecwam/ctuwini.F90 @@ -115,7 +115,6 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO !$acc end parallel - !$acc parallel loop independent collapse(4) DO IC=1,2 DO M=1,NFRE_RED @@ -128,7 +127,6 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO !$acc end parallel - !$acc parallel loop independent collapse(5) DO ICL=1,2 DO ICR=1,4 @@ -149,7 +147,7 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, 
COSPHM1_EXT, & !* SPHERICAL GRID. ! --------------- -! + !* COMPUTE COS PHI FACTOR FOR ADJOINING GRID POINT. ! (for all grid points) !$acc parallel loop independent collapse(2) private(KY,KK,KKM) @@ -164,7 +162,6 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & !$acc end parallel ENDIF - IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) END SUBROUTINE CTUWINI diff --git a/src/ecwam/yowmap.F90 b/src/ecwam/yowmap.F90 index 28ddf28fe..b670deed4 100644 --- a/src/ecwam/yowmap.F90 +++ b/src/ecwam/yowmap.F90 @@ -78,4 +78,5 @@ MODULE YOWMAP ! (i.e. NO LAND AND DEEP WATER). ! ---------------------------------------------------------------------- + END MODULE YOWMAP diff --git a/tests/etopo1_oper_an_fc_O1280.yml b/tests/etopo1_oper_an_fc_O1280.yml index f20a3df38..f5a21089a 100644 --- a/tests/etopo1_oper_an_fc_O1280.yml +++ b/tests/etopo1_oper_an_fc_O1280.yml @@ -4,9 +4,9 @@ frequencies: 29 bathymetry: ETOPO1 advection: - timestep: 450 + timestep: 225 physics: - timestep: 450 + timestep: 225 analysis.begin: 2022-12-31 12:00:00 analysis.end: 2023-01-01 00:00:00 From 42620145148f30b1e82cf577e5dc6e4796398711 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 1 Feb 2024 15:17:33 +0100 Subject: [PATCH 25/30] More cleaning and minor fixing. OMP_GET_MAX_THREADS --- src/ecwam/ctuw.F90 | 4 ++-- src/ecwam/ctuwdrv.F90 | 3 ++- src/ecwam/ctuwupdt.F90 | 7 ++++++- src/ecwam/mpexchng.F90 | 4 ++-- src/ecwam/propag_wam.F90 | 3 +-- src/ecwam/wamintgr_loki_gpu.F90 | 1 + src/ecwam/yowubuf.F90 | 2 -- 7 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index 673020e86..b29533363 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -316,7 +316,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WRITE (IU06,*) '********************************' LCFLFAIL(IJ)=.TRUE. ENDIF -#ENDIF +#endif ! 
BASIC CFL CHECKS (IN EACH DIRECTION) @@ -354,7 +354,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WRITE (IU06,*) '********************************' LCFLFAIL(IJ)=.TRUE. ENDIF -#ENDIF +#endif ENDDO ! END LOOP OVER GRID POINTS diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 03f47ffe1..e028999fd 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -61,11 +61,12 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1), INTENT(IN) :: DEPTH_EXT ! WATER DEPTH REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1), INTENT(IN) :: U_EXT ! U-COMPONENT OF SURFACE CURRENT REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1), INTENT(IN) :: V_EXT ! V-COMPONENT OF SURFACE CURRENT +INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS INTEGER(KIND=JWIM) :: IJ, ICALL INTEGER(KIND=JWIM) :: JKGLO, KIJS, KIJL, NPROMA, MTHREADS -!$ INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS +INTEGER,EXTERNAL :: OMP_GET_MAX_THREAD REAL(KIND=JPHOOK) :: ZHOOK_HANDLE diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index eadc0e7d9..49d695588 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -45,7 +45,9 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +#ifdef _OPENACC USE OPENACC +#endif /*_OPENACC*/ ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -70,7 +72,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & INTEGER(KIND=JWIM) :: IC, ICL, ICR INTEGER(KIND=JWIM) :: MSTART, MEND INTEGER(KIND=JWIM) :: JKGLO, KIJS, KIJL, NPROMA, MTHREADS -!$ INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS +INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB) :: DELPRO @@ -166,6 +168,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDIF + ! THE CTU IS USED, COMPUTE THE WEIGHTS IF (.NOT. ALLOCATED(SUMWN)) ALLOCATE(SUMWN(IJS:IJL,NANG,NFRE_RED)) @@ -186,6 +189,8 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. 
ALLOCATED(LLWMPMN)) ALLOCATE(LLWMPMN(NANG,NFRE_RED,-1:1)) ENDIF + + ! SOME INITIALISATION FOR *CTUW* !! NPROMA=NPROMA_WAM MTHREADS=1 diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index 3146e4dcb..663f72447 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -155,6 +155,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) IR=0 CALL GSTATS(676,0) + DO INGB=1,NGBFROMPE IR=IR+1 IPROC=NFROMPELST(INGB) @@ -192,6 +193,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ! NOW WAIT FOR ALL TO COMPLETE CALL MPL_WAIT(KREQUEST=IREQ(1:IR),CDSTRING='MPEXCHNG:') + CALL GSTATS(676,1) ! DECODE THE RECEIVED BUFFERS @@ -230,7 +232,6 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO !$OMP END PARALLEL DO #endif /*_OPENACC*/ - CALL GSTATS(1893,1) KTAG=KTAG+1 @@ -241,7 +242,6 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) DEALLOCATE(ZCOMBUFS) DEALLOCATE(ZCOMBUFR) - IF (LHOOK) CALL DR_HOOK('MPEXCHNG',1,ZHOOK_HANDLE) END SUBROUTINE MPEXCHNG diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index fc4b11ed5..a1b175e92 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -78,7 +78,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) INTEGER(KIND=JWIM) :: IJ, K, M, J, II INTEGER(KIND=JWIM) :: JKGLO, NPROMA, MTHREADS INTEGER(KIND=JWIM) :: NSTEP_LF, ISUBST -!$ INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS + INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS INTEGER(KIND=JWIM) :: IJSG, IJLG, ICHNK, KIJS, KIJL, IJSB, IJLB REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -274,7 +274,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) CALL PROPAGS2(FL1_EXT(:,:,1:IFRELFMAX), FL3_EXT(:,:,1:IFRELFMAX), NINF, NSUP, KIJS, KIJL, NANG, 1, IFRELFMAX) ENDDO -! 
!$ACC END DATA #ifndef _OPENACC !$OMP END PARALLEL DO #endif /*_OPENACC*/ diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 009b1229d..2049385f9 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -166,6 +166,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & ENDIF !$acc end data + !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! ---------------------------------------- CALL NEWWIND(CDTIMP, CDATEWH, LLNEWFILE, & diff --git a/src/ecwam/yowubuf.F90 b/src/ecwam/yowubuf.F90 index fb8ca2213..4ec5c17d5 100644 --- a/src/ecwam/yowubuf.F90 +++ b/src/ecwam/yowubuf.F90 @@ -154,7 +154,6 @@ MODULE YOWUBUF ! *LLWMPMN* LOGICAL ARRAY, TRUE IF WMPMN > 0. AT ALL GRID POINTS. ! ---------------------------------------------------------------------- - !$acc declare create(WLAT) !$acc declare create(KLAT) !$acc declare create(KLON) @@ -169,5 +168,4 @@ MODULE YOWUBUF !$acc declare create(WKPMN) !$acc declare create(SUMWN) !$acc declare create(LLWKPMN) - END MODULE YOWUBUF From e422898faf0644fb852aecedbd362a2d83334a7c Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 1 Feb 2024 15:31:46 +0100 Subject: [PATCH 26/30] removed trailing spaces in the ifdef endif to make gnu compiling successfully --- src/ecwam/mpexchng.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index 663f72447..225d25538 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -199,7 +199,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) !
DECODE THE RECEIVED BUFFERS CALL GSTATS(1893,0) - #ifdef _OPENACC +#ifdef _OPENACC !$acc kernels loop independent private(KCOUNT,IJ) !copyin(ZCOMBUFR) DO INGB=1,NGBFROMPE IPROC=NFROMPELST(INGB) @@ -215,7 +215,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO ENDDO !$acc end kernels - #else +#else !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) DO INGB=1,NGBFROMPE IPROC=NFROMPELST(INGB) @@ -231,7 +231,7 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO ENDDO !$OMP END PARALLEL DO - #endif /*_OPENACC*/ +#endif /*_OPENACC*/ CALL GSTATS(1893,1) KTAG=KTAG+1 From fb87ad58fe93ac1a16759c32d6e174296d2bb4b9 Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 1 Feb 2024 15:43:29 +0100 Subject: [PATCH 27/30] removed use openacc (not needed) to make intel compiling successfully --- src/ecwam/ctuwupdt.F90 | 3 --- src/ecwam/propag_wam.F90 | 3 --- 2 files changed, 6 deletions(-) diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index 49d695588..944c46117 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -45,9 +45,6 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK -#ifdef _OPENACC -USE OPENACC -#endif /*_OPENACC*/ ! ---------------------------------------------------------------------- IMPLICIT NONE diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index a1b175e92..4346199ef 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -37,9 +37,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) !
------------------------------------------------------------------- -#ifndef _OPENACC - use openacc -#endif /*_OPENACC*/ USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO, ENVIRONMENT, FREQUENCY From 318594c6a8b3619d46c6fca74a6bdcf23d5ca76a Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Thu, 8 Feb 2024 16:01:37 +0100 Subject: [PATCH 28/30] proenvhalo ported to GPU --- src/ecwam/proenvhalo.F90 | 62 +++++++++++++++++++++++++-------- src/ecwam/propag_wam.F90 | 28 ++++++++------- src/ecwam/wamintgr.F90 | 4 ++- src/ecwam/wamintgr_loki_gpu.F90 | 23 ++++++++---- 4 files changed, 81 insertions(+), 36 deletions(-) diff --git a/src/ecwam/proenvhalo.F90 b/src/ecwam/proenvhalo.F90 index e6780ecaf..ff7d3d2c2 100644 --- a/src/ecwam/proenvhalo.F90 +++ b/src/ecwam/proenvhalo.F90 @@ -8,8 +8,8 @@ ! SUBROUTINE PROENVHALO (NINF, NSUP, & -& WVPRPT, & -& WVENVI, & +& WAVNUM, CGROUP, OMOSNH2KD, & +& DEPTH, DELLAM1, COSPHM1, UCUR, VCUR, & & WAVNUM_EXT, CGROUP_EXT, OMOSNH2KD_EXT, & & DELLAM1_EXT, COSPHM1_EXT, & & DEPTH_EXT, U_EXT, V_EXT ) @@ -44,10 +44,6 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & INTEGER(KIND=JWIM), INTENT(IN) :: NINF, NSUP ! HALO EXTEND NINF to NSUP+1 - TYPE(FREQUENCY), INTENT(IN) :: WVPRPT - - TYPE(ENVIRONMENT), INTENT(IN) :: WVENVI - REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1, NFRE_RED), INTENT(OUT) :: WAVNUM_EXT ! WAVE NUMBER REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1, NFRE_RED), INTENT(OUT) :: CGROUP_EXT ! GROUP VELOCITY REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1, NFRE_RED), INTENT(OUT) :: OMOSNH2KD_EXT ! OMEGA / SINH(2KD) @@ -57,6 +53,8 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1), INTENT(OUT) :: U_EXT ! U-COMPONENT OF SURFACE CURRENT REAL(KIND=JWRB), DIMENSION(NINF:NSUP+1), INTENT(OUT) :: V_EXT ! 
V-COMPONENT OF SURFACE CURRENT + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NFRE, NCHNK), INTENT(IN) :: WAVNUM, CGROUP, OMOSNH2KD + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NCHNK), INTENT(IN) :: DEPTH, DELLAM1, COSPHM1, UCUR, VCUR INTEGER(KIND=JWIM) :: IJ, M INTEGER(KIND=JWIM) :: ICHNK, KIJS, KIJL, IJSB, IJLB @@ -66,51 +64,85 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('PROENVHALO',0,ZHOOK_HANDLE) +!$acc data present(WAVNUM,CGROUP,OMOSNH2KD,DELLAM1,COSPHM1,DEPTH, UCUR,VCUR,& +!$acc WAVNUM_EXT,CGROUP_EXT,OMOSNH2KD_EXT,DELLAM1_EXT,COSPHM1_EXT,DEPTH_EXT,U_EXT,V_EXT) !!! mapping chuncks to block ONLY for actual grid points !!!! +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M) +#endif /*_OPENACC*/ +!$acc kernels loop private(ICHNK, KIJS, IJSB, KIJL, IJLB) DO ICHNK = 1, NCHNK KIJS = 1 IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) - WAVNUM_EXT(IJSB:IJLB, 1:NFRE_RED) = WVPRPT%WAVNUM(KIJS:KIJL, 1:NFRE_RED,ICHNK) - CGROUP_EXT(IJSB:IJLB, 1:NFRE_RED) = WVPRPT%CGROUP(KIJS:KIJL, 1:NFRE_RED,ICHNK) - OMOSNH2KD_EXT(IJSB:IJLB, 1:NFRE_RED) = WVPRPT%OMOSNH2KD(KIJS:KIJL, 1:NFRE_RED,ICHNK) - - DELLAM1_EXT(IJSB:IJLB) = WVENVI%DELLAM1(KIJS:KIJL,ICHNK) - COSPHM1_EXT(IJSB:IJLB) = WVENVI%COSPHM1(KIJS:KIJL,ICHNK) - DEPTH_EXT(IJSB:IJLB) = WVENVI%DEPTH(KIJS:KIJL,ICHNK) - U_EXT(IJSB:IJLB) = WVENVI%UCUR(KIJS:KIJL,ICHNK) - V_EXT(IJSB:IJLB) = WVENVI%VCUR(KIJS:KIJL,ICHNK) +!$acc loop + DO M = 1, NFRE_RED + WAVNUM_EXT(IJSB:IJLB, M) = WAVNUM(KIJS:KIJL, M,ICHNK) + CGROUP_EXT(IJSB:IJLB, M) = CGROUP(KIJS:KIJL, M,ICHNK) + OMOSNH2KD_EXT(IJSB:IJLB, M) = OMOSNH2KD(KIJS:KIJL, M,ICHNK) + ENDDO + + DELLAM1_EXT(IJSB:IJLB) = DELLAM1(KIJS:KIJL,ICHNK) + COSPHM1_EXT(IJSB:IJLB) = COSPHM1(KIJS:KIJL,ICHNK) + DEPTH_EXT(IJSB:IJLB) = DEPTH(KIJS:KIJL,ICHNK) + U_EXT(IJSB:IJLB) = UCUR(KIJS:KIJL,ICHNK) + V_EXT(IJSB:IJLB) = VCUR(KIJS:KIJL,ICHNK) ENDDO 
+!$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +!$acc enter data copyin(WVPRPT_LAND) +!$acc enter data copyin(WVPRPT_LAND%WAVNUM,WVPRPT_LAND%CGROUP,WVPRPT_LAND%OMOSNH2KD) +!$acc data present(WVPRPT_LAND) copyin(BATHYMAX) !! should be combined into one single data exchange, when we start using this option.... !!! CALL MPEXCHNG(WAVNUM_EXT, NFRE_RED, 1, 1) + !$acc kernels WAVNUM_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%WAVNUM(1:NFRE_RED) + !$acc end kernels CALL MPEXCHNG(CGROUP_EXT, NFRE_RED, 1, 1) + !$acc kernels CGROUP_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%CGROUP(1:NFRE_RED) + !$acc end kernels CALL MPEXCHNG(OMOSNH2KD_EXT, NFRE_RED, 1, 1) + !$acc kernels OMOSNH2KD_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%OMOSNH2KD(1:NFRE_RED) + !$acc end kernels CALL MPEXCHNG(DELLAM1_EXT, 1, 1, 1) + !$acc kernels DELLAM1_EXT(NSUP+1) = 0.0_JWRB + !$acc end kernels CALL MPEXCHNG(COSPHM1_EXT, 1, 1, 1) + !$acc kernels COSPHM1_EXT(NSUP+1) = 0.0_JWRB + !$acc end kernels CALL MPEXCHNG(DEPTH_EXT, 1, 1, 1) + !$acc kernels DEPTH_EXT(NSUP+1) = BATHYMAX + !$acc end kernels CALL MPEXCHNG(U_EXT, 1, 1, 1) + !$acc kernels U_EXT(NSUP+1) = 0.0_JWRB + !$acc end kernels CALL MPEXCHNG(V_EXT, 1, 1, 1) + !$acc kernels V_EXT(NSUP+1) = 0.0_JWRB + !$acc end kernels +!$acc end data +!$acc exit data delete(WVPRPT_LAND%WAVNUM,WVPRPT_LAND%CGROUP,WVPRPT_LAND%OMOSNH2KD) +!$acc exit data delete(WVPRPT_LAND) +!$acc end data IF (LHOOK) CALL DR_HOOK('PROENVHALO',1,ZHOOK_HANDLE) diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 4346199ef..39152aa24 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -7,7 +7,8 @@ ! nor does it submit to any jurisdiction. ! -SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) +SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & +& DEPTH, DELLAM1, COSPHM1, UCUR, VCUR) ! 
---------------------------------------------------------------------- @@ -67,9 +68,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) #include "propdot.intfb.h" TYPE(WVGRIDGLO), INTENT(IN) :: BLK2GLO - TYPE(ENVIRONMENT), INTENT(IN) :: WVENVI - TYPE(FREQUENCY), INTENT(IN) :: WVPRPT REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: FL1 + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NFRE, NCHNK), INTENT(IN) :: WAVNUM, CGROUP, OMOSNH2KD + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NCHNK), INTENT(IN) :: DEPTH, DELLAM1, COSPHM1, UCUR, VCUR INTEGER(KIND=JWIM) :: IJ, K, M, J, II @@ -97,7 +98,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IF (LHOOK) CALL DR_HOOK('PROPAG_WAM',0,ZHOOK_HANDLE) -!$acc data PRESENT(FL1) CREATE(FL1_EXT,FL3_EXT) +!$acc data present(FL1, WAVNUM, CGROUP, OMOSNH2KD, DEPTH, DELLAM1,COSPHM1,UCUR,VCUR) CREATE(FL1_EXT,FL3_EXT) & +!$acc & create(WAVNUM_EXT,CGROUP_EXT,OMOSNH2KD_EXT,DELLAM1_EXT,COSPHM1_EXT,DEPTH_EXT,UCUR_EXT,VCUR_EXT) IF (NIBLO > 1) THEN IJSG = IJFROMCHNK(1,1) @@ -170,8 +172,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ! NEED HALO VALUES CALL PROENVHALO (NINF, NSUP, & -& WVPRPT, & -& WVENVI, & +& WAVNUM, CGROUP, OMOSNH2KD, & +& DEPTH, DELLAM1, COSPHM1, UCUR, VCUR, & & WAVNUM_EXT, CGROUP_EXT, OMOSNH2KD_EXT, & & DELLAM1_EXT, COSPHM1_EXT, & & DEPTH_EXT, UCUR_EXT, VCUR_EXT ) @@ -204,8 +206,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IF (LUPDTWGHT) THEN ! NEED HALO VALUES CALL PROENVHALO (NINF, NSUP, & -& WVPRPT, & -& WVENVI, & +& WAVNUM, CGROUP, OMOSNH2KD, & +& DEPTH, DELLAM1, COSPHM1, UCUR, VCUR, & & WAVNUM_EXT, CGROUP_EXT, OMOSNH2KD_EXT, & & DELLAM1_EXT, COSPHM1_EXT, & & DEPTH_EXT, UCUR_EXT, VCUR_EXT ) @@ -286,8 +288,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ! 
NEED HALO VALUES CALL PROENVHALO (NINF, NSUP, & -& WVPRPT, & -& WVENVI, & +& WAVNUM, CGROUP, OMOSNH2KD, & +& DEPTH, DELLAM1, COSPHM1, UCUR, VCUR, & & WAVNUM_EXT, CGROUP_EXT, OMOSNH2KD_EXT, & & DELLAM1_EXT, COSPHM1_EXT, & & DEPTH_EXT, UCUR_EXT, VCUR_EXT ) @@ -311,8 +313,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ! NEED HALO VALUES CALL PROENVHALO (NINF, NSUP, & -& WVPRPT, & -& WVENVI, & +& WAVNUM, CGROUP, OMOSNH2KD, & +& DEPTH, DELLAM1, COSPHM1, UCUR, VCUR, & & WAVNUM_EXT, CGROUP_EXT, OMOSNH2KD_EXT, & & DELLAM1_EXT, COSPHM1_EXT, & & DEPTH_EXT, UCUR_EXT, VCUR_EXT ) @@ -380,7 +382,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ENDIF ! end propagation ENDIF ! more than one grid point -!$ACC END DATA +!$acc end data L1STCALL=.FALSE. LLCHKCFL=.FALSE. diff --git a/src/ecwam/wamintgr.F90 b/src/ecwam/wamintgr.F90 index e7d71be09..265dd6bde 100644 --- a/src/ecwam/wamintgr.F90 +++ b/src/ecwam/wamintgr.F90 @@ -105,7 +105,9 @@ SUBROUTINE WAMINTGR (CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & IF (CDATE == CDTPRA) THEN TIME0=-WAM_USER_CLOCK() - CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1) + CALL PROPAG_WAM(BLK2GLO, WVPRPT%WAVNUM, WVPRPT%CGROUP, WVPRPT%OMOSNH2KD, FL1,& +& WVENVI%DEPTH, WVENVI%DELLAM1, WVENVI%COSPHM1, WVENVI%UCUR, WVENVI%VCUR) +! 
CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1) TIME1(1) = TIME1(1) + (TIME0+WAM_USER_CLOCK())*1.E-06 CDATE = CDTPRO ENDIF diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 2049385f9..d68394418 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -94,6 +94,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & INTEGER(KIND=JWIM), POINTER, CONTIGUOUS :: MIJ_DPTR(:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: WAVNUM_DPTR(:,:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: CGROUP_DPTR(:,:,:) => NULL() +REAL(KIND=JWRB), POINTER, CONTIGUOUS :: OMOSNH2KD_DPTR(:,:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: CIWA_DPTR(:,:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: CINV_DPTR(:,:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: XK2CG_DPTR(:,:,:) => NULL() @@ -101,6 +102,10 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & REAL(KIND=JWRB), POINTER, CONTIGUOUS :: EMAXDPT_DPTR(:,:) => NULL() INTEGER(KIND=JWIM), POINTER, CONTIGUOUS :: INDEP_DPTR(:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: DEPTH_DPTR(:,:) => NULL() +REAL(KIND=JWRB), POINTER, CONTIGUOUS :: DELLAM1_DPTR(:,:) => NULL() +REAL(KIND=JWRB), POINTER, CONTIGUOUS :: COSPHM1_DPTR(:,:) => NULL() +REAL(KIND=JWRB), POINTER, CONTIGUOUS :: UCUR_DPTR(:,:) => NULL() +REAL(KIND=JWRB), POINTER, CONTIGUOUS :: VCUR_DPTR(:,:) => NULL() INTEGER(KIND=JWIM), POINTER, CONTIGUOUS :: IOBND_DPTR(:,:) => NULL() INTEGER(KIND=JWIM), POINTER, CONTIGUOUS :: IODP_DPTR(:,:) => NULL() REAL(KIND=JWRB), POINTER, CONTIGUOUS :: CICOVER_DPTR(:,:) => NULL() @@ -156,17 +161,21 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL SRC_CONTRIBS%INIT(FL1=FL1) CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR) -!$acc data present(FL1_DPTR) +CALL WVPRPT_FIELD%INIT(WAVNUM=WVPRPT%WAVNUM, CGROUP=WVPRPT%CGROUP, OMOSNH2KD=WVPRPT%OMOSNH2KD) +CALL 
WVPRPT_FIELD%UPDATE_DEVICE(WAVNUM=WAVNUM_DPTR, CGROUP=CGROUP_DPTR, OMOSNH2KD=OMOSNH2KD_DPTR) +CALL WVENVI_FIELD%INIT(DEPTH=WVENVI%DEPTH, DELLAM1=WVENVI%DELLAM1, COSPHM1=WVENVI%COSPHM1, UCUR=WVENVI%UCUR, VCUR=WVENVI%VCUR) +CALL WVENVI_FIELD%UPDATE_DEVICE(DEPTH=DEPTH_DPTR, DELLAM1=DELLAM1_DPTR, COSPHM1=COSPHM1_DPTR, UCUR=UCUR_DPTR, VCUR=VCUR_DPTR) +!$acc data present(FL1_DPTR, WAVNUM_DPTR, CGROUP_DPTR, OMOSNH2KD_DPTR, DEPTH_DPTR, DELLAM1_DPTR, COSPHM1_DPTR, UCUR_DPTR, VCUR_DPTR) IF (CDATE == CDTPRA) THEN TIME0=-WAM_USER_CLOCK() - CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1_DPTR) + CALL PROPAG_WAM(BLK2GLO, WAVNUM_DPTR, CGROUP_DPTR, OMOSNH2KD_DPTR, FL1_DPTR,& +& DEPTH_DPTR, DELLAM1_DPTR, COSPHM1_DPTR, UCUR_DPTR, VCUR_DPTR) TIME1(1) = TIME1(1) + (TIME0+WAM_USER_CLOCK())*1.E-06 CDATE = CDTPRO ENDIF !$acc end data - !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! ---------------------------------------- CALL NEWWIND(CDTIMP, CDATEWH, LLNEWFILE, & @@ -180,9 +189,9 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & IF (LLSOURCE) THEN TIME2=-WAM_USER_CLOCK() - CALL WVPRPT_FIELD%INIT(WAVNUM=WVPRPT%WAVNUM, CGROUP=WVPRPT%CGROUP, CIWA=WVPRPT%CIWA, CINV=WVPRPT%CINV, XK2CG=WVPRPT%XK2CG, & + CALL WVPRPT_FIELD%INIT(CIWA=WVPRPT%CIWA, CINV=WVPRPT%CINV, XK2CG=WVPRPT%XK2CG, & & STOKFAC=WVPRPT%STOKFAC) - CALL WVENVI_FIELD%INIT(EMAXDPT=WVENVI%EMAXDPT, INDEP=WVENVI%INDEP, DEPTH=WVENVI%DEPTH, IOBND=WVENVI%IOBND, IODP=WVENVI%IODP) + CALL WVENVI_FIELD%INIT(EMAXDPT=WVENVI%EMAXDPT, INDEP=WVENVI%INDEP, IOBND=WVENVI%IOBND, IODP=WVENVI%IODP) CALL FF_NOW_FIELD%INIT(AIRD=FF_NOW%AIRD, WDWAVE=FF_NOW%WDWAVE, CICOVER=FF_NOW%CICOVER, WSWAVE=FF_NOW%WSWAVE, & & WSTAR=FF_NOW%WSTAR, UFRIC=FF_NOW%UFRIC, TAUW=FF_NOW%TAUW, TAUWDIR=FF_NOW%TAUWDIR, & & Z0M=FF_NOW%Z0M, Z0B=FF_NOW%Z0B, CHRNCK=FF_NOW%CHRNCK, CITHICK=FF_NOW%CITHICK) @@ -198,9 +207,9 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !$loki update_device - CALL 
WVPRPT_FIELD%UPDATE_DEVICE(WAVNUM=WAVNUM_DPTR, CGROUP=CGROUP_DPTR, CIWA=CIWA_DPTR, CINV=CINV_DPTR, XK2CG=XK2CG_DPTR, & + CALL WVPRPT_FIELD%UPDATE_DEVICE(CIWA=CIWA_DPTR, CINV=CINV_DPTR, XK2CG=XK2CG_DPTR, & & STOKFAC=STOKFAC_DPTR) - CALL WVENVI_FIELD%UPDATE_DEVICE(EMAXDPT=EMAXDPT_DPTR, INDEP=INDEP_DPTR, DEPTH=DEPTH_DPTR, IOBND=IOBND_DPTR, IODP=IODP_DPTR) + CALL WVENVI_FIELD%UPDATE_DEVICE(EMAXDPT=EMAXDPT_DPTR, INDEP=INDEP_DPTR, IOBND=IOBND_DPTR, IODP=IODP_DPTR) CALL FF_NOW_FIELD%UPDATE_DEVICE(AIRD=AIRD_DPTR, WDWAVE=WDWAVE_DPTR, CICOVER=CICOVER_DPTR, WSWAVE=WSWAVE_DPTR, & & WSTAR=WSTAR_DPTR, UFRIC=UFRIC_DPTR, TAUW=TAUW_DPTR, TAUWDIR=TAUWDIR_DPTR, Z0M=Z0M_DPTR, Z0B=Z0B_DPTR, & & CHRNCK=CHRNCK_DPTR, CITHICK=CITHICK_DPTR) From d026989a5e7787f4041b3191129f86b5b00d3e2c Mon Sep 17 00:00:00 2001 From: Fabio Di Sante Date: Mon, 12 Feb 2024 17:05:24 +0100 Subject: [PATCH 29/30] use MTHREADS=1 when OPENACC is activated --- src/ecwam/ctuwdrv.F90 | 4 ++-- src/ecwam/ctuwupdt.F90 | 4 ++-- src/ecwam/propag_wam.F90 | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index e028999fd..003e137de 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -78,9 +78,9 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & IF (LHOOK) CALL DR_HOOK('CTUWDRV',0,ZHOOK_HANDLE) !! NPROMA=NPROMA_WAM - MTHREADS=1 -#ifdef _OPENACC MTHREADS=OMP_GET_MAX_THREADS() +#ifdef _OPENACC + MTHREADS=1 #endif /*_OPENACC*/ NPROMA=(IJL-IJS+1)/MTHREADS + 1 diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index 944c46117..bfa57c7d0 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -190,9 +190,9 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ! SOME INITIALISATION FOR *CTUW* !! 
NPROMA=NPROMA_WAM - MTHREADS=1 -#ifdef _OPENACC MTHREADS=OMP_GET_MAX_THREADS() +#ifdef _OPENACC + MTHREADS=1 #endif /*_OPENACC*/ NPROMA=(IJL-IJS+1)/MTHREADS + 1 diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 39152aa24..91f2fb818 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -105,9 +105,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & IJSG = IJFROMCHNK(1,1) IJLG = IJSG + SUM(KIJL4CHNK) - 1 - MTHREADS=1 -#ifdef _OPENACC MTHREADS=OMP_GET_MAX_THREADS() +#ifdef _OPENACC + MTHREADS=1 #endif /*_OPENACC*/ NPROMA=(IJLG-IJSG+1)/MTHREADS + 1 From 54e56eff81b5bd3a04cdd03f94e6441d76b4dcec Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 14 Feb 2024 16:42:21 +0000 Subject: [PATCH 30/30] CTUW: make preprocessor flags lower case --- src/ecwam/ctuw.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index b29533363..aa9741975 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -278,7 +278,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ! LOOP OVER GRID POINTS ! --------------------- -#IFNDEF _OPENACC +#ifndef _OPENACC ! FLUX VELOCITIES AT THE GRID BOX INTERFACE @@ -321,7 +321,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ! BASIC CFL CHECKS (IN EACH DIRECTION) ! ---------------- -#IFNDEF _OPENACC +#ifndef _OPENACC IF (ADXP(2) > ZDELLO(KY))THEN WRITE (IU06,*) '********************************' WRITE (IU06,*) '* CTUW: *' @@ -531,7 +531,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ! AND COMPUTE THEIR SUM AND CHECK IT IS LESS THAN 1 AS WELL !!! THE SUM IS NEEDED LATER ON !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -#IFNDEF _OPENACC +#ifndef _OPENACC DO K=1,NANG DO M = MSTART, MEND DO IJ=KIJS,KIJL