diff --git a/src/ecwam/ecwam_loki.config b/src/ecwam/ecwam_loki.config index a0e87dbe1..42efa4f8f 100644 --- a/src/ecwam/ecwam_loki.config +++ b/src/ecwam/ecwam_loki.config @@ -38,6 +38,11 @@ block = ['ec_parkind', 'parkind_wave'] allowed_aliases = "IJ" inline_elementals = false +# Loop transformations +[transformations.TransformLoopsTransformation] + module = "loki.transformations" +[transformations.TransformLoopsTransformation.options] + # Split-read-write transformation [transformations.SplitReadWriteTransformation] module = "loki.transformations" @@ -73,14 +78,14 @@ block = ['ec_parkind', 'parkind_wave'] # loki pipelines [pipelines.idem] transformations = [ - 'RemoveCodeTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', - 'IdemTransformation', 'ModuleWrapTransformation', 'DependencyTransformation' + 'RemoveCodeTransformation', 'TransformLoopsTransformation', 'SplitReadWriteTransformation', + 'InlineTransformation', 'IdemTransformation', 'ModuleWrapTransformation', 'DependencyTransformation' ] [pipelines.idem-stack] transformations = [ - 'RemoveCodeTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', 'IdemTransformation', - 'TemporariesPoolAllocatorTransformation', 'ModuleWrapTransformation', 'DependencyTransformation' + 'RemoveCodeTransformation', 'TransformLoopsTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', + 'IdemTransformation', 'TemporariesPoolAllocatorTransformation', 'ModuleWrapTransformation', 'DependencyTransformation' ] # Define entry point for call-tree transformation diff --git a/src/ecwam/ecwam_loki_gpu.config b/src/ecwam/ecwam_loki_gpu.config index 1af22ef7e..f1620581c 100644 --- a/src/ecwam/ecwam_loki_gpu.config +++ b/src/ecwam/ecwam_loki_gpu.config @@ -53,6 +53,11 @@ block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] module = "loki.transformations" [transformations.GlobalVariableAnalysis.options] +# Loop transformations +[transformations.TransformLoopsTransformation] + module = "loki.transformations" +[transformations.TransformLoopsTransformation.options] + # GlobalVarOffloadTransformation [transformations.GlobalVarOffloadTransformation] module = "loki.transformations" @@ -105,20 +110,23 @@ block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] # loki pipelines [pipelines.scc] transformations = [ - 'RemoveCodeTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', 'GlobalVariableAnalysis', - 'GlobalVarOffloadTransformation', 'SCCVectorPipeline', 'ModuleWrapTransformation', 'DependencyTransformation' + 'RemoveCodeTransformation', 'TransformLoopsTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', + 'GlobalVariableAnalysis', 'GlobalVarOffloadTransformation', 'SCCVectorPipeline', 'ModuleWrapTransformation', + 'DependencyTransformation' ] [pipelines.scc-stack] transformations = [ - 'RemoveCodeTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', 'GlobalVariableAnalysis', - 'GlobalVarOffloadTransformation', 'SCCStackPipeline', 'ModuleWrapTransformation', 'DependencyTransformation' + 'RemoveCodeTransformation', 'TransformLoopsTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', + 'GlobalVariableAnalysis', 'GlobalVarOffloadTransformation', 'SCCStackPipeline', 'ModuleWrapTransformation', + 'DependencyTransformation' ] [pipelines.scc-hoist] transformations = [ - 'RemoveCodeTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', 'GlobalVariableAnalysis', - 'GlobalVarOffloadTransformation', 'SCCHoistPipeline', 'ModuleWrapTransformation', 'DependencyTransformation' + 'RemoveCodeTransformation', 'TransformLoopsTransformation', 'SplitReadWriteTransformation', 'InlineTransformation', + 'GlobalVariableAnalysis', 'GlobalVarOffloadTransformation', 'SCCHoistPipeline', 'ModuleWrapTransformation', + 'DependencyTransformation' ] # Define entry point for call-tree transformation diff --git a/src/ecwam/snonlin.F90 b/src/ecwam/snonlin.F90 index 6ea5d7225..a12b69990 100644 --- a/src/ecwam/snonlin.F90 +++ b/src/ecwam/snonlin.F90 @@ -222,6 +222,7 @@ SUBROUTINE SNONLIN (KIJS, KIJL, FL1, FLD, SL, WAVNUM, DEPTH, AKMEAN) IF (MC > MFR1STFR .AND. MC < MFRLSTFR ) THEN ! the interactions for MC are all within the fully resolved spectral domain + !$loki loop-interchange DO KH=1,2 DO K=1,NANG K1 = K1W (K,KH) @@ -309,6 +310,7 @@ SUBROUTINE SNONLIN (KIJS, KIJL, FL1, FLD, SL, WAVNUM, DEPTH, AKMEAN) ENDDO ELSEIF (MC >= MFRLSTFR ) THEN + !$loki loop-interchange DO KH=1,2 DO K=1,NANG K1 = K1W (K,KH) @@ -411,6 +413,7 @@ SUBROUTINE SNONLIN (KIJS, KIJL, FL1, FLD, SL, WAVNUM, DEPTH, AKMEAN) ELSE + !$loki loop-interchange DO KH=1,2 DO K=1,NANG K1 = K1W (K,KH)