diff --git a/.github/workflows/build-hpc.yml b/.github/workflows/build-hpc.yml new file mode 100644 index 000000000..41f1a40b5 --- /dev/null +++ b/.github/workflows/build-hpc.yml @@ -0,0 +1,157 @@ +name: build-hpc + +# Controls when the action will run +on: + + # Trigger the workflow on all pushes to main and develop, except on tag creation + push: + branches: + - main + - develop + tags-ignore: + - '**' + + # Trigger the workflow on all pull requests + pull_request: ~ + + # Allow workflow to be dispatched on demand + workflow_dispatch: ~ + + # Trigger after public PR approved for CI + pull_request_target: + types: [labeled] + +env: + ECTRANS_TOOLS: ${{ github.workspace }}/.github/tools + CTEST_PARALLEL_LEVEL: 1 + CACHE_SUFFIX: v1 # Increase to force new cache to be created + +jobs: + ci-hpc: + name: ci-hpc + if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} + + strategy: + fail-fast: false # false: try to complete all jobs + + matrix: + name: + - ac-gpu nvhpc + - lumi-g cce + + include: + - name: ac-gpu nvhpc + site: ac-batch + troika_user_secret: HPC_CI_SSH_USER + sbatch_options: | + #SBATCH --time=00:20:00 + #SBATCH --nodes=1 + #SBATCH --ntasks=2 + #SBATCH --cpus-per-task=32 + #SBATCH --gpus-per-task=1 + #SBATCH --mem=200G + #SBATCH --qos=dg + modules: + - cmake + - ninja + - prgenv/nvidia + - hpcx-openmpi/2.14.0-cuda + - fftw + + - name: lumi-g cce + site: lumi + troika_user_secret: LUMI_CI_SSH_USER + sbatch_options: | + #SBATCH --time=00:20:00 + #SBATCH --nodes=1 + #SBATCH --ntasks-per-node=8 + #SBATCH --gpus-per-task=1 + #SBATCH --partition=dev-g + #SBATCH --account=project_465000527 + modules: + - CrayEnv + - PrgEnv-cray + - cce/17.0.1 + - craype-accel-amd-gfx90a + - rocm/6.0.3 + - cray-fftw + - buildtools + output_dir: /scratch/project_465000527/github-actions/ectrans/${{ github.run_id }} + workdir: /scratch/project_465000527/github-actions/ectrans/${{ github.run_id }} + cmake_options: -DOpenMP_C_LIB_NAMES=craymp -DOpenMP_CXX_LIB_NAMES=craymp -DOpenMP_Fortran_LIB_NAMES=craymp -DOpenMP_craymp_LIBRARY=craymp + ctest_options: -E gpu + + + runs-on: [self-hosted, linux, hpc] + env: + GH_TOKEN: ${{ github.token }} + steps: + - uses: ecmwf-actions/reusable-workflows/ci-hpc-generic@v2 + with: + site: ${{ matrix.site }} + troika_user: ${{ secrets[matrix.troika_user_secret] }} + sbatch_options: ${{ matrix.sbatch_options }} + output_dir: ${{ matrix.output_dir || '' }} + workdir: ${{ matrix.workdir || '' }} + template_data: | + cmake_options: + - -DENABLE_MPI=ON + - -DENABLE_ACC=ON + - -DENABLE_GPU=ON + - ${{ matrix.cmake_options || '' }} + ctest_options: ${{ matrix.ctest_options || '' }} + dependencies: + ecmwf/ecbuild: + version: develop + ecmwf-ifs/fiat: + version: develop + cmake_options: + - -DENABLE_MPI=ON + - ${{ matrix.cmake_options || '' }} + template: | + {% for module in "${{ join(matrix.modules, ',') }}".split(',') %} + module load {{module}} + {% endfor %} + + export CMAKE_TEST_LAUNCHER="srun;-n;1" + export DR_HOOK_ASSERT_MPI_INITIALIZED=0 + BASEDIR=$PWD + {% for name, options in dependencies.items() %} + mkdir -p {{name}} + pushd {{name}} + git init + git remote add origin ${{ github.server_url }}/{{name}} + git fetch origin {{options['version']}} + git reset --hard FETCH_HEAD + cmake -G Ninja -S . -B build \ + {% for name in dependencies %} + {% set org, proj = name.split('/') %} + -D{{proj}}_ROOT=$BASEDIR/{{name}}/installation \ + {% endfor %} + {{ options['cmake_options']|join(' ') }} + cmake --build build + cmake --install build --prefix installation + popd + {% endfor %} + mkdir -p ${{ github.repository }} + pushd ${{ github.repository }} + git init + git remote add origin ${{ github.server_url }}/${{ github.repository }} + git fetch origin ${{ github.sha }} + git reset --hard FETCH_HEAD + popd + cmake -G Ninja -S ${{ github.repository }} -B build \ + {% for name in dependencies %} + {% set org, proj = name.split('/') %} + -D{{proj}}_ROOT=$BASEDIR/{{name}}/installation \ + {% endfor %} + {{ cmake_options|join(' ') }} + cmake --build build + ctest --test-dir build --output-on-failure {{ ctest_options }} + + {% for name in dependencies.keys() %} + rm -r {{name}} + {% endfor %} + + rm -r ${{ github.repository }} + rm -r build diff --git a/.github/workflows/label-public-pr.yml b/.github/workflows/label-public-pr.yml new file mode 100644 index 000000000..eda990063 --- /dev/null +++ b/.github/workflows/label-public-pr.yml @@ -0,0 +1,10 @@ +# Manage labels of pull requests that originate from forks +name: label-public-pr + +on: + pull_request_target: + types: [opened, synchronize] + +jobs: + label: + uses: ecmwf-actions/reusable-workflows/.github/workflows/label-pr.yml@v2 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 121365d66..fd987e6a0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,4 @@ build/* install/* env.sh *.DS_Store - +*.py[co~] diff --git a/CMakeLists.txt b/CMakeLists.txt index e2c5b8e81..79458f7ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,26 @@ find_package( ecbuild 3.4 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CUR project( ectrans LANGUAGES C CXX Fortran ) include( ectrans_macros ) +# CMake 3.29 adds CMAKE_TEST_LAUNCHER defined either as CMake variable or environment. +# This launcher is a semi-colon-separted list of arguments that is used to launch serial tasks, +# and can be defined during the CMake configuration. +# This is e.g. required for GPU tests that need access to slurm resources: +# export CMAKE_TEST_LAUNCHER="srun;-n;1" +# To run the tests then: +# salloc -q --gpus-per-task=1 -n ctest +# Before cmake 3.29 this could only be achieved with CMAKE_CROSSCOMPILING_EMULATOR. +# This next snippet ensures forward compatibility +if( ${CMAKE_VERSION} VERSION_LESS "3.29" ) + if( DEFINED CMAKE_TEST_LAUNCHER ) + set(CMAKE_CROSSCOMPILING_EMULATOR ${CMAKE_TEST_LAUNCHER}) + elseif(DEFINED ENV{CMAKE_TEST_LAUNCHER}) + set(CMAKE_CROSSCOMPILING_EMULATOR $ENV{CMAKE_TEST_LAUNCHER}) + endif() +endif() +if( CMAKE_CROSSCOMPILING_EMULATOR ) + set( CMAKE_TEST_LAUNCHER ${CMAKE_CROSSCOMPILING_EMULATOR} ) +endif() + set(CMAKE_CXX_STANDARD 17) ecbuild_enable_fortran( REQUIRED NO_MODULE_DIRECTORY ) @@ -109,6 +129,11 @@ ecbuild_add_option( FEATURE GPU DESCRIPTION "Compile GPU version of ectrans (Requires OpenACC or sufficient OpenMP offloading support)" CONDITION (HAVE_HIP OR HAVE_CUDA) AND (HAVE_ACC OR HAVE_OMP) ) +# Check CPU or GPU is enabled, and if not, abort +if( (NOT HAVE_CPU) AND (NOT HAVE_GPU) ) + ecbuild_critical("Please enable one or both of the CPU and GPU features") +endif() + if( HAVE_GPU ) if( HAVE_ACC ) set( GPU_OFFLOAD "ACC" ) @@ -142,10 +167,10 @@ ecbuild_add_option( FEATURE GPU_GRAPHS_GEMM CONDITION HAVE_GPU DESCRIPTION "Enable graph-based optimisation of Legendre transform GEMM kernel" ) - ecbuild_add_option( FEATURE GPU_GRAPHS_FFT +ecbuild_add_option( FEATURE GPU_GRAPHS_FFT DEFAULT ON CONDITION HAVE_GPU - DESCRIPTION "Enable graph-based optimisation of FFT kernels" ) + DESCRIPTION "Enable graph-based optimisation of FFT kernels" ) if( BUILD_SHARED_LIBS ) set( GPU_STATIC_DEFAULT OFF ) @@ -156,18 +181,32 @@ ecbuild_add_option( FEATURE GPU_STATIC DEFAULT ${GPU_STATIC_DEFAULT} DESCRIPTION "Compile GPU library as static library") +ecbuild_add_option( FEATURE ETRANS + DEFAULT ON + DESCRIPTION "Include Limited-Area-Model Transforms" ) + + +ecbuild_add_option( FEATURE ECTRANS4PY + DEFAULT OFF + CONDITION HAVE_ETRANS + DESCRIPTION "Compile ectrans4py interface routines for python binding w/ ctypesForFortran" ) + + ectrans_find_lapack() ecbuild_add_option( FEATURE TESTS DEFAULT ON DESCRIPTION "Enable unit testing" - REQUIRED_PACKAGES "MPI COMPONENTS Fortran" - CONDITION HAVE_CPU ) + REQUIRED_PACKAGES "MPI COMPONENTS Fortran" ) -### Add sources and tests +### Add sources include( ectrans_compile_options ) add_subdirectory( src ) -add_subdirectory( tests ) + +### Add tests +if( HAVE_TESTS ) + add_subdirectory( tests ) +endif() ### Export if( BUILD_SHARED_LIBS ) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..7ee26d232 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include cmake +exclude MANIFEST.in diff --git a/README.md b/README.md index 82cf05f87..be959cbb9 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,28 @@ The benchmark drivers are found in the bin directory. A brief description of available command-line arguments can be obtained with e.g. ectrans-benchmark-cpu-sp --help +Building `ectrans4py` +--------------------- + +The python wheel can be built from the root of the project, assuming above-mentioned variables are defined (`fiat_ROOT` etc...): +``` +python -m build --wheel +``` +and then: +``` +python -m auditwheel +``` +The built python wheel is then to be found in directory `wheelhouse/` and can be locally installed by pip: +``` +pip install wheelhouse/ectrans4py-(...).whl +``` +The `_skbuild` and `dist` directories can be deleted. + +Tests can be run from `tests/test_ectrans4py/`: +``` +python -m pytest +``` + Reporting Bugs ============== diff --git a/VERSION b/VERSION index bc80560fa..26ca59460 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.0 +1.5.1 diff --git a/cmake/ectrans_compile_options.cmake b/cmake/ectrans_compile_options.cmake index 03cbf0972..64b22802c 100644 --- a/cmake/ectrans_compile_options.cmake +++ b/cmake/ectrans_compile_options.cmake @@ -25,6 +25,9 @@ elseif( CMAKE_Fortran_COMPILER_ID MATCHES "Cray" ) ecbuild_add_fortran_flags("-hnomessage=878") # A module named ... has already been directly or indirectly use associated into this scope ecbuild_add_fortran_flags("-hnomessage=867") # Module ... has no public objects declared in the module, therefore nothing can be use associated from the module. ecbuild_add_fortran_flags("-M7256") # An OpenMP parallel construct in a target region is limited to a single thread. +elseif( CMAKE_Fortran_COMPILER_ID MATCHES "IntelLLVM" ) + ecbuild_add_fortran_flags("-march=core-avx2 -no-fma" BUILD BIT) + ecbuild_add_fortran_flags("-fp-model precise -fp-speculation=safe") elseif( CMAKE_Fortran_COMPILER_ID MATCHES "Intel" ) ecbuild_add_fortran_flags("-march=core-avx2 -no-fma" BUILD BIT) ecbuild_add_fortran_flags("-fast-transcendentals -fp-model precise -fp-speculation=safe") diff --git a/cmake/project_summary.cmake b/cmake/project_summary.cmake index be8f8b78c..cb321aae6 100644 --- a/cmake/project_summary.cmake +++ b/cmake/project_summary.cmake @@ -35,6 +35,9 @@ ecbuild_info( " LAPACK_LIBRARIES : [${LAPACK_LIBRARIES}]" ) endif() ecbuild_info( "FFTW" ) ecbuild_info( " FFTW_LIBRARIES : [${FFTW_LIBRARIES}]" ) + if( CMAKE_TEST_LAUNCHER ) +ecbuild_info( "CMAKE_TEST_LAUNCHER : [${CMAKE_TEST_LAUNCHER}]" ) + endif() ecbuild_info( "---------------------------------------------------------" ) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..7e06fc5fc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[project] +name = "ectrans4py" +dynamic = ["version"] +description = "ECTRANS interface for Python" +readme = "README.md" +requires-python = ">=3.10" +dependencies=["numpy", "ctypesForFortran<2.0.0"] +classifiers = [ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Science/Research', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Operating System :: Unix', +] + +[build-system] +requires = ["setuptools", "wheel", "scikit-build"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "ectrans4py.__version__"} diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..112dc8969 --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +import os +import ast +from skbuild import setup + +_version_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "VERSION") +with open(_version_file, "r") as f: + __version__ = f.read().strip() + +setup( + name="ectrans4py", + version=__version__, + packages=['ectrans4py'], + cmake_minimum_required_version="3.13", + cmake_args=[ + '-DENABLE_ETRANS=ON', + '-DENABLE_ECTRANS4PY=ON', + '-DENABLE_SINGLE_PRECISION=OFF', + '-DENABLE_OMP=ON', + '-DFFTW_USE_STATIC_LIBS=ON', + ], + package_dir={"": "src"}, + cmake_install_dir="src/ectrans4py", + setup_requires=["scikit-build", "setuptools"], + install_requires=["numpy", "ctypesforfortran==1.1.3"], +) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7451aa03f..706806cd8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,3 +11,9 @@ add_subdirectory( programs ) if( HAVE_TRANSI ) add_subdirectory(transi) endif() +if( HAVE_ETRANS ) + add_subdirectory(etrans) +endif() +if(HAVE_ECTRANS4PY) + add_subdirectory(ectrans4py) +endif() diff --git a/src/ectrans4py/CMakeLists.txt b/src/ectrans4py/CMakeLists.txt new file mode 100644 index 000000000..857d7d609 --- /dev/null +++ b/src/ectrans4py/CMakeLists.txt @@ -0,0 +1,20 @@ +if(HAVE_ETRANS) + # (using CMAKE_CURRENT_SOURCE_DIR is necessary because sources are in a different directory than the target library (trans_${prec})) + ecbuild_list_add_pattern( + LIST ectrans4py_src + GLOB ${CMAKE_CURRENT_SOURCE_DIR}/*.F90 + QUIET + ) + + set(HAVE_dp ${HAVE_DOUBLE_PRECISION}) + set(prec dp) + + if(HAVE_${prec}) + # Add sources + target_sources(trans_${prec} PRIVATE ${ectrans4py_src}) + endif() + +else() + ecbuild_critical("To activate the ectrans Python interface, you must enable the ETRANS option.") +endif() + diff --git a/src/ectrans4py/__init__.py b/src/ectrans4py/__init__.py new file mode 100644 index 000000000..dd7b365eb --- /dev/null +++ b/src/ectrans4py/__init__.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +ectrans4py: + +A Python interface to spectral transforms from ecTrans, using cTypesForFortran for the Fortran/Python binding. +""" + +from __future__ import print_function, absolute_import, unicode_literals, division + +import os +import resource +import numpy as np +import ctypesForFortran +from ctypesForFortran import addReturnCode, treatReturnCode, IN, OUT + + +__version__ = "1.5.1" + + +# Shared objects library +######################## +so_basename = "libtrans_dp.so" # local name of library in the directory +LD_LIBRARY_PATH = [p for p in os.environ.get('LD_LIBRARY_PATH', '').split(':') if p != ''] +lpath = LD_LIBRARY_PATH + [ + os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib'), + os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib64'), + ] +for d in lpath: + shared_objects_library = os.path.join(d, so_basename) + if os.path.exists(shared_objects_library): + break + else: + shared_objects_library = None +if shared_objects_library is None: + msg = ' '.join(["'{}' was not found in any of potential locations: {}.", + "You can specify a different location using env var LD_LIBRARY_PATH"]) + msg = msg.format(so_basename, str(lpath)) + raise FileNotFoundError(msg) +ctypesFF, handle = ctypesForFortran.ctypesForFortranFactory(shared_objects_library) + +# Initialization +################ + +def init_env(omp_num_threads=None, + no_mpi=True, + unlimited_stack=True, + ): + """ + Set adequate environment for the inner libraries. + + :param int omp_num_threads: sets OMP_NUM_THREADS + :param bool no_mpi: environment variable DR_HOOK_NOT_MPI set to 1 + :param unlimited_stack: equivalent to 'ulimit -s unlimited' + """ + # because arpifs library is compiled with MPI & openMP + if omp_num_threads is not None: + os.environ['OMP_NUM_THREADS'] = str(omp_num_threads) + if no_mpi: + os.environ['DR_HOOK_NOT_MPI'] = '1' + if unlimited_stack: + resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY)) + +# Transforms interfaces +####################### + +@treatReturnCode +@ctypesFF() +@addReturnCode +def etrans_inq4py(KSIZEI, KSIZEJ, + KPHYSICALSIZEI, KPHYSICALSIZEJ, + KTRUNCX, KTRUNCY, + KNUMMAXRESOL, + PDELATX, PDELATY): + """ + Simplified wrapper to ETRANS_INQ. + + Args:\n + 1,2) KSIZEI, KSIZEJ: size of grid-point field (with extension zone) + 3,4) KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field + 5,6) KTRUNCX, KTRUNCY: troncatures + 7) KNUMMAXRESOL: maximum number of troncatures handled + 8,9) PDELTAX, PDELTAY: resolution along x,y axis + + Returns:\n + 1) KGPTOT: number of gridpoints + 2) KSPEC: number of spectral coefficients + """ + return ([KSIZEI, KSIZEJ, + KPHYSICALSIZEI, KPHYSICALSIZEJ, + KTRUNCX, KTRUNCY, + KNUMMAXRESOL, + PDELATX, PDELATY], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.float64, None, IN), + (np.float64, None, IN), + (np.int64, None, OUT), + (np.int64, None, OUT)], + None) + + +@treatReturnCode +@ctypesFF() +@addReturnCode +def trans_inq4py(KSIZEJ, KTRUNC, KSLOEN, KLOEN, KNUMMAXRESOL): + """ + Simplified wrapper to TRANS_INQ. + + Args:\n + 1) KSIZEJ: number of latitudes in grid-point space + 2) KTRUNC: troncature + 3) KSLOEN: Size of KLOEN + 4) KLOEN: number of points on each latitude row + 5) KNUMMAXRESOL: maximum number of troncatures handled + + Returns:\n + 1) KGPTOT: number of gridpoints + 2) KSPEC: number of spectral coefficients + 3) KNMENG: cut-off zonal wavenumber + """ + return ([KSIZEJ, KTRUNC, KSLOEN, KLOEN, KNUMMAXRESOL], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, (KSLOEN,), IN), + (np.int64, None, IN), + (np.int64, None, OUT), + (np.int64, None, OUT), + (np.int64, (KSLOEN,), OUT)], + None) + + +@treatReturnCode +@ctypesFF() +@addReturnCode +def sp2gp_lam4py(KSIZEI, KSIZEJ, + KPHYSICALSIZEI, KPHYSICALSIZEJ, + KTRUNCX, KTRUNCY, + KNUMMAXRESOL, + KSIZE, + LGRADIENT, + LREORDER, + PDELTAX, PDELTAY, + PSPEC): + """ + Transform spectral coefficients into grid-point values. + + Args:\n + 1,2) KSIZEI, KSIZEJ: size of grid-point field (with extension zone) + 3,4) KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field + 5,6) KTRUNCX, KTRUNCY: troncatures + 7) KNUMMAXRESOL: maximum number of troncatures handled + 8) KSIZE: size of PSPEC + 9) LGRADIENT: gradient computation + 10) LREORDER: reorder spectral coefficients or not + 11,12) PDELTAX,PDELTAY: resolution along x,y axis + 13) PSPEC: spectral coefficient array + + Returns:\n + 1) PGPT: grid-point field + 2) PGPTM: N-S derivative if LGRADIENT + 3) PGPTL: E-W derivative if LGRADIENT + """ + return ([KSIZEI, KSIZEJ, + KPHYSICALSIZEI, KPHYSICALSIZEJ, + KTRUNCX, KTRUNCY, + KNUMMAXRESOL, + KSIZE, + LGRADIENT, + LREORDER, + PDELTAX, PDELTAY, + PSPEC], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (bool, None, IN), + (bool, None, IN), + (np.float64, None, IN), + (np.float64, None, IN), + (np.float64, (KSIZE,), IN), + (np.float64, (KSIZEI * KSIZEJ,), OUT), + (np.float64, (KSIZEI * KSIZEJ,), OUT), + (np.float64, (KSIZEI * KSIZEJ,), OUT)], + None) + + +@treatReturnCode +@ctypesFF() +@addReturnCode +def gp2sp_lam4py(KSIZE, + KSIZEI, KSIZEJ, + KPHYSICALSIZEI, KPHYSICALSIZEJ, + KTRUNCX, KTRUNCY, + KNUMMAXRESOL, + PDELTAX, PDELTAY, + LREORDER, + PGPT): + """ + Transform grid point values into spectral coefficients. + + Args:\n + 1) KSIZE: size of spectral field + 2,3) KSIZEI, KSIZEJ: size of grid-point field (with extension zone) + 4,5) KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field + 6,7) KTRUNCX, KTRUNCY: troncatures + 8) KNUMMAXRESOL: maximum number of troncatures handled + 9,10) PDELTAX, PDELTAY: resolution along x,y axis + 11) LREORDER: reorder spectral coefficients or not + 12) PGPT: grid-point field + + Returns:\n + 1) PSPEC: spectral coefficient array + """ + return ([KSIZE, + KSIZEI, KSIZEJ, + KPHYSICALSIZEI, KPHYSICALSIZEJ, + KTRUNCX, KTRUNCY, + KNUMMAXRESOL, + PDELTAX, PDELTAY, + LREORDER, + PGPT], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.float64, None, IN), + (np.float64, None, IN), + (bool, None, IN), + (np.float64, (KSIZEI * KSIZEJ,), IN), + (np.float64, (KSIZE,), OUT)], + None) + + +@treatReturnCode +@ctypesFF() +@addReturnCode +def sp2gp_gauss4py(KSIZEJ, + KTRUNC, + KNUMMAXRESOL, + KGPTOT, + KSLOEN, + KLOEN, + KSIZE, + LGRADIENT, + LREORDER, + PSPEC): + """ + Transform spectral coefficients into grid-point values. + + Args:\n + 1) KSIZEJ: Number of latitudes + 2) KTRUNC: troncature + 3) KNUMMAXRESOL: maximum number of troncatures handled + 4) KGPTOT: number of grid-points + 5) KSLOEN: Size of KLOEN + 6) KLOEN: + 7) KSIZE: Size of PSPEC + 8) LGRADIENT: compute derivatives + 9) LREORDER: reorder spectral coefficients or not + 10) PSPEC: spectral coefficient array + + Returns:\n + 1) PGPT: grid-point field + 2) PGPTM: N-S derivative if LGRADIENT + 3) PGPTL: E-W derivative if LGRADIENT + """ + return ([KSIZEJ, + KTRUNC, + KNUMMAXRESOL, + KGPTOT, + KSLOEN, + KLOEN, + KSIZE, + LGRADIENT, + LREORDER, + PSPEC], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, (KSLOEN,), IN), + (np.int64, None, IN), + (bool, None, IN), + (bool, None, IN), + (np.float64, (KSIZE,), IN), + (np.float64, (KGPTOT,), OUT), + (np.float64, (KGPTOT,), OUT), + (np.float64, (KGPTOT,), OUT)], + None) + + +@treatReturnCode +@ctypesFF() +@addReturnCode +def gp2sp_gauss4py(KSPEC, + KSIZEJ, + KTRUNC, + KNUMMAXRESOL, + KSLOEN, + KLOEN, + KSIZE, + LREORDER, + PGPT): + """ + Transform grid-point values into spectral coefficients. + + Args:\n + 1) KSPEC: size of spectral coefficients array + 2) KSIZEJ: Number of latitudes + 3) KTRUNC: troncature + 4) KNUMMAXRESOL: maximum number of troncatures handled + 5) KSLOEN: Size of KLOEN + 6) KLOEN + 7) KSIZE: Size of PGPT + 8) LREORDER: reorder spectral coefficients or not + 9) PGPT: grid-point field + + Returns:\n + 1) PSPEC: spectral coefficient array + """ + return ([KSPEC, + KSIZEJ, + KTRUNC, + KNUMMAXRESOL, + KSLOEN, + KLOEN, + KSIZE, + LREORDER, + PGPT], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, None, IN), + (np.int64, (KSLOEN,), IN), + (np.int64, None, IN), + (bool, None, IN), + (np.float64, (KSIZE,), IN), + (np.float64, (KSPEC,), OUT)], + None) + + +@ctypesFF() +def sp2gp_fft1d4py(KSIZES, KTRUNC, PSPEC, KSIZEG): + """ + Transform spectral coefficients into grid-point values, + for a 1D array (vertical section academic model) + + Args:\n + 1) KSIZES size of PSPEC + 2) KTRUNC: troncature + 3) PSPEC: spectral coefficient array + 4) KSIZEG: size of grid-point field (with extension zone) + + Returns:\n + 1) PGPT: grid-point field + """ + return ([KSIZES, KTRUNC, PSPEC, KSIZEG], + [(np.int64, None, IN), + (np.int64, None, IN), + (np.float64, (KSIZES,), IN), + (np.int64, None, IN), + (np.float64, (KSIZEG,), OUT)], + None) diff --git a/src/ectrans4py/etrans_inq4py.F90 b/src/ectrans4py/etrans_inq4py.F90 new file mode 100644 index 000000000..7f2113fba --- /dev/null +++ b/src/ectrans4py/etrans_inq4py.F90 @@ -0,0 +1,66 @@ +SUBROUTINE ETRANS_INQ4PY(KRETURNCODE, KSIZEI, KSIZEJ, KPHYSICALSIZEI, KPHYSICALSIZEJ, & + &KTRUNCX, KTRUNCY, KNUMMAXRESOL, PDELTAX, PDELTAY, & + &KGPTOT, KSPEC) +! ** PURPOSE +! Simplified wrapper to ETRANS_INQ +! +! ** DUMMY ARGUMENTS +! KSIZEI, KSIZEJ: size of grid-point field (with extension zone) +! KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field +! KTRUNCX, KTRUNCY: troncatures +! KNUMMAXRESOL: maximum number of troncatures handled +! PDELTAX: x resolution +! PDELTAY: y resolution +! KGPTOT: number of gridpoints +! KSPEC: number of spectral coefficients +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 6 Jan., S. Riette: PDELTAX and PDELTAY added +! +! I. Dummy arguments declaration +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER(KIND=8), INTENT(IN) :: KSIZEI, KSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KPHYSICALSIZEI, KPHYSICALSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KTRUNCX, KTRUNCY +INTEGER(KIND=8), INTENT(IN) :: KNUMMAXRESOL +REAL(KIND=8), INTENT(IN) :: PDELTAX +REAL(KIND=8), INTENT(IN) :: PDELTAY +INTEGER(KIND=8), INTENT(OUT) :: KGPTOT +INTEGER(KIND=8), INTENT(OUT) :: KSPEC +! +! II. Local variables declaration +INTEGER, DIMENSION(0:KTRUNCX) :: IESM0 +INTEGER :: ISIZEI, ISIZEJ, & + & IPHYSICALSIZEI, IPHYSICALSIZEJ, & + & ITRUNCX, ITRUNCY, & + & INUMMAXRESOL +LOGICAL :: LLSTOP +INTEGER :: IIDENTRESOL +INTEGER, DIMENSION(1) :: ILOEN +INTEGER :: IGPTOT, ISPEC + +#include "etrans_inq.h" + +ISIZEI=KSIZEI +ISIZEJ=KSIZEJ +IPHYSICALSIZEI=KPHYSICALSIZEI +IPHYSICALSIZEJ=KPHYSICALSIZEJ +ITRUNCX=KTRUNCX +ITRUNCY=KTRUNCY +INUMMAXRESOL=KNUMMAXRESOL + +! III. Setup +CALL SPEC_SETUP4PY(KRETURNCODE, ISIZEI, ISIZEJ, IPHYSICALSIZEI, IPHYSICALSIZEJ, & + &ITRUNCX, ITRUNCY, INUMMAXRESOL, ILOEN, .TRUE., 1, & + &PDELTAX, PDELTAY, IIDENTRESOL, LLSTOP) +IF (.NOT. LLSTOP) THEN + CALL ETRANS_INQ(KRESOL=IIDENTRESOL, KGPTOT=IGPTOT, KSPEC=ISPEC, KESM0=IESM0) + KGPTOT=IGPTOT + KSPEC=ISPEC +ENDIF +! +END SUBROUTINE ETRANS_INQ4PY diff --git a/src/ectrans4py/gp2sp_gauss4py.F90 b/src/ectrans4py/gp2sp_gauss4py.F90 new file mode 100644 index 000000000..76fff02c8 --- /dev/null +++ b/src/ectrans4py/gp2sp_gauss4py.F90 @@ -0,0 +1,113 @@ +SUBROUTINE GP2SP_GAUSS4PY(KRETURNCODE, KSPEC, KSIZEJ, KTRUNC, KNUMMAXRESOL, KSLOEN, KLOEN, KSIZE, LREORDER, PGPT, PSPEC) +! ** PURPOSE +! Transform spectral coefficients into grid-point values +! +! ** DUMMY ARGUMENTS +! KRETURNCODE: error code +! KSPEC: size of spectral coefficients array +! KSIZEJ: Number of latitudes +! KTRUNC: troncature +! KNUMMAXRESOL: maximum number of troncatures handled +! KSLOEN: Size ok KLOEN +! KLOEN +! KSIZE: Size of PGPT +! LREORDER: switch to reorder spectral coefficients or not +! PGPT: grid-point field +! PSPEC: spectral coefficient array +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 6 Jan. 2016, S. Riette: w_spec_setup interface modified +! March, 2016, A.Mary: LREORDER +! +! I. Dummy arguments declaration +USE PARKIND1, ONLY : JPRB +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER(KIND=8), INTENT(IN) :: KSPEC +INTEGER(KIND=8), INTENT(IN) :: KSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KTRUNC +INTEGER(KIND=8), INTENT(IN) :: KNUMMAXRESOL +INTEGER(KIND=8), INTENT(IN) :: KSLOEN +INTEGER(KIND=8), DIMENSION(KSLOEN), INTENT(IN) :: KLOEN +INTEGER(KIND=8), INTENT(IN) :: KSIZE +LOGICAL, INTENT(IN) :: LREORDER +REAL(KIND=8), DIMENSION(KSIZE), INTENT(IN) :: PGPT +REAL(KIND=8), DIMENSION(KSPEC), INTENT(OUT) :: PSPEC +! +! II. Local variables declaration +INTEGER, DIMENSION(SIZE(KLOEN)) :: ILOEN +INTEGER :: ISIZEI, ISIZEJ, & + & IPHYSICALSIZEI, IPHYSICALSIZEJ, & + & ITRUNCX, ITRUNCY, & + & INUMMAXRESOL +LOGICAL :: LLSTOP +INTEGER :: IIDENTRESOL +INTEGER :: JI, JM, JN +INTEGER, DIMENSION(0:KTRUNC) :: NASM0 +REAL(KIND=JPRB), DIMENSION(1, SIZE(PGPT)) :: ZSPBUF !size over-evaluated +REAL(KIND=JPRB), DIMENSION(SIZE(PGPT), 1, 1) :: ZGPBUF +REAL(KIND=8) :: ZDELTAX, ZDELTAY + +#include "trans_inq.h" +#include "dir_trans.h" +KRETURNCODE=0 +ILOEN(:)=KLOEN(:) +ISIZEI=0 +ISIZEJ=KSIZEJ +IPHYSICALSIZEI=0 +IPHYSICALSIZEJ=0 +ITRUNCX=KTRUNC +ITRUNCY=0 +INUMMAXRESOL=KNUMMAXRESOL +! +! III. Setup +ZDELTAX=0. +ZDELTAY=0. +CALL SPEC_SETUP4PY(KRETURNCODE, ISIZEI, ISIZEJ, IPHYSICALSIZEI, IPHYSICALSIZEJ, & + &ITRUNCX, ITRUNCY, INUMMAXRESOL, ILOEN, .FALSE., SIZE(ILOEN), & + &ZDELTAX, ZDELTAY, IIDENTRESOL, LLSTOP) +! +! IV. Transformation +! IV.a Shape of coefficient array +IF (.NOT. LLSTOP) THEN + JI=1 + DO JN=0, KTRUNC + NASM0(JN)=JI + JI=JI+1+JN+(JN+1) + ENDDO +ENDIF + +! IV.b Direct transform +IF (.NOT. LLSTOP) THEN + ZGPBUF(:,1,1)=REAL(PGPT(:),KIND=JPRB) + CALL DIR_TRANS(PSPSCALAR=ZSPBUF(:,:), PGP=ZGPBUF(:,:,:), KRESOL=IIDENTRESOL) +ENDIF + +! IV.c Reordering +IF (LREORDER) THEN + IF(.NOT. LLSTOP) THEN + PSPEC(:)=0. + JI=1 + DO JM=0, KTRUNC + DO JN=JM, KTRUNC + PSPEC(NASM0(JN)+JM)=REAL(ZSPBUF(1,JI),KIND=8) + JI=JI+1 + IF(JM/=0) THEN + PSPEC(NASM0(JN)-JM)=REAL(ZSPBUF(1,JI),KIND=8) + ENDIF + JI=JI+1 + ENDDO + ENDDO + IF(JI-1/=KSPEC) THEN + PRINT*, "Internal error in GP2SP_GAUSS4PY (spectral reordering)" + KRETURNCODE=-999 + ENDIF + ENDIF +ELSE + PSPEC(1:KSPEC) = REAL(ZSPBUF(1,1:KSPEC),KIND=8) +ENDIF + +END SUBROUTINE GP2SP_GAUSS4PY diff --git a/src/ectrans4py/gp2sp_lam4py.F90 b/src/ectrans4py/gp2sp_lam4py.F90 new file mode 100644 index 000000000..036a4674b --- /dev/null +++ b/src/ectrans4py/gp2sp_lam4py.F90 @@ -0,0 +1,121 @@ +SUBROUTINE GP2SP_LAM4PY(KRETURNCODE, KSIZE, KSIZEI, KSIZEJ, KPHYSICALSIZEI, KPHYSICALSIZEJ, & + &KTRUNCX, KTRUNCY, KNUMMAXRESOL, PDELTAX, PDELTAY, LREORDER, PGPT, PSPEC) +! ** PURPOSE +! Transform grid point values into spectral coefficients +! +! ** DUMMY ARGUMENTS +! KRETURNCODE: error code +! KSIZE: size of spectral field +! KSIZEI, KSIZEJ: size of grid-point field (with extension zone) +! KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field +! KTRUNCX, KTRUNCY: troncatures +! KNUMMAXRESOL: maximum number of troncatures handled +! PDELTAX: x resolution +! PDELTAY: y resolution +! LREORDER: switch to reorder spectral coefficients or not +! PGPT: grid-point field +! PSPEC: spectral coefficient array +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 6 Jan., S. Riette: PDELTAX and PDELTAY added +! March, 2016, A.Mary: LREORDER +! +! I. Dummy arguments declaration +USE PARKIND1, ONLY : JPRB +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER(KIND=8), INTENT(IN) :: KSIZE, KSIZEI, KSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KPHYSICALSIZEI, KPHYSICALSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KTRUNCX, KTRUNCY +INTEGER(KIND=8), INTENT(IN) :: KNUMMAXRESOL +REAL(KIND=8), INTENT(IN) :: PDELTAX +REAL(KIND=8), INTENT(IN) :: PDELTAY +LOGICAL, INTENT(IN) :: LREORDER +REAL(KIND=8), DIMENSION(KSIZEI*KSIZEJ), INTENT(IN) :: PGPT +REAL(KIND=8), DIMENSION(KSIZE), INTENT(OUT) :: PSPEC +! +! II. Local variables declaration +INTEGER, DIMENSION(0:KTRUNCX) :: IESM0 +INTEGER :: IGPTOT, ISPEC +INTEGER, DIMENSION(0:KTRUNCY) :: ISPECINI, ISPECEND +REAL(KIND=JPRB), DIMENSION(1, KSIZEI*KSIZEJ) :: ZSPBUF !size over-evaluated +REAL(KIND=JPRB), DIMENSION(KSIZEI*KSIZEJ, 1, 1) :: ZGPBUF +INTEGER :: JI, JM, JN, IIDENTRESOL +LOGICAL :: LLSTOP +INTEGER :: ISIZEI, ISIZEJ, & + & IPHYSICALSIZEI, IPHYSICALSIZEJ, & + & ITRUNCX, ITRUNCY, & + & INUMMAXRESOL +INTEGER, DIMENSION(1) :: ILOEN + +#include "edir_trans.h" +#include "etrans_inq.h" + +KRETURNCODE=0 +LLSTOP=.FALSE. +ISIZEI=KSIZEI +ISIZEJ=KSIZEJ +IPHYSICALSIZEI=KPHYSICALSIZEI +IPHYSICALSIZEJ=KPHYSICALSIZEJ +ITRUNCX=KTRUNCX +ITRUNCY=KTRUNCY +INUMMAXRESOL=KNUMMAXRESOL + +! III. Setup +CALL SPEC_SETUP4PY(KRETURNCODE, ISIZEI, ISIZEJ, IPHYSICALSIZEI, IPHYSICALSIZEJ, & + &ITRUNCX, ITRUNCY, INUMMAXRESOL, ILOEN, .TRUE., 1, & + &PDELTAX, PDELTAY, IIDENTRESOL, LLSTOP) + +! IV. Transformation + +! IV.a Shape of coefficient array +!IGPTOT is the total number of points in grid-point space +!ISPEC is the number of spectral coefficients +!IESM0(m) is the index of spectral coefficient (m,0) in model +!ISPECINI(n) is the index of the first of the 4 spectral coefficient (0,n) in FA file +!ISPECEND(n) is the index of the last of the last 4 spectral coefficients (:,n) in FA file +IF (.NOT. LLSTOP) THEN + CALL ETRANS_INQ(KRESOL=IIDENTRESOL, KGPTOT=IGPTOT, KSPEC=ISPEC, KESM0=IESM0) + JI=1 + DO JN=0, ITRUNCY + ISPECINI(JN)=(JI-1)*4+1 + JI=JI+COUNT(IESM0(1:ITRUNCX)-IESM0(0:ITRUNCX-1)>JN*4) + IF (ISPEC-IESM0(ITRUNCX)>JN*4) JI=JI+1 + ISPECEND(JN)=(JI-1)*4 + ENDDO +ENDIF + +! III.b transform +IF (.NOT. LLSTOP) THEN + ZGPBUF(:,1,1)=REAL(PGPT(:),KIND=JPRB) + CALL EDIR_TRANS(PSPSCALAR=ZSPBUF(:,:), PGP=ZGPBUF(:,:,:), KRESOL=IIDENTRESOL) +ENDIF + +! III.c Reordering +! reorder Aladin : file ordering = coeffs per blocks of m, 4 reals per coeff +! Aladin array ordering = coeffs per blocks of n, 4 reals per coeff +IF (LREORDER) THEN + IF (.NOT. LLSTOP) THEN + JI=1 + PSPEC(:)=0. + DO JM=0,ITRUNCX*4+4,4 + DO JN=0,ITRUNCY + IF (ISPECINI(JN)+JM+3<=ISPECEND(JN)) THEN + PSPEC(ISPECINI(JN)+JM:ISPECINI(JN)+JM+3) = REAL(ZSPBUF(1,JI:JI+3),KIND=8) + JI=JI+4 + ENDIF + ENDDO + ENDDO + IF(JI/=ISPEC+1) THEN + PRINT*, "Internal error in GP2SP_LAM4PY (spectral reordering)" + KRETURNCODE=-999 + ENDIF + ENDIF +ELSE + PSPEC(1:KSIZE) = REAL(ZSPBUF(1,1:KSIZE),KIND=8) +ENDIF + +END SUBROUTINE GP2SP_LAM4PY diff --git a/src/ectrans4py/sp2gp_fft1d4py.F90 b/src/ectrans4py/sp2gp_fft1d4py.F90 new file mode 100644 index 000000000..060f14f4d --- /dev/null +++ b/src/ectrans4py/sp2gp_fft1d4py.F90 @@ -0,0 +1,114 @@ +SUBROUTINE SP2GP_FFT1D4PY(KSIZES, KTRUNC, PSPEC, KSIZEG, PGPT) +! ** PURPOSE +! Transform spectral coefficients into grid-point values, +! for a 1D array (vertical section academic model) +! +! ** DUMMY ARGUMENTS +! KSIZES size of PSPEC +! KTRUNC: troncature +! PSPEC: spectral coefficient array +! KSIZEG: size of grid-point field (with extension zone) +! PGPT: grid-point field +! +! ** AUTHOR +! 26 March 2015, A. Mary, from utilities/pinuts/module/fa_datas_mod.F90 +! +! ** MODIFICATIONS +! +! I. Dummy arguments declaration +IMPLICIT NONE + +INTEGER(KIND=8), INTENT(IN) :: KSIZES +INTEGER(KIND=8), INTENT(IN) :: KTRUNC +REAL(KIND=8), DIMENSION(KSIZES), INTENT(IN) :: PSPEC +INTEGER(KIND=8), INTENT(IN) :: KSIZEG +REAL(KIND=8), DIMENSION(KSIZEG), INTENT(OUT) :: PGPT + +INTEGER(KIND=8) :: NSEFRE, NFTM, NDGLSUR +REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: SP2, TRIGSE +INTEGER(KIND=8), DIMENSION(:), ALLOCATABLE :: NFAXE +INTEGER(KIND=8), PARAMETER :: NZERO=0 + +NDGLSUR = KSIZEG+MOD(KSIZEG,2)+2 +NFTM = 2*(KTRUNC+1) +ALLOCATE(SP2(NDGLSUR*NFTM)) +SP2 = 0.0 +SP2 = CONVRT2FFT(PSPEC,NZERO,KTRUNC,NDGLSUR) +ALLOCATE(NFAXE(1:10)) +ALLOCATE(TRIGSE(1:KSIZEG)) +CALL SET99(TRIGSE,NFAXE,KSIZEG) +CALL FFT992(SP2(1:KSIZEG), TRIGSE, NFAXE, 1, NDGLSUR, KSIZEG, 1, 1) +DEALLOCATE(TRIGSE) +DEALLOCATE(NFAXE) +PGPT(:) = SP2(1:KSIZEG) + +CONTAINS + +! from utilities/pinuts/module/fa_datas_mod.F90 +! and utilities/pinuts/module/array_lib_mod.F90 + +FUNCTION CONVRT2FFT(IN,X,Y,N) RESULT(OU) +REAL(KIND=8),DIMENSION(:),INTENT(IN) :: IN +INTEGER(KIND=8),INTENT(IN) :: X, Y, N +REAL(KIND=8),DIMENSION(N*2*(X+1)) :: OU + +INTEGER(KIND=8),DIMENSION(2*(X+1),(N/2)) :: MINQ +INTEGER(KIND=8),DIMENSION((N/2),2*(X+1)) :: TMINQ +REAL(KIND=8),DIMENSION(2*(X+1),(N/2)) :: OMINQ, EMINQ +REAL(KIND=8),DIMENSION((N/2),2*(X+1)) :: TOMINQ, TEMINQ +REAL(KIND=8),DIMENSION(N*(X+1)) :: OINI, EINI +REAL(KIND=8), PARAMETER :: ZZERO=0.0 + +CALL SPLIT_ODEV(IN,OINI,EINI) +MINQ = MASQ(X,Y,N) +OMINQ = UNPACK(OINI,MINQ == 1,ZZERO) +TOMINQ = TRANSPOSE(OMINQ) +EMINQ = UNPACK(EINI,MINQ == 1,ZZERO) +TEMINQ = TRANSPOSE(EMINQ) +TMINQ = 1 +OINI = PACK(TOMINQ,TMINQ > 0) +EINI = PACK(TEMINQ,TMINQ > 0) +OU = MIX_ODEV(OINI,EINI) +END FUNCTION CONVRT2FFT + +FUNCTION MASQ(X,Y,N) RESULT(T) +INTEGER(KIND=8),INTENT(IN) :: X, Y, N +INTEGER(KIND=8),DIMENSION(1:2*(X+1),1:(N/2)) :: T + +INTEGER(KIND=8) :: I, J +INTEGER(KIND=8),DIMENSION(0:X) :: KM +INTEGER(KIND=8),DIMENSION(0:Y) :: KN +CALL ELLIPS64(X,Y,KN,KM) +T = 0 +DO I=0,Y + DO J=0,2*KN(I)+1 + T(J+1,I+1)=1 + END DO +END DO +END FUNCTION MASQ + +FUNCTION MIX_ODEV(TO,TE) RESULT(T) +REAL(KIND=8),DIMENSION(:),INTENT(IN) :: TO,TE +REAL(KIND=8),DIMENSION(SIZE(TO)+SIZE(TE)) :: T + +INTEGER(KIND=8) :: I + +DO I=1,(SIZE(TO)+SIZE(TE))/2 + T((2*I)-1)=TE(I) + T(2*I)=TO(I) +END DO +END FUNCTION MIX_ODEV + +SUBROUTINE SPLIT_ODEV(T,TO,TE) +REAL(KIND=8),DIMENSION(:),INTENT(IN) :: T +REAL(KIND=8),DIMENSION(SIZE(T)/2),INTENT(OUT) :: TO,TE + +INTEGER(KIND=8) :: I + +DO I=1,SIZE(T)/2 + TO(I)=T(2*I) + TE(I)=T((2*I)-1) +END DO +END SUBROUTINE SPLIT_ODEV + +END SUBROUTINE SP2GP_FFT1D4PY \ No newline at end of file diff --git a/src/ectrans4py/sp2gp_gauss4py.F90 b/src/ectrans4py/sp2gp_gauss4py.F90 new file mode 100644 index 000000000..61186f53f --- /dev/null +++ b/src/ectrans4py/sp2gp_gauss4py.F90 @@ -0,0 +1,123 @@ +SUBROUTINE SP2GP_GAUSS4PY(KRETURNCODE, KSIZEJ, KTRUNC, KNUMMAXRESOL, KGPTOT, KSLOEN, KLOEN, KSIZE, & + & LGRADIENT, LREORDER, PSPEC, PGPT, PGPTM, PGPTL) +! ** PURPOSE +! Transform spectral coefficients into grid-point values +! +! ** DUMMY ARGUMENTS +! KSIZEJ: Number of latitudes +! KTRUNC: troncature +! KNUMMAXRESOL: maximum number of troncatures handled +! KGPTOT: number of grid-points +! KSLOEN: Size of KLOEN +! KLOEN: +! KSIZE: Size of PSPEC +! LREORDER: switch to reorder spectral coefficients or not +! LGRADIENT: switch to compute or not gradient +! PSPEC: spectral coefficient array +! PGPT: grid-point field +! PGPTM: N-S derivative if LGRADIENT +! PGPTL: E-W derivative if LGRADIENT +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 6 Jan., S. Riette: w_spec_setup interface modified +! March, 2016, A.Mary: LREORDER +! Sept., 2016, A.Mary: LGRADIENT +! +! I. Dummy arguments declaration +USE PARKIND1, ONLY : JPRB +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER(KIND=8), INTENT(IN) :: KSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KTRUNC +INTEGER(KIND=8), INTENT(IN) :: KNUMMAXRESOL +INTEGER(KIND=8), INTENT(IN) :: KGPTOT +INTEGER(KIND=8), INTENT(IN) :: KSLOEN +INTEGER(KIND=8), DIMENSION(KSLOEN), INTENT(IN) :: KLOEN +INTEGER(KIND=8), INTENT(IN) :: KSIZE +LOGICAL, INTENT(IN) :: LGRADIENT +LOGICAL, INTENT(IN) :: LREORDER +REAL(KIND=8), DIMENSION(KSIZE), INTENT(IN) :: PSPEC +REAL(KIND=8), DIMENSION(KGPTOT), INTENT(OUT) :: PGPT +REAL(KIND=8), DIMENSION(KGPTOT), INTENT(OUT) :: PGPTM +REAL(KIND=8), DIMENSION(KGPTOT), INTENT(OUT) :: PGPTL +! +! II. Local variables declaration +INTEGER, DIMENSION(SIZE(KLOEN)) :: ILOEN +INTEGER :: ISIZEI, ISIZEJ, & + & IPHYSICALSIZEI, IPHYSICALSIZEJ, & + & ITRUNCX, ITRUNCY, & + & INUMMAXRESOL +LOGICAL :: LLSTOP +INTEGER :: IIDENTRESOL +INTEGER :: JI, JM, JN +INTEGER, DIMENSION(0:KTRUNC) :: NASM0 +REAL(KIND=8), DIMENSION(1, KSIZE) :: ZSPBUF +REAL(KIND=JPRB), DIMENSION(KGPTOT, 3, 1) :: ZGPBUF +REAL(KIND=8) :: ZDELTAX, ZDELTAY +#include "trans_inq.h" +#include "inv_trans.h" + +ILOEN(:)=KLOEN(:) +ISIZEI=0 +ISIZEJ=KSIZEJ +IPHYSICALSIZEI=0 +IPHYSICALSIZEJ=0 +ITRUNCX=KTRUNC +ITRUNCY=0 +INUMMAXRESOL=KNUMMAXRESOL +! +! III. Setup +ZDELTAX=0. +ZDELTAY=0. +CALL SPEC_SETUP4PY(KRETURNCODE, ISIZEI, ISIZEJ, IPHYSICALSIZEI, IPHYSICALSIZEJ, & + &ITRUNCX, ITRUNCY, INUMMAXRESOL, ILOEN, .FALSE., SIZE(ILOEN), & + &ZDELTAX, ZDELTAY, IIDENTRESOL, LLSTOP) +! +! IV. Transformation +IF (LREORDER) THEN + ! IV.a Shape of coefficient array + IF (.NOT. LLSTOP) THEN + JI=1 + DO JN=0, KTRUNC + NASM0(JN)=JI + JI=JI+1+JN+(JN+1) + ENDDO + ENDIF + + ! IV.b Reordering + IF(.NOT. LLSTOP) THEN + ZSPBUF(1,:)=0. + JI=1 + DO JM=0, KTRUNC + DO JN=JM, KTRUNC + ZSPBUF(1,JI)=PSPEC(NASM0(JN)+JM) + JI=JI+1 + IF(JM==0) THEN + ZSPBUF(1,JI)=0 + ELSE + ZSPBUF(1,JI)=PSPEC(NASM0(JN)-JM) + ENDIF + JI=JI+1 + ENDDO + ENDDO + ENDIF +ELSE + ZSPBUF(1,:) = PSPEC(:) +ENDIF + +! IV.c Inverse transform +IF (.NOT. LLSTOP) THEN + IF (.NOT. LGRADIENT) THEN + CALL INV_TRANS(PSPSCALAR=REAL(ZSPBUF(:,:),KIND=JPRB), PGP=ZGPBUF(:,:,:), KRESOL=IIDENTRESOL) + PGPT(:)=REAL(ZGPBUF(:,1,1),KIND=8) + ELSE + CALL INV_TRANS(PSPSCALAR=REAL(ZSPBUF(:,:),KIND=JPRB), PGP=ZGPBUF(:,:,:), KRESOL=IIDENTRESOL, LDSCDERS=.TRUE.) + PGPT(:)=REAL(ZGPBUF(:,1,1),KIND=8) + PGPTM(:)=REAL(ZGPBUF(:,2,1),KIND=8) + PGPTL(:)=REAL(ZGPBUF(:,3,1),KIND=8) + ENDIF +ENDIF +END SUBROUTINE SP2GP_GAUSS4PY diff --git a/src/ectrans4py/sp2gp_lam4py.F90 b/src/ectrans4py/sp2gp_lam4py.F90 new file mode 100644 index 000000000..17657966f --- /dev/null +++ b/src/ectrans4py/sp2gp_lam4py.F90 @@ -0,0 +1,140 @@ +SUBROUTINE SP2GP_LAM4PY(KRETURNCODE, KSIZEI, KSIZEJ, KPHYSICALSIZEI, KPHYSICALSIZEJ, & + &KTRUNCX, KTRUNCY, KNUMMAXRESOL, KSIZE, LGRADIENT, LREORDER, PDELTAX, PDELTAY, & + &PSPEC, PGPT, PGPTM, PGPTL) +! ** PURPOSE +! Transform spectral coefficients into grid-point values +! +! ** DUMMY ARGUMENTS +! KRETURNCODE: error code +! KSIZEI, KSIZEJ: size of grid-point field (with extension zone) +! KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field +! KTRUNCX, KTRUNCY: troncatures +! KNUMMAXRESOL: maximum number of troncatures handled +! KSIZE: size of PSPEC +! LREORDER: switch to reorder spectral coefficients or not +! LGRADIENT: switch to compute or not gradient +! PDELTAX: x resolution +! PDELTAY: y resolution +! PSPEC: spectral coefficient array +! PGPT: grid-point field +! PGPTM: N-S derivative if LGRADIENT +! PGPTL: E-W derivative if LGRADIENT +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 5 Jan., S. Riette: PDELTAX, PDELTAY, LGRADIENT, PGPTM and PGPTL added +! March, 2016, A.Mary: LREORDER +! +! I. Dummy arguments declaration +USE PARKIND1, ONLY : JPRB +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER(KIND=8), INTENT(IN) :: KSIZEI, KSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KPHYSICALSIZEI, KPHYSICALSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KTRUNCX, KTRUNCY +INTEGER(KIND=8), INTENT(IN) :: KNUMMAXRESOL +INTEGER(KIND=8), INTENT(IN) :: KSIZE +LOGICAL, INTENT(IN) :: LGRADIENT +LOGICAL, INTENT(IN) :: LREORDER +REAL(KIND=8), INTENT(IN) :: PDELTAX +REAL(KIND=8), INTENT(IN) :: PDELTAY +REAL(KIND=8), DIMENSION(KSIZE), INTENT(IN) :: PSPEC +REAL(KIND=8), DIMENSION(KSIZEI*KSIZEJ), INTENT(OUT) :: PGPT +REAL(KIND=8), DIMENSION(KSIZEI*KSIZEJ), INTENT(OUT) :: PGPTM +REAL(KIND=8), DIMENSION(KSIZEI*KSIZEJ), INTENT(OUT) :: PGPTL +! +! II. Local variables declaration +INTEGER, DIMENSION(0:KTRUNCX) :: IESM0 +INTEGER :: IGPTOT, ISPEC +INTEGER, DIMENSION(0:KTRUNCY) :: ISPECINI, ISPECEND +REAL(KIND=8), DIMENSION(1, KSIZE) :: ZSPBUF +REAL(KIND=JPRB), DIMENSION(KSIZEI*KSIZEJ, 3, 1) :: ZGPBUF +INTEGER :: JI, JM, JN, IINDEX, IIDENTRESOL +LOGICAL :: LLSTOP +INTEGER :: ISIZEI, ISIZEJ, & + & IPHYSICALSIZEI, IPHYSICALSIZEJ, & + & ITRUNCX, ITRUNCY, & + & INUMMAXRESOL +INTEGER, DIMENSION(1) :: ILOEN + +#include "einv_trans.h" +#include "etrans_inq.h" + +KRETURNCODE=0 +LLSTOP=.FALSE. +ISIZEI=KSIZEI +ISIZEJ=KSIZEJ +IPHYSICALSIZEI=KPHYSICALSIZEI +IPHYSICALSIZEJ=KPHYSICALSIZEJ +ITRUNCX=KTRUNCX +ITRUNCY=KTRUNCY +INUMMAXRESOL=KNUMMAXRESOL +ILOEN(:)=0 + +! III. Setup +CALL SPEC_SETUP4PY(KRETURNCODE, ISIZEI, ISIZEJ, IPHYSICALSIZEI, IPHYSICALSIZEJ, & + &ITRUNCX, ITRUNCY, INUMMAXRESOL, ILOEN, .TRUE., 1, & + &PDELTAX, PDELTAY, IIDENTRESOL, LLSTOP) + +! IV. Transformation + +! IV.a Shape of coefficient array +!IGPTOT is the total number of points in grid-point space +!ISPEC is the number of spectral coefficients +!IESM0(m) is the index of spectral coefficient (m,0) in model +!ISPECINI(n) is the index of the first of the 4 spectral coefficient (0,n) in FA file +!ISPECEND(n) is the index of the last of the last 4 spectral coefficients (:,n) in FA file +IF (.NOT. LLSTOP) THEN + CALL ETRANS_INQ(KRESOL=IIDENTRESOL, KGPTOT=IGPTOT, KSPEC=ISPEC, KESM0=IESM0) + JI=1 + DO JN=0, ITRUNCY + ISPECINI(JN)=(JI-1)*4+1 + JI=JI+COUNT(IESM0(1:ITRUNCX)-IESM0(0:ITRUNCX-1)>JN*4) + IF (ISPEC-IESM0(ITRUNCX)>JN*4) JI=JI+1 + ISPECEND(JN)=(JI-1)*4 + ENDDO +ENDIF + +! III.b Reordering +! reorder Aladin : file ordering = coeffs per blocks of m, 4 reals per coeff +! Aladin array ordering = coeffs per blocks of n, 4 reals per coeff +IF (LREORDER) THEN + IF (.NOT. LLSTOP) THEN + ZSPBUF(:,:)=0. + JI=1 + DO JM=0,ITRUNCX+1 + DO JN=0,ITRUNCY + IF (ISPECINI(JN)+JM*4+3<=ISPECEND(JN)) THEN + DO IINDEX=ISPECINI(JN)+JM*4, ISPECINI(JN)+JM*4+3 + ZSPBUF(1,JI)=PSPEC(IINDEX) + JI=JI+1 + ENDDO + ENDIF + ENDDO + ENDDO + IF (JI/=ISPEC+1) THEN + PRINT*, "Internal error in SP2GP_LAM4PY (spectral reordering)" + KRETURNCODE=-999 + LLSTOP=.TRUE. + ENDIF + ENDIF +ELSE + ZSPBUF(1,:) = PSPEC(:) +ENDIF + +! III.c Inverse transform +IF (.NOT. LLSTOP) THEN + IF (.NOT. LGRADIENT) THEN + CALL EINV_TRANS(PSPSCALAR=REAL(ZSPBUF(:,:),KIND=JPRB), PGP=ZGPBUF(:,:,:), KRESOL=IIDENTRESOL) + PGPT(:)=REAL(ZGPBUF(:,1,1),KIND=8) + ELSE + CALL EINV_TRANS(PSPSCALAR=REAL(ZSPBUF(:,:),KIND=JPRB), PGP=ZGPBUF(:,:,:), KRESOL=IIDENTRESOL, LDSCDERS=.TRUE.) + PGPT(:)=REAL(ZGPBUF(:,1,1),KIND=8) + PGPTM(:)=REAL(ZGPBUF(:,2,1),KIND=8) + PGPTL(:)=REAL(ZGPBUF(:,3,1),KIND=8) + ENDIF +ENDIF + +END SUBROUTINE SP2GP_LAM4PY diff --git a/src/ectrans4py/spec_setup4py.F90 b/src/ectrans4py/spec_setup4py.F90 new file mode 100644 index 000000000..644962e3a --- /dev/null +++ b/src/ectrans4py/spec_setup4py.F90 @@ -0,0 +1,160 @@ +SUBROUTINE SPEC_SETUP4PY(KRETURNCODE, KSIZEI, KSIZEJ, KPHYSICALSIZEI, KPHYSICALSIZEJ, & + &KTRUNCX, KTRUNCY, KNUMMAXRESOL, KLOEN, LDLAM, & + &KSIZEKLOEN, PDELTAX, PDELTAY, & + &KIDENTRESOL, LDSTOP) +! ** PURPOSE +! Setup spectral transform for LAM and global +! +! ** DUMMY ARGUMENTS +! KRETURNCODE: error code +! KSIZEI, KSIZEJ: size of grid-point field (with extension zone for LAM), put max size for KSIZEI in global +! KPHYSICALSIZEI, KPHYSICALSIZEJ: size of physical part of grid-point field for LAM (put 0 for global) +! KTRUNCX, KTRUNCY: troncatures for LAM (only KTRUNCX is used for global, put 0 for KTRUNCY) +! KNUMMAXRESOL: maximum number of troncatures handled +! KLOEN: number of points on each latitude row +! KSIZEKLOEN: size of KLOEN array +! PDELTAX: x resolution +! PDELTAY: y resolution +! LDLAM: LAM (.TRUE.) or global (.FALSE.) +! KIDENTRESOL: identification of resolution +! LDSTOP: exception raised? +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 6 Jan 2016, S. Riette: PDELTAX and PDELTAY added +! 31 Jan 2019 R. El Khatib fix for single precision compilation +! +! I. Dummy arguments declaration +USE PARKIND1, ONLY : JPRB +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER, INTENT(IN) :: KSIZEI, KSIZEJ +INTEGER, INTENT(IN) :: KPHYSICALSIZEI, KPHYSICALSIZEJ +INTEGER, INTENT(IN) :: KTRUNCX, KTRUNCY +INTEGER, INTENT(IN) :: KNUMMAXRESOL +INTEGER, DIMENSION(KSIZEKLOEN), INTENT(IN) :: KLOEN +LOGICAL, INTENT(IN) :: LDLAM +INTEGER, INTENT(IN) :: KSIZEKLOEN +REAL(KIND=8), INTENT(IN) :: PDELTAX +REAL(KIND=8), INTENT(IN) :: PDELTAY +INTEGER, INTENT(OUT) :: KIDENTRESOL +LOGICAL, INTENT(OUT) :: LDSTOP +! +! II. Local variables declaration +INTEGER, DIMENSION(2*KSIZEJ) :: ILOEN +INTEGER :: JI +LOGICAL, SAVE :: LLFIRSTCALL=.TRUE. +LOGICAL :: LLNEWRESOL +INTEGER, SAVE :: INBRESOL=0 +INTEGER(KIND=8) :: ICODEILOEN +INTEGER, SAVE :: INUMMAXRESOLSAVE=-1 +INTEGER, DIMENSION(:), ALLOCATABLE, SAVE :: ITRUNCXSAVE, ITRUNCYSAVE, & + IPHYSICALSIZEISAVE, & + IPHYSICALSIZEJSAVE, & + ISIZEISAVE, ISIZEJSAVE, & + IIDENTRESOLSAVE +INTEGER(KIND=8), DIMENSION(:), ALLOCATABLE, SAVE :: ILOENSAVE +REAL(KIND=8), DIMENSION(:), ALLOCATABLE, SAVE :: ZDELTAXSAVE, & + ZDELTAYSAVE +REAL(KIND=8) :: ZEXWN, ZEYWN + +#include "setup_trans0.h" +#include "esetup_trans.h" +#include "setup_trans.h" + +KRETURNCODE=0 +LDSTOP=.FALSE. +! III. Setup + +! III.a Setup LAM and global spectral transform - all resolutions +! Maximum number of resolution is set now and cannot be change anymore +IF (LLFIRSTCALL) THEN + !This code is called only once, whatever is the number of resolutions + CALL SETUP_TRANS0(KPRINTLEV=0, LDMPOFF=.TRUE., KMAX_RESOL=KNUMMAXRESOL) + ALLOCATE(ITRUNCXSAVE(KNUMMAXRESOL)) + ALLOCATE(ITRUNCYSAVE(KNUMMAXRESOL)) + ALLOCATE(IPHYSICALSIZEISAVE(KNUMMAXRESOL)) + ALLOCATE(IPHYSICALSIZEJSAVE(KNUMMAXRESOL)) + ALLOCATE(ISIZEJSAVE(KNUMMAXRESOL)) + ALLOCATE(ISIZEISAVE(KNUMMAXRESOL)) + ALLOCATE(ILOENSAVE(KNUMMAXRESOL)) + ALLOCATE(IIDENTRESOLSAVE(KNUMMAXRESOL)) + ALLOCATE(ZDELTAXSAVE(KNUMMAXRESOL)) + ALLOCATE(ZDELTAYSAVE(KNUMMAXRESOL)) + ITRUNCXSAVE=-1 + ITRUNCYSAVE=-1 + IPHYSICALSIZEISAVE=-1 + IPHYSICALSIZEJSAVE=-1 + ISIZEJSAVE=-1 + ISIZEISAVE=-1 + ILOENSAVE=-1 + IIDENTRESOLSAVE=-1 + ZDELTAXSAVE=-1. + ZDELTAXSAVE=-1. + LLFIRSTCALL=.FALSE. + INUMMAXRESOLSAVE=KNUMMAXRESOL +ENDIF +! +! III.b Is-it a new resolution? +LLNEWRESOL=.TRUE. +IF(LDLAM) THEN + ILOEN(:)=KSIZEI +ELSE + ILOEN(:)=0 + ILOEN(1:MIN(SIZE(ILOEN),SIZE(KLOEN)))=KLOEN(1:MIN(SIZE(ILOEN),SIZE(KLOEN))) +ENDIF +ICODEILOEN=0 +DO JI=1, SIZE(ILOEN) + ICODEILOEN=ICODEILOEN+ILOEN(JI)*JI**4 +ENDDO +DO JI=1, INBRESOL + IF (KTRUNCX==ITRUNCXSAVE(JI) .AND. KTRUNCY==ITRUNCYSAVE(JI) .AND. & + &KPHYSICALSIZEI==IPHYSICALSIZEISAVE(JI) .AND. & + &KPHYSICALSIZEJ==IPHYSICALSIZEJSAVE(JI) .AND. & + &KSIZEJ==ISIZEJSAVE(JI) .AND. KSIZEI==ISIZEISAVE(JI) .AND. & + &ICODEILOEN==ILOENSAVE(JI) .AND. & + &PDELTAX==ZDELTAXSAVE(JI) .AND. PDELTAY==ZDELTAYSAVE(JI)) THEN + KIDENTRESOL=IIDENTRESOLSAVE(JI) + LLNEWRESOL=.FALSE. + ENDIF +ENDDO +IF(LLNEWRESOL) THEN + INBRESOL=INBRESOL+1 + IF(INBRESOL>INUMMAXRESOLSAVE) THEN + PRINT*, "Error in SPEC_SETUP4PY : Maximum number of resolution is exceeded." + KRETURNCODE=-999 + LDSTOP=.TRUE. + ENDIF +ENDIF +! +! III.c Setup LAM or global spectral transform - once by resolution +IF(LLNEWRESOL .AND. .NOT. LDSTOP) THEN + ! The following code is exectuded once for each resolution + ITRUNCXSAVE(INBRESOL)=KTRUNCX + ITRUNCYSAVE(INBRESOL)=KTRUNCY + IPHYSICALSIZEISAVE(INBRESOL)=KPHYSICALSIZEI + IPHYSICALSIZEJSAVE(INBRESOL)=KPHYSICALSIZEJ + ISIZEISAVE(INBRESOL)=KSIZEI + ISIZEJSAVE(INBRESOL)=KSIZEJ + ILOENSAVE(INBRESOL)=ICODEILOEN + ZDELTAXSAVE(INBRESOL)=PDELTAX + ZDELTAYSAVE(INBRESOL)=PDELTAY + IF(LDLAM) THEN + ZEXWN=2*3.141592653589797/(KSIZEI*PDELTAX) + ZEYWN=2*3.141592653589797/(KSIZEJ*PDELTAY) + CALL ESETUP_TRANS(KMSMAX=ITRUNCXSAVE(INBRESOL), KSMAX=ITRUNCYSAVE(INBRESOL), & + &KDGUX=IPHYSICALSIZEJSAVE(INBRESOL), & + &KDGL=ISIZEJSAVE(INBRESOL), KLOEN=ILOEN(:), KRESOL=IIDENTRESOLSAVE(INBRESOL), & + &PEXWN=REAL(ZEXWN,KIND=JPRB), PEYWN=REAL(ZEYWN,KIND=JPRB)) + ELSE + PRINT*, "Setup spectral transform" + CALL SETUP_TRANS(KSMAX=ITRUNCXSAVE(INBRESOL), KDGL=ISIZEJSAVE(INBRESOL), & + &KLOEN=ILOEN(1:ISIZEJSAVE(INBRESOL)), KRESOL=IIDENTRESOLSAVE(INBRESOL)) + PRINT*, "End Setup spectral transform" + ENDIF + KIDENTRESOL=IIDENTRESOLSAVE(INBRESOL) +ENDIF +END SUBROUTINE SPEC_SETUP4PY + diff --git a/src/ectrans4py/trans_inq4py.F90 b/src/ectrans4py/trans_inq4py.F90 new file mode 100644 index 000000000..f989ef175 --- /dev/null +++ b/src/ectrans4py/trans_inq4py.F90 @@ -0,0 +1,70 @@ +SUBROUTINE TRANS_INQ4PY(KRETURNCODE, KSIZEJ, KTRUNC, KSLOEN, KLOEN, KNUMMAXRESOL, & + &KGPTOT, KSPEC, KNMENG) +! ** PURPOSE +! Simplified wrapper to TRANS_INQ +! +! ** DUMMY ARGUMENTS +! KSIZEJ: number of latitudes in grid-point space +! KTRUNC: troncature +! KSLOEN: Size of KLOEN +! KLOEN: number of points on each latitude row +! KNUMMAXRESOL: maximum number of troncatures handled +! KGPTOT: number of gridpoints +! KSPEC: number of spectral coefficients +! KNMENG: cut-off zonal wavenumber +! +! ** AUTHOR +! 9 April 2014, S. Riette +! +! ** MODIFICATIONS +! 6 Jan., S. Riette: w_spec_setup interfaced modified +! +! I. Dummy arguments declaration +IMPLICIT NONE +INTEGER(KIND=8), INTENT(OUT) :: KRETURNCODE +INTEGER(KIND=8), INTENT(IN) :: KSIZEJ +INTEGER(KIND=8), INTENT(IN) :: KTRUNC +INTEGER(KIND=8), INTENT(IN) :: KSLOEN +INTEGER(KIND=8), DIMENSION(KSLOEN), INTENT(IN) :: KLOEN +INTEGER(KIND=8), INTENT(IN) :: KNUMMAXRESOL +INTEGER(KIND=8), INTENT(OUT) :: KGPTOT +INTEGER(KIND=8), INTENT(OUT) :: KSPEC +INTEGER(KIND=8), DIMENSION(KSLOEN), INTENT(OUT) :: KNMENG +! +! II. Local variables declaration +INTEGER, DIMENSION(SIZE(KLOEN)) :: ILOEN +INTEGER :: ISIZEI, ISIZEJ, & + & IPHYSICALSIZEI, IPHYSICALSIZEJ, & + & ITRUNCX, ITRUNCY, & + & INUMMAXRESOL +LOGICAL :: LLSTOP +INTEGER :: IIDENTRESOL +INTEGER :: IGPTOT, ISPEC +INTEGER, DIMENSION(SIZE(KLOEN)) :: INMENG +REAL(KIND=8) :: ZDELTAX, ZDELTAY +#include "trans_inq.h" + +ILOEN(:)=KLOEN(:) +ISIZEI=0 +ISIZEJ=KSIZEJ +IPHYSICALSIZEI=0 +IPHYSICALSIZEJ=0 +ITRUNCX=KTRUNC +ITRUNCY=0 +INUMMAXRESOL=KNUMMAXRESOL +INMENG(:)=KNMENG(:) +! +! III. Setup +ZDELTAX=0. +ZDELTAY=0. +CALL SPEC_SETUP4PY(KRETURNCODE, ISIZEI, ISIZEJ, IPHYSICALSIZEI, IPHYSICALSIZEJ, & + &ITRUNCX, ITRUNCY, INUMMAXRESOL, ILOEN, .FALSE., SIZE(ILOEN), & + &ZDELTAX, ZDELTAY, IIDENTRESOL, LLSTOP) +IF (.NOT. LLSTOP) THEN + CALL TRANS_INQ(KRESOL=IIDENTRESOL, KGPTOT=IGPTOT, KSPEC=ISPEC, KNMENG=INMENG) + KGPTOT=IGPTOT + KSPEC=ISPEC + KNMENG=INMENG +ENDIF +! +END SUBROUTINE TRANS_INQ4PY diff --git a/src/etrans/CMakeLists.txt b/src/etrans/CMakeLists.txt new file mode 100644 index 000000000..8f88adc6d --- /dev/null +++ b/src/etrans/CMakeLists.txt @@ -0,0 +1,154 @@ + + +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +function(generate_file) + set (options) + set (oneValueArgs INPUT OUTPUT BACKEND) + set (multiValueArgs) + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(output ${_PAR_OUTPUT}) + set(input ${_PAR_INPUT}) + set(backend ${_PAR_BACKEND}) + set(sed_rules ${PROJECT_SOURCE_DIR}/src/etrans/sedrenames.txt) + + set( JPRB_dp JPRD ) + set( jprb_dp jprd ) + set( JPRB_sp JPRM ) + set( jprb_sp jprm ) + set( JPRB_gpu_dp JPRD ) + set( jprb_gpu_dp jprd ) + set( JPRB_gpu_sp JPRM ) + set( jprb_gpu_sp jprm ) + + add_custom_command( + OUTPUT ${output} + COMMAND cat ${sed_rules} | + sed -e "s/VARIANTDESIGNATOR/${backend}/g" | + sed -e "s/TYPEDESIGNATOR_UPPER/${JPRB_${backend}}/g" | + sed -e "s/TYPEDESIGNATOR_LOWER/${jprb_${backend}}/g" | + sed -rf - ${input} > ${output} + DEPENDS ${input} ${sed_rules} + COMMENT "Generating ${output}" + VERBATIM + ) +endfunction(generate_file) + + +function(generate_backend_includes) + set (options) + set (oneValueArgs BACKEND TARGET DESTINATION INCLUDE_DIRECTORY) + set (multiValueArgs) + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(destination ${_PAR_DESTINATION} ) + set(backend ${_PAR_BACKEND}) + + file(MAKE_DIRECTORY ${destination}) + file(MAKE_DIRECTORY ${destination}/etrans_${backend}) + + ecbuild_list_add_pattern( LIST absolute_files GLOB etrans/*.h SOURCE_DIR ${_PAR_INCLUDE_DIRECTORY} QUIET ) + set( files ) + foreach(file_i ${absolute_files}) + file(RELATIVE_PATH file_i ${_PAR_INCLUDE_DIRECTORY} ${file_i}) + list(APPEND files ${file_i}) + endforeach() + set( outfiles ) + foreach(file_i ${files}) + get_filename_component(outfile_name ${file_i} NAME) + get_filename_component(outfile_name_we ${file_i} NAME_WE) + get_filename_component(outfile_ext ${file_i} EXT) + get_filename_component(outfile_dir ${file_i} DIRECTORY) + if (${file_i} IN_LIST ectrans_common_includes) + configure_file(${_PAR_INCLUDE_DIRECTORY}/${file_i} ${destination}/${outfile_name}) + else() + set(outfile "${destination}/${outfile_name_we}_${backend}${outfile_ext}") + ecbuild_debug("Generate ${outfile}") + generate_file(BACKEND ${backend} INPUT ${_PAR_INCLUDE_DIRECTORY}/${file_i} OUTPUT ${outfile}) + list(APPEND outfiles ${outfile}) + string(TOUPPER ${outfile_name_we} OUTFILE_NAME_WE ) + ecbuild_debug("Generate ${destination}/trans_${backend}/${outfile_name}") + file(WRITE ${destination}/trans_${backend}/${outfile_name} "! Automatically generated interface header for backward compatibility of generic symbols !\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#if defined(${outfile_name_we})\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#undef ${outfile_name_we}\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#endif\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#if defined(${OUTFILE_NAME_WE})\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#undef ${OUTFILE_NAME_WE}\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#endif\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#include \"${outfile_name_we}_${backend}${outfile_ext}\"\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#define ${outfile_name_we} ${OUTFILE_NAME_WE}_${backend}\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#define ${OUTFILE_NAME_WE} ${OUTFILE_NAME_WE}_${backend}\n") + endif() + endforeach(file_i) + + add_custom_target(${_PAR_TARGET}_generate DEPENDS ${outfiles}) + ecbuild_add_library(TARGET ${_PAR_TARGET} TYPE INTERFACE) + add_dependencies(${_PAR_TARGET} ${_PAR_TARGET}_generate) + target_include_directories(${_PAR_TARGET} INTERFACE $) +endfunction(generate_backend_includes) + + + + + +# TODO: move precision-independent files to common +#add_subdirectory( common ) + +if( HAVE_CPU) + add_subdirectory( cpu ) +endif() + +# placeholder +#if( HAVE_GPU ) +# add_subdirectory( gpu ) +#endif() + + +if (FALSE) +# original cmake file for etrans; keeping it for reference, but should be cleaned later +message(FATAL_ERROR "Hold it right there!") + +# build list of sources to add to trans library +# (using CMAKE_CURRENT_SOURCE_DIR is necessary because sources are in a different directory than the target library (trans_${prec}) +ecbuild_list_add_pattern( LIST etrans_src + GLOB + ${CMAKE_CURRENT_SOURCE_DIR}/biper/internal/* + ${CMAKE_CURRENT_SOURCE_DIR}/biper/external/* + ${CMAKE_CURRENT_SOURCE_DIR}/etrans/internal/* + ${CMAKE_CURRENT_SOURCE_DIR}/etrans/external/* + QUIET + ) + +# dummies to be able to loop over precisions +set( HAVE_dp ${HAVE_DOUBLE_PRECISION} ) +set( HAVE_sp ${HAVE_SINGLE_PRECISION} ) + +# loop over precisions +foreach( prec sp dp ) + if( HAVE_${prec} ) + # add sources + target_sources(trans_${prec} PRIVATE ${etrans_src}) + # add include directories + target_include_directories(trans_${prec} + PUBLIC + $ + $ + ) + endif() +endforeach() + +# install headers +file( GLOB etrans_interface biper/include/* etrans/include/*) +install( + FILES ${etrans_interface} + DESTINATION include/ectrans +) + +endif() \ No newline at end of file diff --git a/src/etrans/cpu/CMakeLists.txt b/src/etrans/cpu/CMakeLists.txt new file mode 100644 index 000000000..85ef861ee --- /dev/null +++ b/src/etrans/cpu/CMakeLists.txt @@ -0,0 +1,99 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +## Apply workarounds for some known compilers +## see trans/ for example + +function(generate_backend_sources) + set (options) + set (oneValueArgs BACKEND DESTINATION OUTPUT) + set (multiValueArgs) + + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(backend ${_PAR_BACKEND}) + set(destination ${_PAR_DESTINATION}) + file(MAKE_DIRECTORY ${destination}/biper/internal) + file(MAKE_DIRECTORY ${destination}/biper/external) + file(MAKE_DIRECTORY ${destination}/internal) + file(MAKE_DIRECTORY ${destination}/external) + + ecbuild_list_add_pattern( LIST files + GLOB + internal/*.F90 + external/*.F90 + biper/internal/*.F90 + biper/external/*.F90 + QUIET + ) + + set(outfiles) + foreach(file_i ${files}) + get_filename_component(outfile_name ${file_i} NAME) + get_filename_component(outfile_name_we ${file_i} NAME_WE) + get_filename_component(outfile_ext ${file_i} EXT) + get_filename_component(outfile_dir ${file_i} DIRECTORY) + set(outfile "${destination}/${file_i}") + ecbuild_debug("Generate ${outfile}") + generate_file(BACKEND ${backend} INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${file_i} OUTPUT ${outfile}) + list(APPEND outfiles ${outfile}) + endforeach(file_i) + set(${_PAR_OUTPUT} ${outfiles} PARENT_SCOPE) +endfunction(generate_backend_sources) + +set( BUILD_INTERFACE_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/ectrans ) + +foreach( prec dp sp ) + if( HAVE_${prec} ) + + generate_backend_includes(BACKEND ${prec} TARGET ectrans_etrans_${prec}_includes DESTINATION ${BUILD_INTERFACE_INCLUDE_DIR} INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/src/etrans/include ) + generate_backend_sources( BACKEND ${prec} OUTPUT ectrans_etrans_${prec}_src DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/generated/ectrans_etrans_${prec}) + ecbuild_add_library( + TARGET ectrans_etrans_${prec} + LINKER_LANGUAGE Fortran + SOURCES ${ectrans_etrans_${prec}_src} + PUBLIC_INCLUDES $ + $ + $ + PUBLIC_LIBS fiat ectrans_common ectrans_${prec}_includes ectrans_${prec} ectrans_etrans_${prec}_includes + ) + + ectrans_target_fortran_module_directory( + TARGET ectrans_etrans_${prec} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans + ) + + set( FFTW_LINK PRIVATE ) + if( LAPACK_LIBRARIES MATCHES "mkl" AND NOT FFTW_LIBRARIES MATCHES "mkl" ) + ecbuild_warn( "Danger: Both MKL and FFTW are linked in trans_${prec}. " + "No guarantees on link order can be made for the final executable.") + set( FFTW_LINK PUBLIC ) # Attempt anyway to give FFTW precedence + endif() + ecbuild_debug("target_link_libraries( trans_${prec} ${FFTW_LINK} ${FFTW_LIBRARIES} )") + target_link_libraries( ectrans_etrans_${prec} ${FFTW_LINK} ${FFTW_LIBRARIES} ) + target_include_directories( ectrans_etrans_${prec} PRIVATE ${FFTW_INCLUDE_DIRS} ) + target_compile_definitions( ectrans_etrans_${prec} PRIVATE WITH_FFTW ) + # daand: lam transforms don't need lapack + #ecbuild_debug("target_link_libraries( ectrans_etrans_${prec} PRIVATE ${LAPACK_LIBRARIES} )") + #target_link_libraries( ectrans_${prec} PRIVATE ${LAPACK_LIBRARIES} ) + + if( HAVE_OMP ) + ecbuild_debug("target_link_libraries( ectrans_${prec} PRIVATE OpenMP::OpenMP_Fortran )") + target_link_libraries( ectrans_${prec} PRIVATE OpenMP::OpenMP_Fortran ) + endif() + + # This interface library is for backward compatibility, and provides the older includes + ecbuild_add_library( TARGET etrans_${prec} TYPE INTERFACE ) + target_include_directories( etrans_${prec} INTERFACE $ ) + target_include_directories( etrans_${prec} INTERFACE $ ) + target_link_libraries( trans_${prec} INTERFACE fiat ectrans_${prec} ectrans_etrans_${prec} parkind_${prec}) + endif() +endforeach() + +## Install trans interface +install( DIRECTORY ${BUILD_INTERFACE_INCLUDE_DIR}/ DESTINATION include/ectrans ) diff --git a/src/etrans/cpu/biper/external/etibihie.F90 b/src/etrans/cpu/biper/external/etibihie.F90 new file mode 100644 index 000000000..fed2dbab1 --- /dev/null +++ b/src/etrans/cpu/biper/external/etibihie.F90 @@ -0,0 +1,112 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE ETIBIHIE(KDLON,KDGL,KNUBI,KDLUX,KDGUX,& + & KSTART,KDLSM,PGPBI,LDBIX,LDBIY,KDADD) + +!**** tool ETIBIHIE : Doubly-periodicisation : isotropic spline +! ------------- method. + +! purpose : +! -------- +! KNUBI horizontal fields which are known on C U I, +! are extended over E, in order to obtain doubly-periodic +! fields. +! IF LDBIX is equal .TRUE. , then the fields are periodicise +! in the x ( or longitude ) direction. If it is not the case, +! KDLUX must be equal to KDLON. +! IF LDBIY is equal .TRUE. , then the fields are periodicise +! in the y ( or latitude ) direction. If it is not the case, +! KDGUX must be equal to KDGL. + +!* *CALL* *ETIBIHIE*(...) + +! externals : +! ---------- +! ESPLIN spline extension +! ESMOOTH smoothing across to get isotropy. + +! explicit arguments : +! ------------------ +! KDLON : upper bound for the x (or longitude) dimension +! of the gridpoint array on C U I U E +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U E +! KNUBI : number of horizontal fields to doubly-periodicise. +! KDLUX : upper bound for the x (or longitude) dimension +! of C U I. +! KDGUX : upper bound for the y (or latitude) dimension +! of C U I. +! KSTART : first dimension in x direction of g-p array +! KDLSM : second dimension in x direction of g-p array +! PGPBI : gridpoint array on C U I U E. +! LDBIX : logical to periodicize or not +! in the x ( or longitude ) direction. +! LDBIY : logical to periodicize or not +! in the y ( or latitude ) direction. +! KDADD : 1 to test biperiodiz. + +! references : +! ---------- + +! author : +! ------ +! V. Ducrocq + +! modification : +! ------------ +! A. Stanesic 28/03/2008: KDADD - test of externalized biper. +! ------------------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE ESPLINE_MOD +USE ESMOOTHE_MOD + +! ------------------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDADD +REAL(KIND=JPRB),INTENT(INOUT) :: PGPBI(KSTART:KDLSM+KDADD,KNUBI,1:KDGL+KDADD) +LOGICAL,INTENT(IN) :: LDBIX +LOGICAL,INTENT(IN) :: LDBIY + +! ------------------------------------------------------------------------- + +REAL(KIND=JPRB) :: ZALFA +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ETIBIHIE',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------------- + +!* 1. DOUBLY-PERIODICISE : +! ------------------ + +ZALFA = 0.0_JPRB + +CALL ESPLINE(1,KDLON,1,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM+KDADD,1,KDGL+KDADD,KNUBI,PGPBI,ZALFA,LDBIX,LDBIY,KDADD) +CALL ESMOOTHE(1,KDLON,1,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM+KDADD,1,KDGL+KDADD,KNUBI,PGPBI,LDBIX,LDBIY) + +! ------------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ETIBIHIE',1,ZHOOK_HANDLE) +END SUBROUTINE ETIBIHIE diff --git a/src/etrans/cpu/biper/external/fpbipere.F90 b/src/etrans/cpu/biper/external/fpbipere.F90 new file mode 100644 index 000000000..9d3ee313c --- /dev/null +++ b/src/etrans/cpu/biper/external/fpbipere.F90 @@ -0,0 +1,168 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE FPBIPERE(KDLUX,KDGUX,KDLON,KDGL,KNUBI,KD1,PGPBI,KDADD,LDZON, & + & LDBOYD, KDBOYD, PLBOYD) + +!**** *FPBIPERE* - Full-POS interface for double periodicisation + +! purpose : +! -------- +! To bi-periodicise the post-processed fields, or just fill the extension zone +! with the mean value of C+I area + +!** INTERFACE. +! ---------- +! *CALL* *FPBIPERE*(...) + +! EXPLICIT ARGUMENTS +! -------------------- +! KDLUX : upper bound for the x (or longitude) dimension of C U I. +! KDGUX : upper bound for the y (or latitude) dimension of C U I. +! KDLON : upper bound for the x (or longitude) dimension of the gridpoint array on C U I U E +! KDGL : upper bound for the y (or latitude) dimension of the gridpoint array on C U I U E +! KNUBI : number of horizontal fields to doubly-periodicise. +! KD1 : dimension of input/output array +! PGPBI : input/output gridpoint array on C U I U E. +! LDZON : .true. if input grid on C U I U E (.false. if C U I) +! KDADD : 1 to test biperiodiz. +! LDBOYD: perform boyd periodization (inside C U I) +! KDBOYD: array containing dimensions of boyd domain +! PLBOYD: scalar parameter for boyd (variable L in paper) + +! IMPLICIT ARGUMENTS +! -------------------- + +! METHOD. +! ------- +! SEE DOCUMENTATION + +! EXTERNALS. +! ---------- +! ESPLINE spline extension +! ESMOOTHE smoothing across to get isotropy. + +! REFERENCE. +! ---------- +! ECMWF Research Department documentation of the IFS + +! AUTHOR. +! ------- +! RYAD EL KHATIB *METEO-FRANCE* + +! MODIFICATIONS. +! -------------- +! R. El Khatib : 01-08-07 Pruning options +! M.Hamrud : 01-Oct-2003 CY28 Cleaning +! F. Taillefer : 04-10-21 Add LDZON +! A. Stanesic : 28-03-08: KDADD - test of externalized biper. +! D. Degrauwe : feb 2012 Boyd periodization +! R. El Khatib 27-Sep-2013 Boyd periodization in Fullpos-2 +! R. El Khatib 04-Aug-2016 new interface to ewindowe + cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE ESPLINE_MOD +USE ESMOOTHE_MOD +USE EWINDOWE_MOD +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KD1 +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDADD +REAL(KIND=JPRB) ,INTENT(INOUT) :: PGPBI(KD1,KNUBI) +LOGICAL, OPTIONAL ,INTENT(IN) :: LDZON +LOGICAL ,INTENT(IN) ,OPTIONAL :: LDBOYD +INTEGER(KIND=JPIM),INTENT(IN) ,OPTIONAL :: KDBOYD(6) +REAL(KIND=JPRB) ,INTENT(IN) ,OPTIONAL :: PLBOYD + +! ------------------------------------------------------------------ + +REAL(KIND=JPRB), ALLOCATABLE :: ZGPBI(:,:,:) +INTEGER(KIND=JPIM) :: IND, ISTAE, JGL, JLON, JNUBI, ILONF, ILATF, IBWX, IBWY +INTEGER(KIND=JPIM) :: IBWXH, IBWYH, IND1 +INTEGER(KIND=JPIM) :: ILONI(KDLON), ILATI(KDGL) +INTEGER(KIND=JPIM) :: IDLUN, IDGUN, IDLUX, IDGUX +LOGICAL :: LLZON, LLBOYD +REAL(KIND=JPRB) :: ZALFA +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('FPBIPERE',0,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +LLBOYD=.FALSE. +IF (PRESENT(LDBOYD)) LLBOYD=LDBOYD + + +!* 2. DOUBLY-PERIODICISE +! ------------------ + +IF (LLBOYD) THEN + IF (.NOT.PRESENT(KDBOYD)) CALL ABOR1('FPBIPERE: Boyd periodization requires KDBOYD argument') + IF (.NOT.PRESENT(PLBOYD)) CALL ABOR1('FPBIPERE: Boyd periodization requires PLBOYD argument') + IBWX=KDBOYD(3) + IBWY=KDBOYD(6) + CALL EWINDOWE(KDLON,KDLUX,IBWX,KDGL,KDGUX,IBWY,KNUBI,PGPBI,PLBOYD,.TRUE.,.TRUE.) +ELSE + LLZON=.FALSE. + IF(PRESENT(LDZON)) LLZON=LDZON + ALLOCATE(ZGPBI(KDLON+KDADD,KNUBI,KDGL+KDADD)) + IF(LLZON) THEN +! Copy C+I+E + IND=KDLON + ELSE +! Copy C+I + IND=KDLUX + ENDIF +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JNUBI,ISTAE,JGL,JLON) + DO JNUBI=1,KNUBI + ISTAE=0 + DO JGL=1,KDGUX + DO JLON=1,KDLUX + ZGPBI(JLON,JNUBI,JGL)=PGPBI(ISTAE+JLON,JNUBI) + ENDDO + ISTAE=ISTAE+IND + ENDDO + ENDDO +!$OMP END PARALLEL DO + ZALFA = 0.0_JPRB + CALL ESPLINE(1,KDLON,1,KDGL,KDLUX,KDGUX,1,KDLON+KDADD,1,KDGL+KDADD,KNUBI,ZGPBI,& + & ZALFA,.TRUE.,.TRUE.,KDADD) + CALL ESMOOTHE(1,KDLON,1,KDGL,KDLUX,KDGUX,1,KDLON+KDADD,1,KDGL+KDADD,KNUBI,ZGPBI,& + & .TRUE.,.TRUE.) +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JNUBI,ISTAE,JGL,JLON) + DO JNUBI=1,KNUBI + ISTAE=0 + DO JGL=1,KDGL + DO JLON=1,KDLON + PGPBI(ISTAE+JLON,JNUBI)=ZGPBI(JLON,JNUBI,JGL) + ENDDO + ISTAE=ISTAE+KDLON + ENDDO + ENDDO +!$OMP END PARALLEL DO + DEALLOCATE(ZGPBI) +ENDIF + + +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('FPBIPERE',1,ZHOOK_HANDLE) +END SUBROUTINE FPBIPERE diff --git a/src/etrans/cpu/biper/external/horiz_field.F90 b/src/etrans/cpu/biper/external/horiz_field.F90 new file mode 100644 index 000000000..54d13f6a4 --- /dev/null +++ b/src/etrans/cpu/biper/external/horiz_field.F90 @@ -0,0 +1,77 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE HORIZ_FIELD(KX,KY,PHFIELD) + +! purpose : +! -------- +! To produce test horizontal field of temperature. + +! method : +! --------- +! Test horizontal input field is on horizontal grid size KXxKY points, and it +! represent's temperature. It is obtained form flollwing expression: +! PHFIELD(i,j)=280*(1+0.1*Sin[PPI*(i+0.5*IMAX)*(j+0.7*IMAX)/IMAX^2+1]) (Pierre Benard) + +! interface : +! --------- +! CALL HORIZ_FIELD(KX,KY,PHFIELD) + +! Explicit arguments : +! ------------------- +! KX - number of grid points in x +! KY - number of grid points in y +! PHFIELD - simulated 2D temperature horizontal field + +! externals : +! ---------- +! None. + +! references : +! ---------- + +! author : +! ------ +! 23-May-2008 Antonio Stanesic +! ---------------------------------------------------------------------- + +USE PARKIND1 , ONLY : JPIM ,JPRB +USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + +! ---------------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KX +INTEGER(KIND=JPIM), INTENT(IN) :: KY +REAL(KIND=JPRB), INTENT(OUT) :: PHFIELD(KX,KY) + +! ---------------------------------------------------------------------- + +REAL(KIND=JPRB), PARAMETER :: PPI=3.141592 +INTEGER(KIND=JPIM) :: JX,JY,IMAX +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ---------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('HORIZ_FIELD',0,ZHOOK_HANDLE) +! ---------------------------------------------------------------------- + +IMAX=MAX(KX,KY) + +DO JY=1,KY + DO JX=1,KX + PHFIELD(JX,JY)=280*(1+0.1*SIN(PPI*(JX+0.5*IMAX)*(JY+0.7*IMAX)/(IMAX**2)+1)) + ENDDO +ENDDO + +! ---------------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('HORIZ_FIELD',1,ZHOOK_HANDLE) +END SUBROUTINE HORIZ_FIELD diff --git a/src/etrans/cpu/biper/internal/esmoothe_mod.F90 b/src/etrans/cpu/biper/internal/esmoothe_mod.F90 new file mode 100644 index 000000000..ee68ea8bf --- /dev/null +++ b/src/etrans/cpu/biper/internal/esmoothe_mod.F90 @@ -0,0 +1,182 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESMOOTHE_MOD +CONTAINS +SUBROUTINE ESMOOTHE(KDLUN,KDLON,KDGUN,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM,KDGSA,KDGEN,KNUBI,PWORK,LDBIX,LDBIY) + +! purpose : +! -------- +! To smooth the fields over the extension zone. + +!* *CALL* *ESMOOTHE*(...) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! KDLUN : lower bound for the x (or longitude) dimension +! of the gridpoint array +! KDLON : upper bound for the x (or longitude) dimension +! of the gridpoint array on C U I U E +! KDGUN : lower bound for the y (or latitude) dimension +! of the gridpoint array +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U E +! KDLUX : upper bound for the x (or longitude) dimension +! of C U I. +! KDGUX : upper bound for the y (or latitude) dimension +! of C U I. +! KDLSM : dimension in x direction of g-p array +! KDGSA : first dimension index in y of g-p array +! KDGEN : last dimension index in y of g-p array +! KSTART : first dimension index in x of g-p array +! KDLSM : last dimension index in x of g-p array +! KNUBI : number of levels to biperiodicise + +! PWORK : gridpoint array on C U I U E. + +! LDBIX : .TRUE.: biperiodicise in x direction (and vice versa) +! LDBIY : .TRUE.: biperiodicise in y direction (and vice versa) + +! references : +! ---------- + +! author : +! ------ +! Michal Batka and Radmila Bubnova ( B & B ) + +! modifications : +! ------------- +! R. El Khatib 03-05-05 Optimizations +! -------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +! -------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGEN +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(KSTART:KDLSM,KNUBI,KDGSA:KDGEN) +LOGICAL ,INTENT(IN) :: LDBIX +LOGICAL ,INTENT(IN) :: LDBIY + +! -------------------------------------------------------------- + +REAL(KIND=JPRB) :: ZPRAC(KDLUN-1:KDLON+1,KDGUN-1:KDGL+1) +INTEGER(KIND=JPIM) :: IEND, IENX1, IENX2, IENY1, IENY2, JFL, JLAT, JLL, JLON +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! -------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESMOOTHE',0,ZHOOK_HANDLE) +! -------------------------------------------------------------- + +!* 1. Calculation. +! ------------ + +IEND = MAX(KDLON-KDLUX,KDGL-KDGUX) +IEND = (IEND+1)/2 +IENX1= KDLON +IENX2= KDGL +IENY1= KDGL +IENY2= KDLON +IF(LDBIX.AND.(.NOT.LDBIY)) THEN + IENX2 = KDGUX + IENY1 = KDGUX +ELSEIF((.NOT.LDBIX).AND.LDBIY) THEN + IENX1 = KDLUX + IENY2 = KDLUX +ELSEIF((.NOT.LDBIX).AND.(.NOT.LDBIY)) THEN + IF (LHOOK) CALL DR_HOOK('ESMOOTHE',1,ZHOOK_HANDLE) + RETURN +ENDIF + +DO JFL = 1, KNUBI + + DO JLL = 1, IEND + + DO JLON = KDLUX,KDLON + DO JLAT = KDGUN,KDGL + ZPRAC(JLON,JLAT) = PWORK(JLON,JFL,JLAT) + ENDDO + ENDDO + + DO JLON = KDLUX,KDLON + ZPRAC(JLON,KDGUN-1) = PWORK(JLON,JFL,KDGL) + ZPRAC(JLON,KDGL +1) = PWORK(JLON,JFL,KDGUN) + ENDDO + DO JLAT = KDGUN,KDGL + ZPRAC(KDLON+1,JLAT) = PWORK(KDLUN,JFL,JLAT) + ENDDO + ZPRAC(KDLON+1,KDGUN-1) = PWORK(KDLUN,JFL,KDGL) + ZPRAC(KDLON+1,KDGL +1) = PWORK(KDLUN,JFL,KDGUN) + + DO JLON = KDLUX + JLL,IENX1 - JLL + 1 + DO JLAT = KDGUN, IENX2 + PWORK(JLON,JFL,JLAT)=(4._JPRB*ZPRAC(JLON,JLAT)+2.0_JPRB*(ZPRAC(JLON+& + & 1,JLAT)+& + & ZPRAC(JLON-1,JLAT) + ZPRAC(JLON,JLAT+1) +& + & ZPRAC(JLON,JLAT-1)) + ZPRAC(JLON+1,JLAT+1) +& + & ZPRAC(JLON-1,JLAT+1) + ZPRAC(JLON+1,JLAT-1)+& + & ZPRAC(JLON-1,JLAT-1))/16._JPRB + ENDDO + ENDDO + + DO JLAT = KDGUX,KDGL + DO JLON = KDLUN,KDLON + ZPRAC(JLON,JLAT) = PWORK(JLON,JFL,JLAT) + ENDDO + ENDDO + + DO JLAT = KDGUX,KDGL + ZPRAC(KDLUN-1,JLAT) = PWORK(KDLON,JFL,JLAT) + ZPRAC(KDLON+1,JLAT) = PWORK(KDLUN,JFL,JLAT) + ENDDO + DO JLON = KDLUN,KDLON + ZPRAC(JLON,KDGL +1) = PWORK(JLON,JFL,KDGUN) + ENDDO + ZPRAC(KDLUN-1,KDGL +1) = PWORK(KDLON,JFL,KDGUN) + ZPRAC(KDLON+1,KDGL +1) = PWORK(KDLUN,JFL,KDGUN) + + DO JLAT = KDGUX + JLL, IENY1 - JLL + 1 + DO JLON = KDLUN,IENY2 + PWORK(JLON,JFL,JLAT)=(4._JPRB*ZPRAC(JLON,JLAT)+2.0_JPRB*(ZPRAC(JLON+& + & 1,JLAT)+& + & ZPRAC(JLON-1,JLAT) + ZPRAC(JLON,JLAT+1) +& + & ZPRAC(JLON,JLAT-1)) + ZPRAC(JLON+1,JLAT+1) +& + & ZPRAC(JLON-1,JLAT+1) + ZPRAC(JLON+1,JLAT-1)+& + & ZPRAC(JLON-1,JLAT-1))/16._JPRB + ENDDO + ENDDO + + ENDDO + +ENDDO + +! -------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESMOOTHE',1,ZHOOK_HANDLE) +END SUBROUTINE ESMOOTHE +END MODULE ESMOOTHE_MOD diff --git a/src/etrans/cpu/biper/internal/espline_mod.F90 b/src/etrans/cpu/biper/internal/espline_mod.F90 new file mode 100644 index 000000000..bfec336e3 --- /dev/null +++ b/src/etrans/cpu/biper/internal/espline_mod.F90 @@ -0,0 +1,200 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESPLINE_MOD +CONTAINS +SUBROUTINE ESPLINE(KDLUN,KDLON,KDGUN,KDGL,KDLUX,KDGUX,KSTART,& + & KDLSM,KDGSA,KDGEN,KNUBI,PWORK,PALFA,LDBIX,LDBIY,KDAD) + +! purpose : +! -------- +! Make spline extension. + +! *CALL* *ESPLINE*(...) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! KDLUN : lower bound for the x (or longitude) dimension +! of the gridpoint array +! KDLON : upper bound for the x (or longitude) dimension +! of the gridpoint array on C U I U E +! KDGUN : lower bound for the y (or latitude) dimension +! of the gridpoint array +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U E +! KDLUX : upper bound for the x (or longitude) dimension +! of C U I. +! KDGUX : upper bound for the y (or latitude) dimension +! of C U I. +! KSTART : first dimension in x direction of g-p array +! KDLSM : last dimension in x direction of g-p array +! KDGSA : first dimension in y of g-p array +! KDGEN : last dimension in y of g-p array +! KNUBI : number of levels to biperiodicise +! PWORK : gridpoint array on C U I U E. +! PALFA : boundary condition of a spline: +! = 0. ... natural spline +! = 1. ... boundary condition computed differentially +! (additional option) +! LDBIX : .TRUE. biperiodicisation in x ( and vice versa ) +! LDBIY : .TRUE. biperiodicisation in y ( and vice versa ) +! KDAD : 1 for test of biperiodic. + +! references : +! ---------- + +! author : +! ------ +! Michal Batka and Radmila Bubnova ( B & B ) + +! modifications : +! ------------- +! J.Vivoda 03-2002 2D model fix +! A. Stanesic : 28-03-08: KDADD - test of externalized biper. +! ------------------------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +! ------------------------------------------------------------- + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGEN +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUN +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(KSTART:KDLSM,KNUBI,KDGSA:KDGEN) +REAL(KIND=JPRB) ,INTENT(IN) :: PALFA +LOGICAL ,INTENT(IN) :: LDBIX +LOGICAL ,INTENT(IN) :: LDBIY +INTEGER(KIND=JPIM),INTENT(IN) :: KDAD + +! ------------------------------------------------------------- + +LOGICAL :: LLBIX +LOGICAL :: LLBIY +INTEGER(KIND=JPIM) :: IENDX, IENDY, JFL, JLAT, JLON, IA +REAL(KIND=JPRB) :: ZA, ZB, ZC, ZD, ZEPSA, ZEPSB, ZJ, ZK, ZKP1,& + & ZLAM, ZLAMB, ZM1, ZM2, ZMM, ZNY +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESPLINE',0,ZHOOK_HANDLE) +! ------------------------------------------------------------- + +!* 1. Spline Extension. +! ------------------- + +LLBIX=LDBIX +LLBIY=LDBIY + +IF( KDLUN==1.AND.KDLUX==1 ) LLBIX=.FALSE. +IF( KDGUN==1.AND.KDGUX==1 ) LLBIY=.FALSE. + +IENDX = KDGUX +IENDY = KDLON + +IF(LLBIX.AND.(.NOT.LLBIY)) THEN + IENDY = KDLUN - 1 + +ELSEIF((.NOT.LLBIX).AND.LLBIY) THEN + IENDX = KDGUN - 1 + IENDY = KDLUX + +ELSEIF((.NOT.LLBIX).AND.(.NOT.LLBIY)) THEN + IF (LHOOK) CALL DR_HOOK('ESPLINE',1,ZHOOK_HANDLE) + RETURN +ENDIF +DO JFL = 1, KNUBI + + ZK = REAL(KDLON-KDLUX+1,JPRB) + ZKP1 = ZK + 1.0_JPRB + ZLAMB = ZK/ZKP1 + ZNY = PALFA/ZKP1 + + DO JLAT=KDGUN,IENDX + + ZEPSA = ((PWORK(KDLUN,JFL,JLAT)-PWORK(KDLUX,JFL,JLAT))/ZK -& + & PWORK(KDLUX,JFL,JLAT)+PWORK(KDLUX-1,JFL,JLAT))*6._JPRB/ZKP1 -& + & ZNY*(PWORK(KDLUX,JFL,JLAT)-2.0_JPRB* PWORK(KDLUX-1,JFL,JLAT)+& + & PWORK(KDLUX-2,JFL,JLAT)) + + ZEPSB = (PWORK(KDLUN+1,JFL,JLAT)-PWORK(KDLUN,JFL,JLAT) -& + & (PWORK(KDLUN,JFL,JLAT)-PWORK(KDLUX,JFL,JLAT))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(KDLUN+2,JFL,JLAT)-2.0_JPRB* PWORK(KDLUN+1,JFL,JLAT)+& + & PWORK(KDLUN,JFL,JLAT)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ZMM + ZA = PWORK(KDLUX,JFL,JLAT) + ZB = (PWORK(KDLUN,JFL,JLAT)-PWORK(KDLUX,JFL,JLAT))/ZK-& + & (2.0_JPRB*ZM1 + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + DO JLON=KDLUX+1,KDLON+KDAD + ZJ = REAL(JLON - KDLUX,JPRB) + PWORK(JLON,JFL,JLAT) = ZA + ZJ * (ZB + ZJ * (ZC + ZD * ZJ)) + ENDDO + ENDDO + + ZK = REAL(KDGL - KDGUX + 1,JPRB) + ZKP1 = ZK + 1 + ZLAM = ZK/ZKP1 + ZNY = PALFA/ZKP1 + + DO JLON=KDLUN,IENDY+KDAD + + ZEPSA = ((PWORK(JLON,JFL,KDGUN)-PWORK(JLON,JFL,KDGUX))/ZK -& + & PWORK(JLON,JFL,KDGUX)+PWORK(JLON,JFL,KDGUX-1))*6._JPRB/ZKP1-& + & ZNY*(PWORK(JLON,JFL,KDGUX)-2.0_JPRB*PWORK(JLON,JFL,KDGUX-1)+& + & PWORK(JLON,JFL,KDGUX-2)) + + ZEPSB = (PWORK(JLON,JFL,KDGUN+1)-PWORK(JLON,JFL,KDGUN) -& + & (PWORK(JLON,JFL,KDGUN)-PWORK(JLON,JFL,KDGUX))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(JLON,JFL,KDGUN+2)-2.0_JPRB*PWORK(JLON,JFL,KDGUN+1) +& + & PWORK(JLON,JFL,KDGUN)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ ZMM + ZA = PWORK(JLON,JFL,KDGUX) + ZB = (PWORK(JLON,JFL,KDGUN)-PWORK(JLON,JFL,KDGUX))/ZK - (2.0_JPRB*& + & ZM1 & + & + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + DO JLAT=KDGUX+1,KDGL+KDAD + ZJ = REAL(JLAT - KDGUX,JPRB) + PWORK(JLON,JFL,JLAT) = ZA +ZJ*(ZB +ZJ*(ZC + ZJ * ZD)) + ENDDO + ENDDO + +ENDDO + +! ------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('ESPLINE',1,ZHOOK_HANDLE) +END SUBROUTINE ESPLINE +END MODULE ESPLINE_MOD diff --git a/src/etrans/cpu/biper/internal/ewindowe_mod.F90 b/src/etrans/cpu/biper/internal/ewindowe_mod.F90 new file mode 100644 index 000000000..8403865c7 --- /dev/null +++ b/src/etrans/cpu/biper/internal/ewindowe_mod.F90 @@ -0,0 +1,173 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EWINDOWE_MOD + +CONTAINS + +SUBROUTINE EWINDOWE(KDLON,KDLUX,KBWX,KDGL,KDGUX,KBWY,KFLD,PGPIN,PSCAL,LDBIX,LDBIY) + +! purpose : +! -------- +! Make boyd periodic extension. + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! KDLON : upper bound for the x (or longitude) dimension +! of C U I U P. +! KDGL : upper bound for the y (or latitude) dimension +! of the gridpoint array on C U I U P +! PGPIN : gridpoint array on C U I U P (gp:fields). +! PSCAL : window function scaling parameter +! LDBIX : .TRUE. windowing in x direction ( and vice versa ) +! LDBIY : .TRUE. windowing in y direction ( and vice versa ) + + +! references : +! ---------- + +! author : Fabrice Voitus and Piet Termonia, 07/2009 +! ------ +! +! modification : +! Daan Degrauwe 02/2012 Cleaned and generalized +! S. Martinez 03/2012 Calls to ERF under CPP key __PGI +! (ERF function is not intrinsic with PGI) +! R. El Khatib 27-Sep-2013 implicit sized PGPIN +! R. El Khatib 04-Aug-2016 new interface +! ----------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KBWX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KBWY +INTEGER(KIND=JPIM),INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(INOUT) :: PGPIN((KDLUX+2*KBWX+2*(KDLON-KDLUX))*(KDGUX+2*KBWY+2*(KDGL-KDGUX)),KFLD) +REAL(KIND=JPRB) ,INTENT(IN) :: PSCAL +LOGICAL ,INTENT(IN) :: LDBIX +LOGICAL ,INTENT(IN) :: LDBIY + +! FERF function +! ------------- + +#ifdef __PGI +REAL(KIND=JPRB), EXTERNAL :: ERF +#endif + +! scalars +! -------- + +INTEGER(KIND=JPIM) :: JFL, JGL, JLON, IOFF, IDLW, IDGW +INTEGER(KIND=JPIM) :: IWX, ILWX, IRWX, IWY, ILWY, IRWY, IBWXO, IBWYO +INTEGER(KIND=JPIM) :: ILATF, ILONF, IND1, IND, IOFF_LEFT,IOFF_RIGHT,IOFF_BOTTOM,IOFF_TOP +REAL(KIND=JPRB) :: ZI, ZJ, ZK, ZL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! local arrays : +! ------------ + +REAL(KIND=JPRB) :: ZBELX(2*KBWX+(KDLON-KDLUX)) +REAL(KIND=JPRB) :: ZBELY(2*KBWY+(KDGL -KDGUX)) + +!* 1. Boyd Bi-periodic Extension Method. +! --------------------------------- + +IF (LHOOK) CALL DR_HOOK('EWINDOWE',0,ZHOOK_HANDLE) + +IF ((.NOT.LDBIX).AND.(.NOT.LDBIY)) THEN + IF (LHOOK) CALL DR_HOOK('EWINDOWE',1,ZHOOK_HANDLE) + RETURN +ENDIF + +IDGW=SIZE(ZBELY) +IDLW=SIZE(ZBELX) + +! Bell window functions : +! --------------------- + +IF (LDBIX) THEN + DO JLON=1,IDLW + ! variable between -1 and 1 + ZJ=REAL(-IDLW-1+2*JLON,JPRB)/(IDLW+1) + ZL=ZJ/SQRT(1.0_JPRB-(ZJ*ZJ)) +#ifdef __PGI + ZBELX(JLON)=(1.0_JPRB+ERF(REAL(PSCAL*ZL)))/2.0_JPRB +#else + ZBELX(JLON)=(1.0_JPRB+ERF(PSCAL*ZL))/2.0_JPRB +#endif + ENDDO +ENDIF + +IF (LDBIY) THEN + DO JGL=1,IDGW + ! variable between -1 and 1 + ZJ=REAL(-IDGW-1+2*JGL,JPRB)/(IDGW+1) + ZL=ZJ/SQRT(1.0_JPRB-(ZJ*ZJ)) +#ifdef __PGI + ZBELY(JGL)=(1.0_JPRB+ERF(REAL(PSCAL*ZL)))/2.0_JPRB +#else + ZBELY(JGL)=(1.0_JPRB+ERF(PSCAL*ZL))/2.0_JPRB +#endif + ENDDO +ENDIF + + +! Windowing on P+G-zone : +! -------------------- + +IOFF=(KDLUX+2*(KBWX+KDGL-KDGUX)) +IBWXO=KBWX+(KDLON-KDLUX) +IBWYO=KBWY+(KDGL-KDGUX) + +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFL,JGL,JLON,ILONF,ILATF,IND1,IND,IOFF_LEFT,IOFF_RIGHT,IOFF_BOTTOM,IOFF_TOP) +DO JFL=1,KFLD + IF (LDBIX) THEN + ! X-direction + DO JGL=1,KDGL+IDGW + IOFF_LEFT=(JGL-1)*IOFF + IOFF_RIGHT=IOFF_LEFT+KDLON + DO JLON=1,IDLW + PGPIN(IOFF_RIGHT+JLON,JFL) = ZBELX(JLON)*PGPIN(IOFF_LEFT+JLON,JFL) +& + & (1.0_JPRB-ZBELX(JLON))*PGPIN(IOFF_RIGHT+JLON,JFL) + ENDDO + ENDDO + ENDIF + IF (LDBIY) THEN + ! Y-direction + DO JGL=1,IDGW + IOFF_BOTTOM=(JGL-1)*IOFF + IOFF_TOP=(KDGL+JGL-1)*IOFF +!DIR$ IVDEP + DO JLON=1,KDLON+IDLW + PGPIN(IOFF_TOP+JLON,JFL) = ZBELY(JGL)*PGPIN(IOFF_BOTTOM+JLON,JFL) +& + & (1.0_JPRB-ZBELY(JGL))*PGPIN(IOFF_TOP+JLON,JFL) + ENDDO + ENDDO + ENDIF +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('EWINDOWE',1,ZHOOK_HANDLE) + +END SUBROUTINE EWINDOWE + +END MODULE EWINDOWE_MOD diff --git a/src/etrans/cpu/biper/internal/extper_mod.F90 b/src/etrans/cpu/biper/internal/extper_mod.F90 new file mode 100644 index 000000000..48df1d3cb --- /dev/null +++ b/src/etrans/cpu/biper/internal/extper_mod.F90 @@ -0,0 +1,155 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EXTPER_MOD +CONTAINS +SUBROUTINE EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& + & KPOINTERS,KALFA) + +! purpose : +! -------- +! Make spline extension. + +! *CALL* *EXTPER(PWORK,KDIM,KPSTA,KPOINTS,KFLDS,KUNITS,& +! & KPOINTERS,KALFA) + +! externals : +! ---------- +! None + +! explicit arguments : +! ------------------ +! PWORK : Input: values in C U I area +! : Output: input+(spline extension on the E area) +! KDIM : Dimension of the C U I U E unit of work (one row or one m) +! KPSTA : Position where the unit of work starts +! KPOINTS : Position where the unit of work ends +! KFLDS : number of 2D fields +! KUNITS : Number of units of work +! KPOINTERS : Array of pointers for the units of work +! KALFA : boundary condition of a spline: +! = 0 ... natural spline +! = 1 ... boundary condition computed differentially +! (additional option) +! references : +! ---------- + +! author : +! ------ +! M. Hortal 03-11-2009 +! ----------------------------------------------- + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DISTR + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(INOUT) :: PWORK(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KDIM +INTEGER(KIND=JPIM),INTENT(IN) :: KPSTA +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTS +INTEGER(KIND=JPIM),INTENT(IN) :: KFLDS +INTEGER(KIND=JPIM),INTENT(IN) :: KUNITS +INTEGER(KIND=JPIM),INTENT(IN) :: KPOINTERS(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KALFA + +! arrays : +! -------- +INTEGER(KIND=JPIM) :: IENDX, IENDY, JFL, JLAT, JLON, IA + +REAL(KIND=JPRB) :: ZA, ZB, ZC, ZD, ZEPSA, ZEPSB, ZJ, ZK, ZKP1,& + & ZLAM, ZLAMB, ZM1, ZM2, ZMM, ZNY +REAL(KIND=JPRB) :: ZMAX(KUNITS), ZMIN(KUNITS) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +#include "abor1.intfb.h" + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EXTPER',0,ZHOOK_HANDLE) + +!* 0. Security +! -------- + +IF(UBOUND(PWORK,1) < KFLDS) THEN + CALL ABOR1(' EXTPER, PWORK first dimension too small') +ENDIF +IF(UBOUND(PWORK,2) < KDIM+2) THEN + WRITE(NOUT,*) ' UBOUND(PWORK,2)=',UBOUND(PWORK,2),' KDIM=',KDIM,' KUNITS=',& + &KUNITS + CALL ABOR1(' EXTPER, PWORK second dimension too small') +ENDIF +IF(UBOUND(KPOINTERS,1) < KUNITS) THEN + CALL ABOR1(' EXTPER, KPOINTERS too small') +ENDIF +IF(UBOUND(PWORK,2) < KPOINTERS(KUNITS)+KDIM) THEN + WRITE(NERR,*) ' EXTPER, KUNITS=',KUNITS,' KPOINTERS=',KPOINTERS(1:KUNITS),& + &' KDIM=',KDIM,' UBOUND(PWORK,2)=',UBOUND(PWORK,2) + CALL ABOR1(' EXTPER, value of KPOINTERS too large') +ENDIF + +!* 1. Spline Extension. +! ------------------- + +DO JFL = 1, KFLDS + + ZK = REAL(KDIM-KPOINTS+1,JPRB) + ZKP1 = ZK + 1.0_JPRB + ZLAMB = ZK/ZKP1 + ZNY = REAL(KALFA,JPRB)/ZKP1 + + DO JLAT=1,KUNITS + ZEPSA = & + &((PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK -& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1))*6._JPRB/ZKP1 -& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPOINTS)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS-2)) + + ZEPSB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA) -& + & (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK)*6._JPRB/ZKP1-& + & ZNY*(PWORK(JFL,KPOINTERS(JLAT)+KPSTA+2)-& + & 2.0_JPRB* PWORK(JFL,KPOINTERS(JLAT)+KPSTA+1)+& + & PWORK(JFL,KPOINTERS(JLAT)+KPSTA)) + + ZMM = 4._JPRB - ZLAMB*ZLAMB + ZM1 = (2.0_JPRB*ZEPSA - ZLAMB*ZEPSB)/ZMM + ZM2 = (2.0_JPRB*ZEPSB - ZLAMB*ZEPSA)/ZMM + ZA = PWORK(JFL,KPOINTERS(JLAT)+KPOINTS) + ZB = (PWORK(JFL,KPOINTERS(JLAT)+KPSTA)-& + & PWORK(JFL,KPOINTERS(JLAT)+KPOINTS))/ZK-& + & (2.0_JPRB*ZM1 + ZM2) * ZK/6._JPRB + ZC = 0.5_JPRB * ZM1 + ZD = (ZM2 - ZM1)/(6._JPRB*ZK) + + + DO JLON=KPOINTERS(JLAT)+KPOINTS+1,KPOINTERS(JLAT)+KDIM + + ZJ = REAL(JLON - (KPOINTERS(JLAT)+KPOINTS),JPRB) + PWORK(JFL,JLON) = ZA + ZJ * (ZB + ZJ * (ZC + ZD * ZJ)) + ENDDO + ENDDO + + +ENDDO + +IF (LHOOK) CALL DR_HOOK('EXTPER',1,ZHOOK_HANDLE) +END SUBROUTINE EXTPER +END MODULE EXTPER_MOD diff --git a/src/etrans/cpu/external/edir_trans.F90 b/src/etrans/cpu/external/edir_trans.F90 new file mode 100644 index 000000000..d542ca93c --- /dev/null +++ b/src/etrans/cpu/external/edir_trans.F90 @@ -0,0 +1,511 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EDIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! PMEANU(:),PMEANV(:) - mean wind +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTL - control of Legendre transform +! EFTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: 01-03-13 adaptation to aladin +! P. Smolikova 02-09-30 : AUX_PROC for d4 in NH +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 19-04-2013 Comparison of ubound(pspdiv,1) +! with ubound(pspvor,1) +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTL_MOD ,ONLY : EDIR_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(1808,0) +CALL ESET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +! This is for use in TRGTOL which is shared with adjoint inverse transform +LSCDERS=.FALSE. +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANS : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= UBOUND(PSPVOR,1)) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANS : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1808,1) + +! ------------------------------------------------------------------ + +CALL EDIR_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANS diff --git a/src/etrans/cpu/external/edir_transad.F90 b/src/etrans/cpu/external/edir_transad.F90 new file mode 100644 index 000000000..db2913a56 --- /dev/null +++ b/src/etrans/cpu/external/edir_transad.F90 @@ -0,0 +1,504 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EDIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EDIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EDIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIR_TRANS_CTLAD - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EDIR_TRANS_CTLAD_MOD ,ONLY : EDIR_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G,IF_SC3B_G +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',0,ZHOOK_HANDLE) + +CALL GSTATS(1810,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +IF_UV = 0 +IF_UV_G = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G = 0 +IF_SC3B_G = 0 +NPROMA = D%NGPTOT +LSCDERS=.FALSE. ! This is for use in TRLTOG which is shared with inverse transform +LVORGP=.FALSE. +LDIVGP=.FALSE. +LUVDER=.FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETUV CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV) THEN + WRITE(NERR,*) 'DIR_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANSAD:KVSETSC CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC2_G + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+NF_SC2 + IF_SCALARS_G = IF_SCALARS_G +IF_SC2_G +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G = UBOUND(KVSETSC3A,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G*UBOUND(PSPSC3A,3) + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G = UBOUND(PSPSC3A,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3A_G*UBOUND(PSPSC3A,3) + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G = UBOUND(KVSETSC3B,1) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G*UBOUND(PSPSC3B,3) + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'DIR_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('DIR_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G = UBOUND(PSPSC3B,1) + IF_SCALARS_G = IF_SCALARS_G +IF_SC3B_G*UBOUND(PSPSC3B,3) + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +! Compute derived variables + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_FS = 2*IF_UV + IF_SCALARS + +IF_GP = 2*IF_UV_G+IF_SCALARS_G + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('DIR_TRANSAD : PSPVOR PRESENT BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) /= IF_UV) THEN + WRITE(NERR,*)'DIR_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('DIR_TRANSAD : INCONSISTENT FIRST DIM. OF PSPVOR AND PSPDIV') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'DIR_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR TOO SHORT') + ENDIF + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('DIR_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + ENDIF +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('DIR_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + CALL ABORT_TRANS('DIR_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < 2) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),2 + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3A,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3A,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('DIR_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'DIR_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('DIR_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G) THEN + WRITE(NOUT,*)'DIR_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G + CALL ABORT_TRANS('DIR_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= UBOUND(PSPSC3B,3) ) THEN + WRITE(NOUT,*)'DIR_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),UBOUND(PSPSC3B,3) + CALL ABORT_TRANS('DIR_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'DIR_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('DIR_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('DIR_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1810,1) + +! Perform transform + +CALL EDIR_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_UV,IF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) +IF (LHOOK) CALL DR_HOOK('EDIR_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +!endif INTERFACE + +END SUBROUTINE EDIR_TRANSAD + diff --git a/src/etrans/cpu/external/edist_grid.F90 b/src/etrans/cpu/external/edist_grid.F90 new file mode 100644 index 000000000..85d488a32 --- /dev/null +++ b/src/etrans/cpu/external/edist_grid.F90 @@ -0,0 +1,147 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EDIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *EDIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL EDIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFSEND,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_GRID: KFROM TOO SHORT!') +ENDIF +IFSEND = 0 +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_GRID:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_GRID:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'EDIST_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFDISTG) THEN + WRITE(NOUT,*)'EDIST_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFDISTG + CALL ABORT_TRANS('EDIST_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'EDIST_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('EDIST_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('EDIST_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFSEND) THEN + CALL ABORT_TRANS('EDIST_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (UBOUND (KSORT, 1) /= UBOUND (PGP, 2)) THEN + CALL ABORT_TRANS('EDIST_GRID: DIMENSION MISMATCH KSORT, PGP') + ENDIF +ENDIF + +CALL DIST_GRID_CTL(PGPG,KFDISTG,IPROMA,KFROM,PGP,KSORT) +IF (LHOOK) CALL DR_HOOK('EDIST_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_GRID + diff --git a/src/etrans/cpu/external/edist_spec.F90 b/src/etrans/cpu/external/edist_spec.F90 new file mode 100644 index 000000000..8aadb6a77 --- /dev/null +++ b/src/etrans/cpu/external/edist_spec.F90 @@ -0,0 +1,206 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EDIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSORT) + +!**** *EDIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! P.Marguinaud 10-Oct-2014 Add KSORT argument (change the order of fields) + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR ,ONLY : DALD + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE DIST_SPEC_CONTROL_MOD ,ONLY : DIST_SPEC_CONTROL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM),INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL,INTENT(IN) :: LDIM1_IS_FLD +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KSORT (:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFDISTG) +INTEGER(KIND=JPIM) :: IFSEND,IFRECV,J, IFLD, ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G, ISPEC2MX +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD=.TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD=LDIM1_IS_FLD +IF(LLDIM1_IS_FLD) THEN + IFLD=1 + ICOEFF=2 +ELSE + IFLD=2 + ICOEFF=1 +ENDIF + +ISMAX = RALD%NMSMAX +ALLOCATE(IDIM0G(0:ISMAX)) +ALLOCATE(IALLMS(ISMAX+1)) +ALLOCATE(IKN(0:ISMAX)) +ISPEC2 = D%NSPEC2 +ISPEC2_G = R%NSPEC2_G +IPOSSP(:) = D%NPOSSP(:) +IDIM0G(:) = D%NDIM0G(:) +ISPEC2MX = D%NSPEC2MX +IUMPP(:) = D%NUMPP(:) +IALLMS(:) = D%NALLMS(:) +IPTRMS(:) = D%NPTRMS(:) +DO J=0,ISMAX + IKN(J)=2*DALD%NCPL2M(J) +ENDDO + +IF(UBOUND(KFROM,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KFROM TOO SHORT!') +ENDIF + +IFSEND = 0 +IFRECV = 0 + +DO J=1,KFDISTG + IF(KFROM(J) < 1 .OR. KFROM(J) > NPROC) THEN + WRITE(NERR,*) 'EDIST_SPEC:ILLEGAL KFROM VALUE',KFROM(J),J + CALL ABORT_TRANS('EDIST_SPEC:ILLEGAL KFROM VALUE') + ENDIF + IF(KFROM(J) == MYPROC) IFSEND = IFSEND+1 +ENDDO + +IF(IFSEND > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EDIST_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFSEND) THEN + WRITE(NERR,*)'EDIST_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFSEND + CALL ABORT_TRANS('EDIST_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*)'EDIST_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFDISTG) THEN + CALL ABORT_TRANS('EDIST_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFDISTG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EDIST_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EDIST_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFRECV = IFRECV+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFDISTG) +ELSE + IFRECV = KFDISTG + IVSET(:) = MYSETV +ENDIF + +IF(IFRECV > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELDS TO RECEIVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFRECV) THEN + CALL ABORT_TRANS('EDIST_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EDIST_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +IF (PRESENT (KSORT)) THEN + IF (.NOT. PRESENT (PSPEC)) THEN + CALL ABORT_TRANS('EDIST_SPEC: KSORT REQUIRES PSPEC') + ENDIF + IF (UBOUND (KSORT, 1) /= UBOUND (PSPEC, IFLD)) THEN + CALL ABORT_TRANS('EDIST_SPEC: DIMENSION MISMATCH KSORT, PSPEC') + ENDIF +ENDIF + +CALL DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,IVSET,PSPEC,LLDIM1_IS_FLD,& + & ISMAX,ISPEC2,ISPEC2MX,ISPEC2_G,IPOSSP,IDIM0G,IUMPP,IALLMS,IPTRMS,IKN,KSORT) +DEALLOCATE(IDIM0G) +IF (LHOOK) CALL DR_HOOK('EDIST_SPEC',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC + diff --git a/src/etrans/cpu/external/egath_grid.F90 b/src/etrans/cpu/external/egath_grid.F90 new file mode 100644 index 000000000..2f713e5b8 --- /dev/null +++ b/src/etrans/cpu/external/egath_grid.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EGATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *EGATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL EGATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, MYPROC, NPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM),INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IFRECV,J,IUBOUND(3),IPROMA,IGPBLKS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IPROMA = D%NGPTOT +IF(PRESENT(KPROMA)) THEN + IPROMA = KPROMA +ENDIF +IGPBLKS = (D%NGPTOT-1)/IPROMA+1 + +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('GATH_GRID: KTO TOO SHORT!') +ENDIF + +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'GATH_GRID:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('GATH_GRID:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IUBOUND=UBOUND(PGP) +IF(IUBOUND(1) < IPROMA) THEN + WRITE(NOUT,*)'GATH_GRID:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),IPROMA + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(2) < KFGATHG) THEN + WRITE(NOUT,*)'GATH_GRID:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),KFGATHG + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGP TOO SMALL ') +ENDIF +IF(IUBOUND(3) < IGPBLKS) THEN + WRITE(NOUT,*)'GATH_GRID:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),IGPBLKS + CALL ABORT_TRANS('GATH_GRID:THIRD DIMENSION OF PGP TOO SMALL ') +ENDIF + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PGPG)) THEN + CALL ABORT_TRANS('GATH_GRID:PGPG MISSING') + ENDIF + IF(UBOUND(PGPG,1) < D%NGPTOTG) THEN + CALL ABORT_TRANS('GATH_GRID:FIRST DIMENSION OF PGPG TOO SMALL') + ENDIF + IF(UBOUND(PGPG,2) < IFRECV) THEN + CALL ABORT_TRANS('GATH_GRID:SECOND DIMENSION OF PGPG TOO SMALL') + ENDIF +ENDIF + +CALL GATH_GRID_CTL(PGPG,KFGATHG,IPROMA,KTO,PGP) +IF (LHOOK) CALL DR_HOOK('EGATH_GRID',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_GRID + diff --git a/src/etrans/cpu/external/egath_spec.F90 b/src/etrans/cpu/external/egath_spec.F90 new file mode 100644 index 000000000..75dc8ede2 --- /dev/null +++ b/src/etrans/cpu/external/egath_spec.F90 @@ -0,0 +1,214 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EGATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,KMSMAX,LDZA0IP) + +!**** *EGATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDZA0IP - Set to zero imaginary part of first coefficients + +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- GATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! Modified 03-09-30 Y. Seity, bug correction IFSEND=0 +! R. El Khatib 23-Oct-2012 Monkey business +! P.Marguinaud 10-Oct-2013 Add an option to set (or not) first +! coefficients imaginary part to zero +! R. El Khatib 01-Dec-2020 Merge egath_spec_control and gath_spec_control +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYPROC, NPROC +USE TPMALD_DISTR + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GATH_SPEC_CONTROL_MOD ,ONLY : GATH_SPEC_CONTROL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IVSET(KFGATHG) +INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J +INTEGER(KIND=JPIM) :: IFLD,ICOEFF +INTEGER(KIND=JPIM) :: ISMAX, IMSMAX, ISPEC2, ISPEC2_G,ISPEC2MX +INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) +INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) +LOGICAL :: LLDIM1_IS_FLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',0,ZHOOK_HANDLE) +! Set current resolution +CALL ESET_RESOL(KRESOL) + +LLDIM1_IS_FLD = .TRUE. +IF(PRESENT(LDIM1_IS_FLD)) LLDIM1_IS_FLD = LDIM1_IS_FLD + +IF(LLDIM1_IS_FLD) THEN + IFLD = 1 + ICOEFF = 2 +ELSE + IFLD = 2 + ICOEFF = 1 +ENDIF +IF(UBOUND(KTO,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KTO TOO SHORT!') +ENDIF + +ISMAX = R%NSMAX +IMSMAX = RALD%NMSMAX +IF(PRESENT(KSMAX)) ISMAX = KSMAX +IF(PRESENT(KMSMAX)) IMSMAX = KMSMAX +ALLOCATE(IDIM0G(0:IMSMAX)) +ALLOCATE(IALLMS(IMSMAX+1)) +ALLOCATE(IKN(0:IMSMAX)) +IF(IMSMAX /= RALD%NMSMAX .OR. ISMAX /= R%NSMAX) THEN + CALL ABORT_TRANS('EGATH_SPEC:TRUNCATION CHANGE NOT YET CODED') +ELSE + ISPEC2 = D%NSPEC2 + ISPEC2_G = R%NSPEC2_G + IPOSSP(:) = D%NPOSSP(:) + IDIM0G(:) = D%NDIM0G(:) + ISPEC2MX = D%NSPEC2MX + IUMPP(:) = D%NUMPP(:) + IALLMS(:) = D%NALLMS(:) + IPTRMS(:) = D%NPTRMS(:) + DO J=0,IMSMAX + IKN(J)=2*DALD%NCPL2M(J) + ENDDO +ENDIF + +IFSEND = 0 +IFRECV = 0 +DO J=1,KFGATHG + IF(KTO(J) < 1 .OR. KTO(J) > NPROC) THEN + WRITE(NERR,*) 'EGATH_SPEC:ILLEGAL KTO VALUE',KTO(J),J + CALL ABORT_TRANS('EGATH_SPEC:ILLEGAL KTO VALUE') + ENDIF + IF(KTO(J) == MYPROC) IFRECV = IFRECV+1 +ENDDO + +IF(IFRECV > 0) THEN + IF(.NOT.PRESENT(PSPECG)) THEN + CALL ABORT_TRANS('EGATH_SPEC:PSPECG MISSING') + ENDIF + IF(UBOUND(PSPECG,IFLD) < IFRECV) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',IFLD, UBOUND(PSPECG,IFLD), IFRECV + CALL ABORT_TRANS('EGATH_SPEC:FIELD DIMENSION OF PSPECG TOO SMALL') + ENDIF + IF(UBOUND(PSPECG,ICOEFF) < ISPEC2_G) THEN + WRITE(NERR,*) 'EGATH_SPEC: ',ICOEFF, UBOUND(PSPECG,ICOEFF), ISPEC2_G + CALL ABORT_TRANS('EGATH_SPEC:COEFF DIMENSION OF PSPECG TOO SMALL') + ENDIF +ENDIF + +IF(PRESENT(KVSET)) THEN + IF(UBOUND(KVSET,1) < KFGATHG) THEN + CALL ABORT_TRANS('EGATH_SPEC: KVSET TOO SHORT!') + ENDIF + DO J=1,KFGATHG + IF(KVSET(J) > NPRTRV .OR. KVSET(J) < 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFSEND = IFSEND+1 + ENDIF + ENDDO + IVSET(:) = KVSET(1:KFGATHG) +ELSEIF(NPRTRV > 1) THEN + WRITE(NERR,*) 'EGATH_SPEC:KVSET MISSING, NPRTRV ',NPRTRV + CALL ABORT_TRANS('EGATH_SPEC:KVSET MISSING, NPRTRV > 1') +ELSE + IFSEND = KFGATHG + IVSET(:) = 1 +ENDIF + +IF(IFSEND > 0 ) THEN + IF(.NOT.PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELDS TO RECIEVE AND PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,IFLD) < IFSEND) THEN + CALL ABORT_TRANS('EGATH_SPEC: FIELD DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,ICOEFF) < ISPEC2 ) THEN + CALL ABORT_TRANS('EGATH_SPEC: COEFF DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& + & IMSMAX,ISPEC2,ISPEC2MX,ISPEC2_G,IPOSSP,IDIM0G,IUMPP,IALLMS,IPTRMS,IKN,LDZA0IP) +DEALLOCATE(IDIM0G) + +IF (LHOOK) CALL DR_HOOK('EGATH_SPEC',1,ZHOOK_HANDLE) +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC diff --git a/src/etrans/cpu/external/egpnorm_trans.F90 b/src/etrans/cpu/external/egpnorm_trans.F90 new file mode 100644 index 000000000..9d2986b8d --- /dev/null +++ b/src/etrans/cpu/external/egpnorm_trans.F90 @@ -0,0 +1,104 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EGPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *EGPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms + +!** Interface. +! ---------- +! CALL EGPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! George Mozdzynski *ECMWF* + +! Modifications. +! -------------- +! Original : 19th Sept 2008 +! R. El Khatib 07-08-2009 Optimisation directive for NEC +! R. El Khatib 16-Sep-2019 merge with global model code +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD + +!ifndef INTERFACE + +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DIM ,ONLY : R +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE GPNORM_TRANS_CTL_MOD, ONLY : GPNORM_TRANS_CTL +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL ,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM),OPTIONAL, INTENT(IN) :: KRESOL + +!ifndef INTERFACE + +! Local variables +INTEGER(KIND=JPIM) :: JGL +REAL(KIND=JPRD) :: ZW(R%NDGL) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',0,ZHOOK_HANDLE) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +DO JGL=1,R%NDGL + ZW(1:)=1._JPRB/G%NLOEN(JGL) +ENDDO +CALL GPNORM_TRANS_CTL(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,ZW(1:R%NDGL)) + +IF (LHOOK) CALL DR_HOOK('EGPNORM_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + + +END SUBROUTINE EGPNORM_TRANS diff --git a/src/etrans/cpu/external/einv_trans.F90 b/src/etrans/cpu/external/einv_trans.F90 new file mode 100644 index 000000000..2b0225671 --- /dev/null +++ b/src/etrans/cpu/external/einv_trans.F90 @@ -0,0 +1,618 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EINV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) +! Y. Seity and G. Radnoti : 03-09-29 : phasing for AL27 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTL_MOD ,ONLY : EINV_TRANS_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',0,ZHOOK_HANDLE) +CALL GSTATS(1807,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF(.NOT. PRESENT(PSPSCALAR) ) THEN + CALL ABORT_TRANS('INV_TRANS : KVSETSC PRESENT BUT PSPSCALAR MISSING') + ENDIF + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANS:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'INV_TRANS:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('INV_TRANS:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS_G > 0 ) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPVOR MISSING') + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPVOR,1) < IF_UV ',UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPVOR TOO SHORT') + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS('INV_TRANS : IF_UV > 0 BUT PSPDIV MISSING') + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPDIV,1) < IF_UV ',UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS('INV_TRANS : PSPDIV TOO SHORT') + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'INV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('INV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('INV_TRANS: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('INV_TRANS:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('INV_TRANS:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('INV_TRANS:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'INV_TRANS:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('INV_TRANS:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANS:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANS:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'INV_TRANS:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('INV_TRANS:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('INV_TRANS:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1807,1) + +! ------------------------------------------------------------------ + +! Perform transform +CALL EINV_TRANS_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV ) +IF (LHOOK) CALL DR_HOOK('EINV_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANS + diff --git a/src/etrans/cpu/external/einv_transad.F90 b/src/etrans/cpu/external/einv_transad.F90 new file mode 100644 index 000000000..3afd66432 --- /dev/null +++ b/src/etrans/cpu/external/einv_transad.F90 @@ -0,0 +1,620 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE EINV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EINV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) + +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti: like in direct code: IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR +!USE TPM_DIM +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, & + & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV +!USE TPM_GEOMETRY +!USE TPM_FIELDS +!USE TPM_FFT + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EINV_TRANS_CTLAD_MOD ,ONLY : EINV_TRANS_CTLAD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL,INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +!ifndef INTERFACE + +! Local varaibles +INTEGER(KIND=JPIM) :: IUBOUND(4),J +INTEGER(KIND=JPIM) :: IF_UV,IF_UV_G,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP,IF_OUT_LT +INTEGER(KIND=JPIM) :: IF_SCDERS,IF_UV_PAR +INTEGER(KIND=JPIM) :: IF_SC2_G,IF_SC3A_G2,IF_SC3A_G3,IF_SC3B_G2,IF_SC3B_G3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',0,ZHOOK_HANDLE) +CALL GSTATS(1809,0) + +! Set current resolution +CALL ESET_RESOL(KRESOL) + +! Set defaults + +LVORGP = .FALSE. +LDIVGP = .FALSE. +LUVDER = .FALSE. +IF_UV = 0 +IF_UV_G = 0 +IF_UV_PAR = 0 +IF_SCALARS = 0 +IF_SCALARS_G = 0 +IF_SCDERS = 0 +NF_SC2 = 0 +NF_SC3A = 0 +NF_SC3B = 0 +IF_SC2_G = 0 +IF_SC3A_G2 = 0 +IF_SC3B_G2 = 0 +IF_SC3A_G3 = 0 +IF_SC3B_G3 = 0 +NPROMA = D%NGPTOT +LSCDERS = .FALSE. + +! Decide requirements + +IF(PRESENT(KVSETUV)) THEN + IF_UV_G = UBOUND(KVSETUV,1) + IF_UV_PAR = 2 + DO J=1,IF_UV_G + IF(KVSETUV(J) > NPRTRV .OR. KVSETUV(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETUV(J) > NPRTRV ',J,KVSETUV(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETUV TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETUV(J) == MYSETV) THEN + IF_UV = IF_UV+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPVOR)) THEN + IF_UV = UBOUND(PSPVOR,1) + IF_UV_G = IF_UV + IF_UV_PAR = 2 +ENDIF + +IF(PRESENT(KVSETSC)) THEN + IF_SCALARS_G = UBOUND(KVSETSC,1) + DO J=1,IF_SCALARS_G + IF(KVSETSC(J) > NPRTRV .OR. KVSETSC(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC(J) > NPRTRV ',J,KVSETSC(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSCALAR)) THEN + IF_SCALARS = UBOUND(PSPSCALAR,1) + IF_SCALARS_G = IF_SCALARS +ENDIF + +IF(PRESENT(KVSETSC2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 BUT NOT PSPSC2') + ENDIF + IF_SC2_G = UBOUND(KVSETSC2,1) + IF_SCALARS_G = IF_SCALARS_G+UBOUND(KVSETSC2,1) + DO J=1,UBOUND(KVSETSC2,1) + IF(KVSETSC2(J) > NPRTRV .OR. KVSETSC2(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC2(J) > NPRTRV ',J,KVSETSC2(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC2 TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC2(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+1 + NF_SC2 = NF_SC2+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC2)) THEN + IF_SC2_G = UBOUND(PSPSC2,1) + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC2,1) + IF_SCALARS_G = IF_SCALARS_G +UBOUND(PSPSC2,1) + NF_SC2 = UBOUND(PSPSC2,1) +ENDIF + +IF(PRESENT(KVSETSC3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3A BUT NOT PSPSC3A') + ENDIF + IF_SC3A_G2 = UBOUND(KVSETSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3A_G2*IF_SC3A_G3 + DO J=1,UBOUND(KVSETSC3A,1) + IF(KVSETSC3A(J) > NPRTRV .OR. KVSETSC3A(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3A(J) > NPRTRV ',J,KVSETSC3A(J),NPRTRV + CALL ABORT_TRANS& + & ('INV_TRANSAD:KVSETSC3A TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3A(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,3) + NF_SC3A = NF_SC3A+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3A)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3A,1)*UBOUND(PSPSC3A,3) + IF_SC3A_G2 = UBOUND(PSPSC3A,1) + IF_SC3A_G3 = UBOUND(PSPSC3A,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3A_G2*IF_SC3A_G3 + NF_SC3A = UBOUND(PSPSC3A,1) +ENDIF + +IF(PRESENT(KVSETSC3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B BUT NOT PSPSC3B') + ENDIF + IF_SC3B_G2 = UBOUND(KVSETSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G+IF_SC3B_G2*IF_SC3B_G3 + DO J=1,UBOUND(KVSETSC3B,1) + IF(KVSETSC3B(J) > NPRTRV .OR. KVSETSC3B(J) < 1) THEN + WRITE(NERR,*) 'EINV_TRANSAD:KVSETSC3B(J) > NPRTRV ',J,KVSETSC3B(J),NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD:KVSETSC3B TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSETSC3B(J) == MYSETV) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,3) + NF_SC3B = NF_SC3B+1 + ENDIF + ENDDO +ELSEIF(PRESENT(PSPSC3B)) THEN + IF_SCALARS = IF_SCALARS+UBOUND(PSPSC3B,1)*UBOUND(PSPSC3B,3) + IF_SC3B_G2 = UBOUND(PSPSC3B,1) + IF_SC3B_G3 = UBOUND(PSPSC3B,3) + IF_SCALARS_G = IF_SCALARS_G + IF_SC3B_G2*IF_SC3B_G3 + NF_SC3B = UBOUND(PSPSC3B,1) +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(LDSCDERS)) THEN + LSCDERS = LDSCDERS + IF (LSCDERS) IF_SCDERS = IF_SCALARS + ENDIF +ENDIF + +IF(PRESENT(KPROMA)) THEN + NPROMA = KPROMA +ENDIF + +IF(PRESENT(LDVORGP)) THEN + LVORGP = LDVORGP +ENDIF + +IF(PRESENT(LDDIVGP)) THEN + LDIVGP = LDDIVGP +ENDIF + +IF(PRESENT(LDUVDER)) THEN + LUVDER = LDUVDER +ENDIF + +! Compute derived variables + +IF(LVORGP) LDIVGP = .TRUE. + +NGPBLKS = (D%NGPTOT-1)/NPROMA+1 + +IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + +IF(IF_UV > 0 .AND. LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF(IF_UV > 0 .AND. LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV +ENDIF +IF_FS = IF_OUT_LT+IF_SCDERS +IF(IF_UV > 0 .AND. LUVDER) THEN + IF_FS = IF_FS+2*IF_UV +ENDIF + +IF_GP = 2*IF_UV_G+IF_SCALARS_G +IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IF_SC2_G = IF_SC2_G*3 + IF_SC3A_G3 = IF_SC3A_G3*3 + IF_SC3B_G3 = IF_SC3B_G3*3 +ENDIF +IF(IF_UV_G > 0 .AND. LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IF_UV_PAR = IF_UV_PAR+1 +ENDIF +IF(IF_UV_G > 0 .AND. LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IF_UV_PAR = IF_UV_PAR+2 +ENDIF + +! Consistency checks + +IF (IF_UV > 0) THEN + IF(.NOT. PRESENT(PSPVOR) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPVOR MISSING") + ENDIF + IF(UBOUND(PSPVOR,1) < IF_UV) THEN + WRITE(NERR,*)'EINV_TRANSAD : UBOUND(PSPVOR,1) < IF_UV ',& + & UBOUND(PSPVOR,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPVOR TOO SHORT") + ENDIF + IF(.NOT. PRESENT(PSPDIV) ) THEN + CALL ABORT_TRANS("EINV_TRANSAD : IF_UV > 0 BUT PSPDIV MISSING") + ENDIF + IF(UBOUND(PSPDIV,1) < IF_UV) THEN + WRITE(NERR,*)'INV_TRANSAD : UBOUND(PSPDIV,1) < IF_UV ',& + & UBOUND(PSPDIV,1),IF_UV + CALL ABORT_TRANS("EINV_TRANSAD : PSPDIV TOO SHORT") + ENDIF +ENDIF + +IF (IF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IF(PRESENT(PSPSC3A))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3A BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC3B))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC3B BOTH PRESENT') + ENDIF + IF(PRESENT(PSPSC2))THEN + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR AND PSPSC2 BOTH PRESENT') + ENDIF + IF(UBOUND(PSPSCALAR,1) < IF_SCALARS) THEN + WRITE(NERR,*)'EINV_TRANS : UBOUND(PSPSCALAR,1) < IF_SCALARS) ',& + & UBOUND(PSPSCALAR,1),IF_SCALARS + CALL ABORT_TRANS('EINV_TRANS : PSPSCALAR TOO SHORT') + ENDIF + ELSEIF(PRESENT(PSPSC3A)) THEN + ENDIF +ENDIF + +IF(IF_UV_G == 0) THEN + LUVDER = .FALSE. +ENDIF + +IF(NPRTRV >1) THEN + IF(IF_UV > 0 .AND. .NOT. PRESENT(KVSETUV)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IF_UV > 0 AND NOT PRESENT(KVSETUV)',& + & NPRTRV,IF_UV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSCALAR) .AND. .NOT. PRESENT(KVSETSC)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSCALAR) AND NOT PRESENT(KVSETSC)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC2) .AND. .NOT. PRESENT(KVSETSC2)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC2) AND NOT PRESENT(KVSETSC2)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3A) .AND. .NOT. PRESENT(KVSETSC3A)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3A) AND NOT PRESENT(KVSETSC3A)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF + IF(PRESENT(PSPSC3B) .AND. .NOT. PRESENT(KVSETSC3B)) THEN + WRITE(NERR,*)'NPRTRV >1 AND PRESENT(PSPSC3B) AND NOT PRESENT(KVSETSC3B)',& + & NPRTRV + CALL ABORT_TRANS('EINV_TRANSAD: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF + +IF(PRESENT(PGP)) THEN + IF(PRESENT(PGPUV)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGPUV CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3A CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP3B CAN NOT BOTH BE PRESENT') + ENDIF + IF(PRESENT(PGP2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PGP AND PGP2 CAN NOT BOTH BE PRESENT') + ENDIF + IUBOUND(1:3)=UBOUND(PGP) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(2) < IF_GP) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP TOO SMALL ',IUBOUND(2),IF_GP + WRITE(NOUT,*)'IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER ',& + & IF_UV_G,IF_SCALARS_G,LSCDERS,LVORGP,LDIVGP,LUVDER + CALL ABORT_TRANS('EINV_TRANSAD:SECOND DIMENSION OF PGP TOO SMALL ') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'INV_TRANSAD:THIRD DIM. OF PGP TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP TOO SMALL ') + ENDIF +ELSE + IF(NPROMATR > 0 .AND. 2*IF_UV_G+IF_SCALARS_G > NPROMATR) THEN + CALL ABORT_TRANS('EINV_TRANSAD:ALTERNATIVES TO USING PGP NOT SUPPORTED WITH NPROMATR>0') + ENDIF +ENDIF + +IF(PRESENT(PGPUV)) THEN + IF(.NOT.PRESENT(PSPVOR)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPVOR HAS TO BE PRESENT WHEN PGPUV IS') + ENDIF + IUBOUND(1:4)=UBOUND(PGPUV) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGPUV TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_UV_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGPUV INCONSISTENT ',IUBOUND(2),IF_UV_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGPUV INCONSISTENT ') + ENDIF + IF(IUBOUND(3) < IF_UV_PAR) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(3),IF_UV_PAR + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGPUV TOO SMALL ') + ENDIF +ENDIF + +IF(PRESENT(PGP2)) THEN + IF(.NOT.PRESENT(PSPSC2)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC2 HAS TO BE PRESENT WHEN PGP2 IS') + ENDIF +ENDIF +IF(IF_SC2_G > 0) THEN + IF(PRESENT(PGP2)) THEN + IUBOUND(1:3)=UBOUND(PGP2) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'INV_TRANSAD:FIRST DIM. OF PGP2 TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('INV_TRANSAD:FIRST DIMENSION OF PGP2 TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC2_G) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC. DIM. OF PGP2 INCONSISTENT ',IUBOUND(2),IF_SC2_G + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP2 INCONSISTENT') + ENDIF + IF(IUBOUND(3) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP2 TOO SMALL ',IUBOUND(3),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP2 TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP2 MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3A)) THEN + IF(.NOT.PRESENT(PSPSC3A)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3A HAS TO BE PRESENT WHEN PGP3A IS') + ENDIF +ENDIF +IF(IF_SC3A_G3 > 0) THEN + IF(PRESENT(PGP3A)) THEN + IUBOUND=UBOUND(PGP3A) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3A TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3A TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3A_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3A INCONSISTENT ',IUBOUND(2),IF_SC3A_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3A_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3A INCONSISTENT ',& + & IUBOUND(3),IF_SC3A_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3A INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3A TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3A TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3A MISSING') + ENDIF +ENDIF + +IF(PRESENT(PGP3B)) THEN + IF(.NOT.PRESENT(PSPSC3B)) THEN + CALL ABORT_TRANS('EINV_TRANSAD:PSPSC3B HAS TO BE PRESENT WHEN PGP3B IS') + ENDIF +ENDIF +IF(IF_SC3B_G3 > 0) THEN + IF(PRESENT(PGP3B)) THEN + IUBOUND=UBOUND(PGP3B) + IF(IUBOUND(1) < NPROMA) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FIRST DIM. OF PGP3B TOO SMALL ',IUBOUND(1),NPROMA + CALL ABORT_TRANS('EINV_TRANSAD:FIRST DIMENSION OF PGP3B TOO SMALL ') + ENDIF + IF(IUBOUND(2) /= IF_SC3B_G2) THEN + WRITE(NOUT,*)'EINV_TRANSAD:SEC DIM. OF PGP3B INCONSISTENT ',IUBOUND(2),IF_SC3B_G2 + CALL ABORT_TRANS('EINV_TRANSAD:SEC. DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(3) /= IF_SC3B_G3 ) THEN + WRITE(NOUT,*)'EINV_TRANSAD:THIRD DIM. OF PGP3B INCONSISTENT ',& + & IUBOUND(3),IF_SC3B_G3 + CALL ABORT_TRANS('EINV_TRANSAD:THIRD DIMENSION OF PGP3B INCONSISTENT ') + ENDIF + IF(IUBOUND(4) < NGPBLKS) THEN + WRITE(NOUT,*)'EINV_TRANSAD:FOURTH DIM. OF PGP3B TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('EINV_TRANSAD:FOURTH DIMENSION OF PGP3B TOO SMALL ') + ENDIF + ELSE + CALL ABORT_TRANS('EINV_TRANSAD:PGP3B MISSING') + ENDIF +ENDIF +CALL GSTATS(1809,1) + +! ------------------------------------------------------------------ + +! Perform transform + +CALL EINV_TRANS_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_OUT_LT,& + & IF_UV,IF_SCALARS,IF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) +IF (LHOOK) CALL DR_HOOK('EINV_TRANSAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE EINV_TRANSAD + diff --git a/src/etrans/cpu/external/esetup_trans.F90 b/src/etrans/cpu/external/esetup_trans.F90 new file mode 100644 index 000000000..72d1aa7e3 --- /dev/null +++ b/src/etrans/cpu/external/esetup_trans.F90 @@ -0,0 +1,316 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE ESETUP_TRANS(KMSMAX,KSMAX,KDGL,KDGUX,KLOEN,LDSPLIT,& + & KTMAX,KRESOL,PEXWN,PEYWN,PWEIGHT,LDGRIDONLY,KNOEXTZL,KNOEXTZG, & + & LDUSEFFTW,LD_ALL_FFTW) +!**** *ESETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL ESETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space +! LDGRIDONLY - true if only grid space is required + + +! LDSPLIT describe the distribution among processors of +! grid-point data and has no relevance if you are using a single processor + +! LDUSEFFTW - Use FFTW for FFTs +! LD_ALL_FFTW : T to transform all fields in one call, F to transforms fields one after another + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESETUP_DIMS - setup distribution independent dimensions +! SUEMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! ESETUP_GEOM - Compute arrays related to grid-point geometry +! SUEMP_TRANS - Second part of setup of distributed environment +! SUEFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 02-04-11 A. Bogatchev: Passing of TCDIS +! 02-11-14 C. Fischer: soften test on KDGL +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Aug-2012 %LAM in GEOM_TYPE +! R. El Khatib 14-Jun-2013 LENABLED +! R. El Khatib 01-Sep-2015 Support for FFTW +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR, NOUT, NPRINTLEV, MSETUP0, & + & NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : FIELDS_RESOL +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T, FFT_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +#endif +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +USE TPM_FLT ,ONLY : FLT_RESOL +USE TPM_CTL ,ONLY : CTL_RESOL + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : ALDDISTR_RESOL +USE TPMALD_FIELDS ,ONLY : ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESETUP_DIMS_MOD ,ONLY : ESETUP_DIMS +USE SUEMP_TRANS_MOD ,ONLY : SUEMP_TRANS +USE SUEMP_TRANS_PRELEG_MOD ,ONLY : SUEMP_TRANS_PRELEG +!USE SULEG_MOD +USE ESETUP_GEOM_MOD ,ONLY : ESETUP_GEOM +USE SUEFFT_MOD ,ONLY : SUEFFT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Dummy arguments +INTEGER(KIND=JPIM),INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +LOGICAL ,OPTIONAL,INTENT(IN) :: LDGRIDONLY +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEXWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEYWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZG +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUSEFFTW +LOGICAL ,OPTIONAL,INTENT(IN) :: LD_ALL_FFTW + +!ifndef INTERFACE + +! Local variables +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',0,ZHOOK_HANDLE) + +IF(MSETUP0 == 0) THEN + CALL ABORT_TRANS('ESETUP_TRANS: SETUP_TRANS0 HAS TO BE CALLED BEFORE ESETUP_TRANS') +ENDIF +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE ESETUP_TRANS ===' + +! Allocate resolution dependent structures common to global and LAM +IF(.NOT. ALLOCATED(DIM_RESOL)) THEN + NDEF_RESOL = 1 + ALLOCATE(DIM_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(GEOM_RESOL(NMAX_RESOL)) + ALLOCATE(DISTR_RESOL(NMAX_RESOL)) +#ifdef WITH_FFT992 + ALLOCATE(FFT_RESOL(NMAX_RESOL)) +#endif + ALLOCATE(FFTW_RESOL(NMAX_RESOL)) + ALLOCATE(FLT_RESOL(NMAX_RESOL)) + ALLOCATE(CTL_RESOL(NMAX_RESOL)) + GEOM_RESOL(:)%LAM=.FALSE. + ALLOCATE(LENABLED(NMAX_RESOL)) + LENABLED(:)=.FALSE. +ELSE + NDEF_RESOL = NDEF_RESOL+1 + IF(NDEF_RESOL > NMAX_RESOL) THEN + CALL ABORT_TRANS('ESETUP_TRANS:NDEF_RESOL > NMAX_RESOL') + ENDIF +ENDIF +! Allocate LAM-specific resolution dependent structures +IF(.NOT. ALLOCATED(ALDDIM_RESOL)) THEN + ALLOCATE(ALDDIM_RESOL(NMAX_RESOL)) + ALLOCATE(ALDFIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(ALDGEO_RESOL(NMAX_RESOL)) + ALLOCATE(ALDDISTR_RESOL(NMAX_RESOL)) +#ifdef WITH_FFT992 + ALLOCATE(ALDFFT_RESOL(NMAX_RESOL)) +#endif +ENDIF + + +IF (PRESENT(KRESOL)) THEN + KRESOL=NDEF_RESOL +ENDIF + +! Point at structures due to be initialized +CALL ESET_RESOL(NDEF_RESOL) +IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL + +! Defaults for optional arguments + +G%LREDUCED_GRID = .FALSE. +D%LGRIDONLY = .FALSE. +D%LSPLIT = .FALSE. +#ifdef WITH_FFT992 +TALD%LFFT992=.TRUE. ! Use FFT992 interface for FFTs +#endif +TW%LALL_FFTW=.FALSE. ! transform fields one at a time + +! NON-OPTIONAL ARGUMENTS +R%NSMAX = KSMAX +RALD%NMSMAX=KMSMAX +RALD%NDGUX=KDGUX +R%NDGL = KDGL +RALD%NDGLSUR=KDGL+2 +R%NDLON =KLOEN(1) + +! IMPLICIT argument : +G%LAM = .TRUE. + +IF (KDGL <= 0) THEN + CALL ABORT_TRANS ('ESETUP_TRANS: KDGL IS NOT A POSITIVE NUMBER') +ENDIF + +! Optional arguments + +ALLOCATE(G%NLOEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) + +IF (G%LREDUCED_GRID) THEN + G%NLOEN(:) = KLOEN(1:R%NDGL) +ELSE + G%NLOEN(:) = R%NDLON +ENDIF + +IF(PRESENT(LDSPLIT)) THEN + D%LSPLIT = LDSPLIT +ENDIF + +IF(PRESENT(KTMAX)) THEN + R%NTMAX = KTMAX +ELSE + R%NTMAX = R%NSMAX +ENDIF +IF(R%NTMAX /= R%NSMAX) THEN + !This SHOULD work but I don't know how to test it /MH + WRITE(NERR,*) 'R%NTMAX /= R%NSMAX',R%NTMAX,R%NSMAX + CALL ABORT_TRANS('ESETUP_TRANS:R%NTMAX /= R%NSMAX HAS NOT BEEN VALIDATED') +ENDIF + +IF(PRESENT(PWEIGHT)) THEN + D%LWEIGHTED_DISTR = .TRUE. + IF( D%LWEIGHTED_DISTR .AND. .NOT.D%LSPLIT )THEN + CALL ABORT_TRANS('SETUP_TRANS: LWEIGHTED_DISTR=T AND LSPLIT=F NOT SUPPORTED') + ENDIF + IF(SIZE(PWEIGHT) /= SUM(G%NLOEN(:)) )THEN + CALL ABORT_TRANS('SETUP_TRANS:SIZE(PWEIGHT) /= SUM(G%NLOEN(:))') + ENDIF + ALLOCATE(D%RWEIGHT(SIZE(PWEIGHT))) + D%RWEIGHT(:)=PWEIGHT(:) +ELSE + D%LWEIGHTED_DISTR = .FALSE. +ENDIF + +IF(PRESENT(LDGRIDONLY)) THEN + D%LGRIDONLY=LDGRIDONLY +ENDIF + +IF (PRESENT(KNOEXTZL)) THEN + R%NNOEXTZL=KNOEXTZL +ELSE + R%NNOEXTZL=0 +ENDIF + +IF (PRESENT(KNOEXTZG)) THEN + R%NNOEXTZG=KNOEXTZG +ELSE + R%NNOEXTZG=0 +ENDIF + +IF(PRESENT(LD_ALL_FFTW)) THEN + TW%LALL_FFTW=LD_ALL_FFTW +ENDIF + +#ifdef WITH_FFT992 +IF(PRESENT(LDUSEFFTW)) THEN + TALD%LFFT992=.NOT.LDUSEFFTW +ELSE + TALD%LFFT992=.TRUE. +ENDIF +#endif + +! Setup resolution dependent structures +! ------------------------------------- + +! Setup distribution independent dimensions +CALL ESETUP_DIMS +IF (PRESENT(PEXWN)) GALD%EXWN=PEXWN +IF (PRESENT(PEYWN)) GALD%EYWN=PEYWN + +! First part of setup of distributed environment +CALL SUEMP_TRANS_PRELEG + +CALL GSTATS(1802,0) +! Compute arrays related to grid-point geometry +CALL ESETUP_GEOM +! Second part of setup of distributed environment +CALL SUEMP_TRANS +! Initialize Fast Fourier Transform package +CALL SUEFFT +CALL GSTATS(1802,1) + +! Signal the current resolution is active +LENABLED(NDEF_RESOL)=.TRUE. + +IF (LHOOK) CALL DR_HOOK('ESETUP_TRANS',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +!endif INTERFACE + +END SUBROUTINE ESETUP_TRANS + diff --git a/src/etrans/cpu/external/especnorm.F90 b/src/etrans/cpu/external/especnorm.F90 new file mode 100644 index 000000000..6a40ad4f4 --- /dev/null +++ b/src/etrans/cpu/external/especnorm.F90 @@ -0,0 +1,147 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE ESPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *ESPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL ESPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NERR +!USE TPM_DIM +USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ESPNORM_CTL_MOD ,ONLY : ESPNORM_CTL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KMASTER +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PNORM(:) +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IMASTER,IFLD,IFLD_G,J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ESPECNORM',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +! Set defaults +IMASTER = 1 +IFLD = 0 + +IF(PRESENT(KMASTER)) THEN + IMASTER = KMASTER +ENDIF + +IF(PRESENT(KVSET)) THEN + IFLD_G = UBOUND(KVSET,1) + DO J=1,IFLD_G + IF(KVSET(J) > NPRTRV) THEN + WRITE(NERR,*) 'ESPECNORM:KVSET(J) > NPRTRV ',J,KVSET(J),NPRTRV + CALL ABORT_TRANS('ESPECNORM:KVSET TOO LONG OR CONTAINS VALUES OUTSIDE RANGE') + ENDIF + IF(KVSET(J) == MYSETV) THEN + IFLD = IFLD+1 + ENDIF + ENDDO +ELSE + IF(PRESENT(PSPEC)) THEN + IFLD = UBOUND(PSPEC,1) + ENDIF + IFLD_G = IFLD +ENDIF + +IF(NPRTRV >1) THEN + IF(IFLD > 0 .AND. .NOT. PRESENT(KVSET)) THEN + WRITE(NERR,*)'NPRTRV >1 AND IFLD > 0 AND NOT PRESENT(KVSET)',& + & NPRTRV,IFLD + CALL ABORT_TRANS('ESPECNORM: SPECIFY VERTICAL SPECTRAL DISTRIBUTION!') + ENDIF +ENDIF +IF(MYPROC == IMASTER) THEN + IF(.NOT. PRESENT(PNORM)) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM NOT PRESENT') + ENDIF + IF(UBOUND(PNORM,1) < IFLD_G) THEN + CALL ABORT_TRANS('ESPECNORM: PNORM TOO SMALL') + ENDIF +ENDIF +IF(IFLD > 0 ) THEN + IF(.NOT. PRESENT(PSPEC)) THEN + CALL ABORT_TRANS('ESPECNORM: PSPEC NOT PRESENT') + ENDIF + IF(UBOUND(PSPEC,1) < IFLD) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF + IF(UBOUND(PSPEC,2) < D%NSPEC2) THEN + CALL ABORT_TRANS('ESPECNORM: FIRST DIMENSION OF PSPEC TOO SMALL') + ENDIF +ENDIF + +CALL ESPNORM_CTL(PSPEC,IFLD,IFLD_G,KVSET,IMASTER,PMET,PNORM) +IF (LHOOK) CALL DR_HOOK('ESPECNORM',1,ZHOOK_HANDLE) + +!endif INTERFACE + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPECNORM diff --git a/src/etrans/cpu/external/etrans_end.F90 b/src/etrans/cpu/external/etrans_end.F90 new file mode 100644 index 000000000..001a4a67b --- /dev/null +++ b/src/etrans/cpu/external/etrans_end.F90 @@ -0,0 +1,158 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE ETRANS_END(CDMODE) + +!**** *ETRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL ETRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 after G.Radnoti +! R. El Khatib 02-Mar-2012 Support for mixed multi-resolutions +! R. El Khatib 09-Jul-2013 LENABLED +! R. El Khatib 01-Set-2015 Support for FFTW +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NDEF_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM ,ONLY : R, DIM_RESOL +USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T, FFT_RESOL +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +#endif +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +USE TPM_FLT ,ONLY : S, FLT_RESOL +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE EQ_REGIONS_MOD ,ONLY : N_REGIONS +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL + +IMPLICIT NONE + +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE +! Local variables +CHARACTER*5 :: CLMODE +INTEGER(KIND=JPIM) :: JRES +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ETRANS_END',0,ZHOOK_HANDLE) +CLMODE='FINAL' +IF (PRESENT(CDMODE)) CLMODE=CDMODE +IF (CLMODE == 'FINAL') THEN + DO JRES=1,NDEF_RESOL + CALL EDEALLOC_RESOL(JRES) + ENDDO + NULLIFY(R) + IF (ALLOCATED(DIM_RESOL)) DEALLOCATE(DIM_RESOL) + NULLIFY(RALD) + IF (ALLOCATED(ALDDIM_RESOL)) DEALLOCATE(ALDDIM_RESOL) +!EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF +!TPM_DISTR + NULLIFY(D) + IF (ALLOCATED(DISTR_RESOL)) DEALLOCATE(DISTR_RESOL) + NULLIFY(DALD) + IF (ALLOCATED(ALDDISTR_RESOL)) DEALLOCATE(ALDDISTR_RESOL) +#ifdef WITH_FFT992 +!TPM_FFT + NULLIFY(T) + IF (ALLOCATED(FFT_RESOL)) DEALLOCATE(FFT_RESOL) +#endif + !TPM_FFTW + NULLIFY(TW) + DEALLOCATE(FFTW_RESOL) +!TPM_FLT + NULLIFY(S) + IF (ALLOCATED(FLT_RESOL)) DEALLOCATE(FLT_RESOL) +#ifdef WITH_FFT992 + NULLIFY(TALD) + IF (ALLOCATED(ALDFFT_RESOL)) DEALLOCATE(ALDFFT_RESOL) +#endif + +!TPM_FIELDS + NULLIFY(F) + IF (ALLOCATED(FIELDS_RESOL)) DEALLOCATE(FIELDS_RESOL) + NULLIFY(FALD) + IF (ALLOCATED(ALDFIELDS_RESOL)) DEALLOCATE(ALDFIELDS_RESOL) + +!TPM_GEOMETRY + NULLIFY(G) + IF(ALLOCATED(GEOM_RESOL)) DEALLOCATE(GEOM_RESOL) + NULLIFY(GALD) + IF(ALLOCATED(ALDGEO_RESOL)) DEALLOCATE(ALDGEO_RESOL) +!TPM_TRANS + IF(ALLOCATED(FOUBUF_IN)) DEALLOCATE(FOUBUF_IN) + IF(ALLOCATED(FOUBUF)) DEALLOCATE(FOUBUF) + + IF (ALLOCATED(LENABLED)) DEALLOCATE(LENABLED) + MSETUP0 = 0 + NMAX_RESOL = 0 + NCUR_RESOL = 0 + NDEF_RESOL = 0 +ENDIF + +IF (CLMODE == 'FINAL' .OR. CLMODE == 'INTER') THEN + !EQ_REGIONS + IF (ASSOCIATED(N_REGIONS)) THEN + DEALLOCATE(N_REGIONS) + NULLIFY (N_REGIONS) + ENDIF + !TPM_DISTR + IF (ALLOCATED(NPRCIDS)) DEALLOCATE(NPRCIDS) +ENDIF +IF (LHOOK) CALL DR_HOOK('ETRANS_END',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_END + diff --git a/src/etrans/cpu/external/etrans_inq.F90 b/src/etrans/cpu/external/etrans_inq.F90 new file mode 100644 index 000000000..7b2ad7013 --- /dev/null +++ b/src/etrans/cpu/external/etrans_inq.F90 @@ -0,0 +1,550 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE ETRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + & KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + & KMYMS,KESM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + & KULTPP,KPTRLS,& + & KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + & LDSPLITLAT,LDLINEAR_GRID,& + & KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,KDEF_RESOL,LDLAM,& + & PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KCPL2M,KCPL4M ,KPROCM) + +!**** *ETRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL ETRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resolution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension +! N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KESM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation - n direction +! KMSMAX - spectral truncation - m direction +! KNVALUE - n value for each KSPEC2 spectral coeffient +! KMVALUE - m value for each KSPEC2 spectral coeffient +! LDLINEAR_GRID : .TRUE. if the grid is linear + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLEPINM - Eigen-values of the inverse Laplace operator + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing with TFL 36R4 +! R. El Khatib 08-Aug-2012 KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,LDLAM,KDEF_RESOL,LDLINEAR_GRID +! T. Dalkilic 28-Aug-2012 KCPL4M +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!ifndef INTERFACE + +USE TPM_GEN ,ONLY : NDEF_RESOL +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_DISTR ,ONLY : DALD +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +USE TPMALD_FIELDS + +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS_EW, N_REGIONS_NS +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!endif INTERFACE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2G +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KNUMP +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOT +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTG +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTMX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KMYMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KESM0(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KUMPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPOSSP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KALLMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSTA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KONL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KULTPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL,INTENT(INOUT) :: LDSPLITLAT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL :: PGW(:) ! Argument NOT used +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLEI3 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPOLEGL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPMS(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL2M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL4M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPROCM(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMVALUE(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PLEPINM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLINEAR_GRID +!ifndef INTERFACE + +INTEGER(KIND=JPIM) :: IU1,IU2 +INTEGER(KIND=JPIM) :: IC, JN, JMLOC, IM, JJ, JM +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX),ICPLM(0:RALD%NMSMAX) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Set current resolution +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',0,ZHOOK_HANDLE) +CALL ESET_RESOL(KRESOL) + +IF(PRESENT(KSPEC)) KSPEC = D%NSPEC +IF(PRESENT(KSPEC2)) KSPEC2 = D%NSPEC2 +IF(PRESENT(KSPEC2G)) KSPEC2G = R%NSPEC2_G +IF(PRESENT(KSPEC2MX)) KSPEC2MX = D%NSPEC2MX +IF(PRESENT(KNUMP)) KNUMP = D%NUMP +IF(PRESENT(KGPTOT)) KGPTOT = D%NGPTOT +IF(PRESENT(KGPTOTG)) KGPTOTG = D%NGPTOTG +IF(PRESENT(KGPTOTMX)) KGPTOTMX = D%NGPTOTMX +IF(PRESENT(KFRSTLOFF)) KFRSTLOFF = D%NFRSTLOFF +IF(PRESENT(KPTRFLOFF)) KPTRFLOFF = D%NPTRFLOFF +IF(PRESENT(KPRTRW)) KPRTRW = NPRTRW +IF(PRESENT(KMYSETW)) KMYSETW = MYSETW +IF(PRESENT(KMYSETV)) KMYSETV = MYSETV +IF(PRESENT(KMY_REGION_NS)) KMY_REGION_NS = MY_REGION_NS +IF(PRESENT(KMY_REGION_EW)) KMY_REGION_EW = MY_REGION_EW +IF(PRESENT(LDLAM)) LDLAM = G%LAM +IF(PRESENT(KDEF_RESOL)) KDEF_RESOL = NDEF_RESOL + +IF(PRESENT(KGPTOTL)) THEN + IF(UBOUND(KGPTOTL,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KGPTOTL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KGPTOTL DIM 2 TOO SMALL') + ELSE + KGPTOTL(1:N_REGIONS_NS,1:N_REGIONS_EW) = D%NGPTOTL(:,:) + ENDIF +ENDIF + +IF(PRESENT(KMYMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KMYMS,1) < D%NUMP) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMYMS TOO SMALL') + ELSE + KMYMS(1:D%NUMP) = D%MYMS(:) + ENDIF +ENDIF + +IF(PRESENT(KESM0)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KESM0,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KESM0 TOO SMALL') + ELSE + KESM0(0:RALD%NMSMAX) = DALD%NESM0(:) + ENDIF +ENDIF + +IF(PRESENT(KCPL2M)) THEN + IF(UBOUND(KCPL2M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL2M TOO SMALL') + ELSE + KCPL2M(0:RALD%NMSMAX) = DALD%NCPL2M(:) + ENDIF +ENDIF +IF(PRESENT(KPROCM)) THEN + IF(UBOUND(KPROCM,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPROCM TOO SMALL') + ELSE + KPROCM(0:RALD%NMSMAX) = D%NPROCM(:) + ENDIF +ENDIF + +IF(PRESENT(KUMPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KUMPP,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KUMPP TOO SMALL') + ELSE + KUMPP(1:NPRTRW) = D%NUMPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPOSSP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPOSSP,1) < NPRTRW+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPOSSP TOO SMALL') + ELSE + KPOSSP(1:NPRTRW+1) = D%NPOSSP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRMS,1) < NPRTRW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRMS TOO SMALL') + ELSE + KPTRMS(1:NPRTRW) = D%NPTRMS(:) + ENDIF +ENDIF + +IF(PRESENT(KALLMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KALLMS,1) < RALD%NMSMAX+1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KALLMS TOO SMALL') + ELSE + KALLMS(1:RALD%NMSMAX+1) = D%NALLMS(:) + ENDIF +ENDIF + +IF(PRESENT(KDIM0G)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KDIM0G,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KDIM0G TOO SMALL') + ELSE + KDIM0G(0:RALD%NMSMAX) = D%NDIM0G(0:RALD%NMSMAX) + ENDIF +ENDIF + +IF(PRESENT(KFRSTLAT)) THEN + IF(UBOUND(KFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KFRSTLAT TOO SMALL') + ELSE + KFRSTLAT(1:N_REGIONS_NS) = D%NFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KLSTLAT)) THEN + IF(UBOUND(KLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KLSTLAT TOO SMALL') + ELSE + KLSTLAT(1:N_REGIONS_NS) = D%NLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLAT)) THEN + IF(UBOUND(KPTRLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLAT TOO SMALL') + ELSE + KPTRLAT(1:R%NDGL) = D%NPTRLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRFRSTLAT)) THEN + IF(UBOUND(KPTRFRSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRFRSTLAT TOO SMALL') + ELSE + KPTRFRSTLAT(1:N_REGIONS_NS) = D%NPTRFRSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLSTLAT)) THEN + IF(UBOUND(KPTRLSTLAT,1) < N_REGIONS_NS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLSTLAT TOO SMALL') + ELSE + KPTRLSTLAT(1:N_REGIONS_NS) = D%NPTRLSTLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KSTA)) THEN + IF(UBOUND(KSTA,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 1 TOO SMALL') + ELSEIF(UBOUND(KSTA,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KSTA DIM 2 TOO SMALL') + ELSE + KSTA(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NSTA(:,:) + ENDIF +ENDIF + +IF(PRESENT(KONL)) THEN + IF(UBOUND(KONL,1) < R%NDGL+N_REGIONS_NS-1) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 1 TOO SMALL') + ELSEIF(UBOUND(KONL,2) < N_REGIONS_EW) THEN + CALL ABORT_TRANS('ETRANS_INQ: KONL DIM 2 TOO SMALL') + ELSE + KONL(1:R%NDGL+N_REGIONS_NS-1,1:N_REGIONS_EW) = D%NONL(:,:) + ENDIF +ENDIF + +IF(PRESENT(LDSPLITLAT)) THEN + IF(UBOUND(LDSPLITLAT,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: LDSPLITLAT TOO SMALL') + ELSE + LDSPLITLAT(1:R%NDGL) = D%LSPLITLAT(:) + ENDIF +ENDIF + +IF(PRESENT(KULTPP)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KULTPP,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KULTPP TOO SMALL') + ELSE + KULTPP(1:NPRTRNS) = D%NULTPP(:) + ENDIF +ENDIF + +IF(PRESENT(KPTRLS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPTRLS,1) < NPRTRNS) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPTRLS TOO SMALL') + ELSE + KPTRLS(1:NPRTRNS) = D%NPTRLS(:) + ENDIF +ENDIF + +IF(PRESENT(PMU)) THEN + IF(UBOUND(PMU,1) < R%NDGL) THEN + CALL ABORT_TRANS('ETRANS_INQ: PMU TOO SMALL') + ELSE + PMU(1:R%NDGL) = F%RMU + ENDIF +ENDIF + +IF(PRESENT(PRPNM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PRPNM REQUIRED BUT LGRIDONLY=T') + ENDIF + IU1 = UBOUND(PRPNM,1) + IU2 = UBOUND(PRPNM,2) + IF(IU1 < R%NDGNH) THEN + CALL ABORT_TRANS('ETRANS_INQ:FIRST DIM. OF PRNM TOO SMALL') + ELSE + IU1 = MIN(IU1,R%NLEI3) + IU2 = MIN(IU2,D%NSPOLEGL) + PRPNM(1:IU1,1:IU2) = F%RPNM(1:IU1,1:IU2) + ENDIF +ENDIF +IF(PRESENT(KLEI3)) THEN + KLEI3=R%NLEI3 +ENDIF +IF(PRESENT(KSPOLEGL)) THEN + KSPOLEGL=D%NSPOLEGL +ENDIF +IF(PRESENT(KPMS)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KPMS,1) < R%NSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KPMS TOO SMALL') + ELSE + KPMS(0:R%NSMAX) = D%NPMS(0:R%NSMAX) + ENDIF +ENDIF + +IF(PRESENT(KSMAX)) KSMAX = R%NSMAX +IF(PRESENT(KMSMAX)) KMSMAX = RALD%NMSMAX +IF(PRESENT(PLEPINM)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(PLEPINM,1) < R%NSPEC_G/2) THEN + CALL ABORT_TRANS('ETRANS_INQ: PLEPINM TOO SMALL') + ELSEIF (LBOUND(PLEPINM,1) /= -1) THEN + CALL ABORT_TRANS('ETRANS_INQ: LOWER BOUND OF PLEPINM SHOULD BE -1') + ELSE + PLEPINM(:) = FALD%RLEPINM(:) + ENDIF +ENDIF +IF(PRESENT(KNVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KNVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KNVALUE(IC+JJ)=JN + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KMVALUE)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KNVALUE REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(SIZE(KMVALUE) < D%NSPEC2) THEN + CALL ABORT_TRANS('ETRANS_INQ: KMVALUE TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + ICPLM(JM) = 1*(ISNAX(JM)+1) + ENDDO + IC=1 + DO JMLOC=1,D%NUMP + IM=D%MYMS(JMLOC) + DO JN=0,ICPLM(IM)-1 + DO JJ=0,3 + KMVALUE(IC+JJ)=IM + ENDDO + IC=IC+4 + ENDDO + ENDDO + ENDIF +ENDIF + +IF(PRESENT(KCPL4M)) THEN + IF(D%LGRIDONLY) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M REQUIRED BUT LGRIDONLY=T') + ENDIF + IF(UBOUND(KCPL4M,1) < RALD%NMSMAX) THEN + CALL ABORT_TRANS('ETRANS_INQ: KCPL4M TOO SMALL') + ELSE + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) + DO JM=0,RALD%NMSMAX + KCPL4M(JM) = 4*(ISNAX(JM)+1) + ENDDO + ENDIF +ENDIF + + +IF(PRESENT(LDLINEAR_GRID)) THEN + LDLINEAR_GRID = R%NSMAX > (R%NDGL -1)/3 .OR. RALD%NMSMAX > (R%NDLON -1)/3 +ENDIF + + +IF (LHOOK) CALL DR_HOOK('ETRANS_INQ',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +!endif INTERFACE + +END SUBROUTINE ETRANS_INQ diff --git a/src/etrans/cpu/external/etrans_release.F90 b/src/etrans/cpu/external/etrans_release.F90 new file mode 100644 index 000000000..ce60067e7 --- /dev/null +++ b/src/etrans/cpu/external/etrans_release.F90 @@ -0,0 +1,62 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +SUBROUTINE ETRANS_RELEASE(KRESOL) + +!**** *ETRANS_RELEASE* - release a spectral resolution + +! Purpose. +! -------- +! Release all arrays related to a given resolution tag + +!** Interface. +! ---------- +! CALL ETRANS_RELEASE + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM + +!ifndef INTERFACE + +USE EDEALLOC_RESOL_MOD ,ONLY : EDEALLOC_RESOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL + +!endif INTERFACE + +! ------------------------------------------------------------------ + +CALL EDEALLOC_RESOL(KRESOL) + +! ------------------------------------------------------------------ + +END SUBROUTINE ETRANS_RELEASE diff --git a/src/etrans/cpu/internal/cpl_int_mod.F90 b/src/etrans/cpu/internal/cpl_int_mod.F90 new file mode 100644 index 000000000..476ebb7dd --- /dev/null +++ b/src/etrans/cpu/internal/cpl_int_mod.F90 @@ -0,0 +1,44 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE CPL_INT_MOD +CONTAINS +SUBROUTINE CPL_INT(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,CPL_PROC,KPTRGP) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +IMPLICIT NONE +INTEGER(KIND=JPIM), INTENT(IN) :: KENDROWL +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KFFIELDS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEN +INTEGER(KIND=JPIM), INTENT(IN) :: KSTA(KENDROWL) +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB), INTENT(INOUT) :: PGTF(KFIELDS,KLEN) +EXTERNAL CPL_PROC + +INTEGER(KIND=JPIM) :: IPTRGP(KFIELDS) +INTEGER(KIND=JPIM) :: J +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!-------------------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',0,ZHOOK_HANDLE) +IF(PRESENT(KPTRGP)) THEN + IPTRGP(:)=KPTRGP(1:KFIELDS) +ELSE + DO J=1,KFIELDS + IPTRGP(J)=J + ENDDO +ENDIF +CALL CPL_PROC(PGTF,KENDROWL,KFIELDS,KFFIELDS,KLEN,KSTA,IPTRGP) +IF (LHOOK) CALL DR_HOOK('CPL_INT_MOD:CPL_INT',1,ZHOOK_HANDLE) +END SUBROUTINE CPL_INT +END MODULE CPL_INT_MOD diff --git a/src/etrans/cpu/internal/easre1ad_mod.F90 b/src/etrans/cpu/internal/easre1ad_mod.F90 new file mode 100644 index 000000000..ef0dc66c6 --- /dev/null +++ b/src/etrans/cpu/internal/easre1ad_mod.F90 @@ -0,0 +1,91 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EASRE1AD_MOD +CONTAINS +SUBROUTINE EASRE1AD(KM,KMLOC,KF_OUT_LT,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_TRANS +USE EASRE1BAD_MOD ,ONLY : EASRE1BAD + +!**** *EASRE1AD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *EASRE1AD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (basic +! variables and N-S derivatives) + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. EASRE1BAD - basic recombination routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1AD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_OUT_LT + +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IFLDS +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EASRE1AD_MOD:EASRE1AD',0,ZHOOK_HANDLE) +IFLDS = KF_OUT_LT + +CALL EASRE1BAD(IFLDS,KM,KMLOC,PIA) +IF (LHOOK) CALL DR_HOOK('EASRE1AD_MOD:EASRE1AD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1AD +END MODULE EASRE1AD_MOD diff --git a/src/etrans/cpu/internal/easre1b_mod.F90 b/src/etrans/cpu/internal/easre1b_mod.F90 new file mode 100644 index 000000000..5cced838a --- /dev/null +++ b/src/etrans/cpu/internal/easre1b_mod.F90 @@ -0,0 +1,104 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EASRE1B_MOD +CONTAINS +SUBROUTINE EASRE1B(KFC,KM,KMLOC,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +!**** *ASRE1B* - Recombine antisymmetric and symmetric parts + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *ASRE1B(..) + +! Explicit arguments : +! ------------------- KFC - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 26-Aug-2021 Optimizations +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +REAL(KIND=JPRB), INTENT(IN) :: PIA(RALD%NDGLSUR+R%NNOEXTZG,KFC) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',0,ZHOOK_HANDLE) +#ifdef __INTEL_COMPILER +!$OMP SIMD PRIVATE(JGL) +DO JFLD=1,KFC + DO JGL=1,R%NDGL + FOUBUF_IN((D%NSTAGT0B(D%NPROCL(JGL))+D%NPNTGTB1(KMLOC,JGL))*KFC+JFLD)=PIA(JGL,JFLD) + ENDDO +ENDDO +#else +DO JGL=1,R%NDGL + IPROC=D%NPROCL(JGL) + IISTAN=(D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*KFC + DO JFLD =1,KFC + FOUBUF_IN(IISTAN+JFLD)=PIA(JGL,JFLD) + ENDDO +ENDDO +#endif +IF (LHOOK) CALL DR_HOOK('EASRE1B_MOD:EASRE1B',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1B +END MODULE EASRE1B_MOD diff --git a/src/etrans/cpu/internal/easre1bad_mod.F90 b/src/etrans/cpu/internal/easre1bad_mod.F90 new file mode 100644 index 000000000..2299f1040 --- /dev/null +++ b/src/etrans/cpu/internal/easre1bad_mod.F90 @@ -0,0 +1,108 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EASRE1BAD_MOD +CONTAINS +SUBROUTINE EASRE1BAD(KFC,KM,KMLOC,PIA) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +!**** *EASRE1BAD* - Recombine antisymmetric and symmetric parts - adjoint + +! Purpose. +! -------- +! To recombine the antisymmetric and symmetric parts of the +! Fourier arrays and update the correct parts of the state +! variables. + +!** Interface. +! ---------- +! *CALL* *EASRE1BAD(..) + +! Explicit arguments : +! ------------------- KFC - number of fields (input-c) +! KM - zonal wavenumber(input-c) +! KMLOC - local version of KM (input-c) +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM (input) +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM (input) + +! Implicit arguments : FOUBUF_IN - output buffer (output) +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From ASRE1BAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 26-Aug-2021 Optimizations +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC + +REAL(KIND=JPRB), INTENT(OUT) :: PIA(RALD%NDGLSUR+R%NNOEXTZG,KFC) + +INTEGER(KIND=JPIM) :: JFLD, JGL ,IPROC +INTEGER(KIND=JPIM) :: IISTAN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. RECOMBINATION OF SYMMETRIC AND ANTSYMMETRIC PARTS. +! --------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EASRE1BAD_MOD:EASRE1BAD',0,ZHOOK_HANDLE) +#ifdef __INTEL_COMPILER +!$OMP SIMD PRIVATE(JGL) +DO JFLD =1,KFC + DO JGL=1,R%NDGL + PIA(JGL,JFLD)=FOUBUF_IN((D%NSTAGT0B(D%NPROCL(JGL))+D%NPNTGTB1(KMLOC,JGL))*KFC+JFLD) + ENDDO +ENDDO +#else +DO JGL=1,R%NDGL + IPROC=D%NPROCL(JGL) + DO JFLD =1,KFC + IISTAN=(D%NSTAGT0B(IPROC) + D%NPNTGTB1(KMLOC,JGL))*KFC + PIA(JGL,JFLD)=FOUBUF_IN(IISTAN+JFLD) + ENDDO +ENDDO +#endif +IF (LHOOK) CALL DR_HOOK('EASRE1BAD_MOD:EASRE1BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EASRE1BAD +END MODULE EASRE1BAD_MOD diff --git a/src/etrans/cpu/internal/edealloc_resol_mod.F90 b/src/etrans/cpu/internal/edealloc_resol_mod.F90 new file mode 100644 index 000000000..0c4546401 --- /dev/null +++ b/src/etrans/cpu/internal/edealloc_resol_mod.F90 @@ -0,0 +1,111 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EDEALLOC_RESOL_MOD +CONTAINS +SUBROUTINE EDEALLOC_RESOL(KRESOL) + +!**** *EDEALLOC_RESOL_MOD* - Deallocations of a resolution + +! Purpose. +! -------- +! Release allocated arrays for a given resolution + +!** Interface. +! ---------- +! CALL EDEALLOC_RESOL_MOD + +! Explicit arguments : KRESOL : resolution tag +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! R. El Khatib *METEO-FRANCE* + +! Modifications. +! -------------- +! Original : 09-Jul-2013 from etrans_end +! B. Bochenek (Apr 2015): Phasing: update +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE TPM_GEN ,ONLY : LENABLED, NOUT +USE TPM_DISTR ,ONLY : D +USE TPM_GEOMETRY ,ONLY : G +USE TPM_FIELDS ,ONLY : F +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T +#endif +USE TPM_FFTW ,ONLY : TW,DESTROY_PLANS_FFTW +USE TPM_FLT ,ONLY : S + +USE ESET_RESOL_MOD ,ONLY : ESET_RESOL + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +IF (.NOT.LENABLED(KRESOL)) THEN + + WRITE(UNIT=NOUT,FMT='('' EDEALLOC_RESOL WARNING: KRESOL = '',I3,'' ALREADY DISABLED '')') KRESOL + +ELSE + + CALL ESET_RESOL(KRESOL) + + !TPM_DISTR + DEALLOCATE(D%NFRSTLAT,D%NLSTLAT,D%NPTRLAT,D%NPTRFRSTLAT,D%NPTRLSTLAT) + DEALLOCATE(D%LSPLITLAT,D%NSTA,D%NONL,D%NGPTOTL,D%NPROCA_GP) + + IF(D%LWEIGHTED_DISTR) THEN + DEALLOCATE(D%RWEIGHT) + ENDIF + + IF(.NOT.D%LGRIDONLY) THEN + + DEALLOCATE(D%MYMS,D%NUMPP,D%NPOSSP,D%NPROCM,D%NDIM0G,D%NASM0,D%NATM0) + DEALLOCATE(D%NLATLS,D%NLATLE,D%NPMT,D%NPMS,D%NPMG,D%NULTPP,D%NPROCL) + DEALLOCATE(D%NPTRLS,D%NALLMS,D%NPTRMS,D%NSTAGT0B,D%NSTAGT1B,D%NPNTGTB0) + DEALLOCATE(D%NPNTGTB1,D%NLTSFTB,D%NLTSGTB,D%MSTABF) + DEALLOCATE(D%NSTAGTF) + +#ifdef WITH_FFT992 + !TPM_FFT + DEALLOCATE(T%TRIGS,T%NFAX) +#endif + !TPM_FFTW + CALL DESTROY_PLANS_FFTW + !TPM_GEOMETRY + DEALLOCATE(G%NMEN,G%NDGLU) + + ELSE + + DEALLOCATE(G%NLOEN) + + ENDIF + + LENABLED(KRESOL)=.FALSE. + +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EDEALLOC_RESOL +END MODULE EDEALLOC_RESOL_MOD diff --git a/src/etrans/cpu/internal/edir_trans_ctl_mod.F90 b/src/etrans/cpu/internal/edir_trans_ctl_mod.F90 new file mode 100644 index 000000000..ece17b24e --- /dev/null +++ b/src/etrans/cpu/internal/edir_trans_ctl_mod.F90 @@ -0,0 +1,213 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EDIR_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV,AUX_PROC) + +!**** *EDIR_TRANS_CTL* - Control routine for direct spectral transform. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PMEANU,PMEANV - mean winds +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! G. Radnoti 01-03-13 adaptation to aladin +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTDIR_CTL_MOD ,ONLY : ELTDIR_CTL +USE EFTDIR_CTL_MOD ,ONLY : EFTDIR_CTL + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTL(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,PGP=PGP,& + & AUX_PROC=AUX_PROC) + ENDIF + CALL ELTDIR_CTL(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV,AUX_PROC=AUX_PROC) + ENDDO +ELSE + + ! No splitting of fields, transform done in one go + + CALL EFTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,IF_GPB,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2,& + & AUX_PROC=AUX_PROC) + + CALL ELTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV,& + & AUX_PROC=AUX_PROC) + +ENDIF +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTL_MOD:EDIR_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTL +END MODULE EDIR_TRANS_CTL_MOD diff --git a/src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 b/src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 new file mode 100644 index 000000000..b95d45828 --- /dev/null +++ b/src/etrans/cpu/internal/edir_trans_ctlad_mod.F90 @@ -0,0 +1,205 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EDIR_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE EDIR_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +!**** *EDIR_TRANS_CTLAD* - Control routine for direct spectral transform-adj. + +! Purpose. +! -------- +! Control routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity +! PSPDIV(:,:) - spectral divergence +! PSPSCALAR(:,:) - spectral scalarvalued fields +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +!USE TPM_TRANS +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTDIR_CTLAD_MOD ,ONLY : ELTDIR_CTLAD +USE EFTDIR_CTLAD_MOD ,ONLY : EFTDIR_CTLAD + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTLAD_MOD:EDIR_TRANS_CTLAD',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,KF_GP,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF_FS = 2*IF_UV + IF_SCALARS + IF_GP = 2*IF_UV_G+IF_SCALARS_G + DO JFLD=1,IF_UV_G + IPTRGP(JFLD) = ISHFUV_G(ISTUV_G+JFLD-1) + IPTRGP(JFLD+IF_UV_G) = KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDDO + DO JFLD=1,IF_SCALARS_G + IPTRGP(JFLD+2*IF_UV_G) = 2*KF_UV_G+ISHFSC_G(ISTSC_G+JFLD-1) + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL ELTDIR_CTLAD(IF_FS,IF_UV,IF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTDIR_CTLAD(IF_UV_G,IF_SCALARS_G,IF_GP,IF_FS,IF_GPB,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL ELTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + + CALL EFTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,IF_GPB,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) +ENDIF +IF (LHOOK) CALL DR_HOOK('EDIR_TRANS_CTLAD_MOD:EDIR_TRANS_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIR_TRANS_CTLAD +END MODULE EDIR_TRANS_CTLAD_MOD diff --git a/src/etrans/cpu/internal/edist_spec_control_mod.F90 b/src/etrans/cpu/internal/edist_spec_control_mod.F90 new file mode 100644 index 000000000..23ae29d7c --- /dev/null +++ b/src/etrans/cpu/internal/edist_spec_control_mod.F90 @@ -0,0 +1,14 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EDIST_SPEC_CONTROL_MOD + ! dead code - merged with DIST_SPEC_CONTROL_MOD +END MODULE EDIST_SPEC_CONTROL_MOD diff --git a/src/etrans/cpu/internal/efsc_mod.F90 b/src/etrans/cpu/internal/efsc_mod.F90 new file mode 100644 index 000000000..a34f7644d --- /dev/null +++ b/src/etrans/cpu/internal/efsc_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFSC_MOD +CONTAINS +SUBROUTINE EFSC(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *FSC - Division by a*cos(theta), east-west derivatives + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL FSC(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G +USE TPMALD_GEO ,ONLY : GALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(IN ) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT( OUT) :: PUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + +INTEGER(KIND=JPIM) :: JF,IGLG,II,IR,JM +REAL(KIND=JPRB) :: ZIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',0,ZHOOK_HANDLE) +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + +! ------------------------------------------------------------------ + +!* EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + ZIM=REAL(JM,JPRB)*GALD%EXWN + IR = ISTAGTF+2*JM+1 + II = IR+1 +! use unroll to provoke vectorization of outer loop +!cdir unroll=4 + DO JF=1,2*KF_UV + PUVDERS(JF,IR) = -PUV(JF,II)*ZIM + PUVDERS(JF,II) = PUV(JF,IR)*ZIM + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + ZIM=REAL(JM,JPRB)*GALD%EXWN + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,KF_SCALARS + PEWDERS(JF,IR) = -PSCALAR(JF,II)*ZIM + PEWDERS(JF,II) = PSCALAR(JF,IR)*ZIM + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EFSC_MOD:EFSC',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFSC +END MODULE EFSC_MOD diff --git a/src/etrans/cpu/internal/efscad_mod.F90 b/src/etrans/cpu/internal/efscad_mod.F90 new file mode 100644 index 000000000..2981bae04 --- /dev/null +++ b/src/etrans/cpu/internal/efscad_mod.F90 @@ -0,0 +1,132 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFSCAD_MOD +CONTAINS +SUBROUTINE EFSCAD(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) + +!**** *EFSCAD - Division by a*cos(theta), east-west derivatives - adjoint + +! Purpose. +! -------- +! In Fourier space divide u and v and all north-south +! derivatives by a*cos(theta). Also compute east-west derivatives +! of u,v,thermodynamic, passiv scalar variables and surface +! pressure. + +!** Interface. +! ---------- +! CALL EFSCAD(..) +! Explicit arguments : PUV - u and v +! -------------------- PSCALAR - scalar valued varaibles +! PNSDERS - N-S derivative of S.V.V. +! PEWDERS - E-W derivative of S.V.V. +! PUVDERS - E-W derivative of u and v +! Method. +! ------- + +! Externals. None. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 (From SC2FSC) +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : LUVDER +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G + +USE TPMALD_GEO ,ONLY : GALD + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KGL,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) , INTENT(INOUT) :: PUV(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PNSDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PEWDERS(:,:) +REAL(KIND=JPRB) , INTENT(INOUT) :: PUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IMEN,ISTAGTF + +INTEGER(KIND=JPIM) :: JF,IGLG,II,IR,JM + +REAL(KIND=JPRB) :: ZIM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFSCAD_MOD:EFSCAD',0,ZHOOK_HANDLE) +IGLG = D%NPTRLS(MYSETW)+KGL-1 +IMEN = G%NMEN(IGLG) +ISTAGTF = D%NSTAGTF(KGL) + +! ------------------------------------------------------------------ + +!* 2. EAST-WEST DERIVATIVES +! --------------------- + +!* 2.1 U AND V. + +IF(LUVDER)THEN + DO JM=0,IMEN + + ZIM=REAL(JM,JPRB)*GALD%EXWN + + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,2*KF_UV + + PUV(JF,II) = PUV(JF,II) - ZIM*PUVDERS(JF,IR) + PUV(JF,IR) = PUV(JF,IR) + ZIM*PUVDERS(JF,II) + + PUVDERS(JF,IR) = 0.0_JPRB + PUVDERS(JF,II) = 0.0_JPRB + ENDDO + ENDDO +ENDIF + +!* 2.2 SCALAR VARIABLES + +IF(KF_SCDERS > 0)THEN + DO JM=0,IMEN + + ZIM=REAL(JM,JPRB)*GALD%EXWN + + IR = ISTAGTF+2*JM+1 + II = IR+1 + DO JF=1,KF_SCALARS + + PSCALAR(JF,II) = PSCALAR(JF,II) - ZIM* PEWDERS(JF,IR) + PSCALAR(JF,IR) = PSCALAR(JF,IR) + ZIM* PEWDERS(JF,II) + + PEWDERS(JF,IR) = 0.0_JPRB + PEWDERS(JF,II) = 0.0_JPRB + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EFSCAD_MOD:EFSCAD',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE EFSCAD +END MODULE EFSCAD_MOD diff --git a/src/etrans/cpu/internal/eftdir_ctl_mod.F90 b/src/etrans/cpu/internal/eftdir_ctl_mod.F90 new file mode 100644 index 000000000..bb7d8f9f5 --- /dev/null +++ b/src/etrans/cpu/internal/eftdir_ctl_mod.F90 @@ -0,0 +1,225 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFTDIR_CTL_MOD +CONTAINS +SUBROUTINE EFTDIR_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,AUX_PROC) + +!**** *EFTDIR_CTL - Direct Fourier transform control + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL FTDIR_CTL(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_GPB - total global number of output gridpoint fields +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! FTDIR - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Radnoti 01-03-13 adaptation to aladin (coupling) +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 19-11-01 : G. Radnoti bug corection by introducing cpl_int interface +! 02-09-30 : P. Smolikova AUX_PROC for d4 in NH +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NSTACK_MEMORY_TR +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE TRGTOL_MOD ,ONLY : TRGTOL +USE FOURIER_OUT_MOD ,ONLY : FOURIER_OUT +USE FTDIR_MOD ,ONLY : FTDIR +USE EXTPER_MOD ,ONLY : EXTPER +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +! Local variables +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER, CONTIGUOUS :: ZGTF(:,:) +REAL(KIND=JPRB) :: ZDUM +INTEGER(KIND=JPIM) :: IST,INUL,JGL,IGL,IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTL_MOD:EFTDIR_CTL',0,ZHOOK_HANDLE) + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +ELSE + ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) +! Now, force the OS to allocate this shared array right now, not when it starts +! to be used which is an OPEN-MP loop, that would cause a threads +! synchronization lock : + IF (KF_FS > 0 .AND. D%NLENGTF > 0) THEN + ZGTF_HEAP(1,1)=HUGE(1._JPRB) + ENDIF + ZGTF => ZGTF_HEAP(:,:) +ENDIF + +! Transposition + +CALL GSTATS(158,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(158,1) +CALL GSTATS(106,0) + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + CALL EXTPER(ZGTF,R%NDLON+R%NNOEXTZL,1,R%NDLON,KF_FS,D%NDGL_FS,INT(D%NSTAGTF,KIND=JPIM),0) +ELSE + IF (PRESENT(AUX_PROC)) THEN + CALL AUX_PROC(ZGTF,ZDUM,KF_FS,D%NLENGTF,1,D%NDGL_FS,0,.TRUE.,& + & D%NSTAGTF,INUL,INUL,INUL) + ENDIF +ENDIF + +! Fourier transform + +IBLEN=D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here +ENDIF + +CALL GSTATS(1640,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + IF(KF_FS>0) THEN + CALL FTDIR(ZGTF,KF_FS,IGL) + ENDIF + +! Save Fourier data in FOUBUF_IN + + CALL FOURIER_OUT(ZGTF,KF_FS,IGL) +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1640,1) +CALL GSTATS(106,1) +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTL_MOD:EFTDIR_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR_CTL +END MODULE EFTDIR_CTL_MOD diff --git a/src/etrans/cpu/internal/eftdir_ctlad_mod.F90 b/src/etrans/cpu/internal/eftdir_ctlad_mod.F90 new file mode 100644 index 000000000..6c4d4d59c --- /dev/null +++ b/src/etrans/cpu/internal/eftdir_ctlad_mod.F90 @@ -0,0 +1,212 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE EFTDIR_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB, & + & KVSETUV,KVSETSC,KPTRGP,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTDIR_CTLAD - Direct Fourier transform control - adjoint + +! Purpose. Control routine for Grid-point to Fourier transform +! -------- + +!** Interface. +! ---------- +! CALL EFTDIR_CTLAD(..) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! PGP - gridpoint array +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fields in gridpoint space + +! Method. +! ------- + +! Externals. TRGTOL - transposition routine +! ---------- FOURIER_OUT - copy fourier data to Fourier buffer +! EFTDIRAD - fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 01-08-28 : G. Radnoti & R. El Khatib Fix for NPROMATR /= 0 +! 19-11-01 G. Radnoti bug correction by introducing CPL_INT interface +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 05-03-15 remove HLOMP +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NSTACK_MEMORY_TR +USE TPM_DISTR ,ONLY : D + +USE TRLTOG_MOD ,ONLY : TRLTOG +USE FOURIER_OUTAD_MOD ,ONLY : FOURIER_OUTAD +USE EFTDIRAD_MOD ,ONLY : EFTDIRAD +! + +IMPLICIT NONE + +! Dummy arguments + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_GPB +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(OUT) :: PGP2(:,:,:) + +! Local variables +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER :: ZGTF(:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: JGL,IGL,J1,J2 +INTEGER(KIND=JPIM) :: IFGP2,IFGP3A,IFGP3B,IOFF,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Field distribution in Spectral/Fourier space + +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTLAD_MOD:EFTDIR_CTLAD',0,ZHOOK_HANDLE) +CALL GSTATS(133,0) + +IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +ELSE + ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) + ZGTF => ZGTF_HEAP(:,:) +ENDIF + +ZGTF(:,:)=0._JPRB + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IVSETSC(:) = -1 +ENDIF +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF + +CALL GSTATS(1642,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + CALL FOURIER_OUTAD(ZGTF,KF_FS,IGL) + +! Fourier transform + + IF(KF_FS>0) THEN + CALL EFTDIRAD(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1642,1) +CALL GSTATS(133,1) + +! Transposition + +CALL GSTATS(183,0) +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:) = -1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + DO J3=1,UBOUND(PGP3A,3) + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + DO J3=1,UBOUND(PGP3B,3) + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G +ENDIF +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +CALL GSTATS(183,1) +IF (LHOOK) CALL DR_HOOK('EFTDIR_CTLAD_MOD:EFTDIR_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIR_CTLAD +END MODULE EFTDIR_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eftdirad_mod.F90 b/src/etrans/cpu/internal/eftdirad_mod.F90 new file mode 100644 index 000000000..d3cb36349 --- /dev/null +++ b/src/etrans/cpu/internal/eftdirad_mod.F90 @@ -0,0 +1,122 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFTDIRAD_MOD +CONTAINS +SUBROUTINE EFTDIRAD(PREEL,KFIELDS,KGL) + +!**** *EFTDIRAD - Direct Fourier transform + +! Purpose. Routine for Grid-point to Fourier transform - adjoint +! -------- + +!** Interface. +! ---------- +! CALL EFTDIRAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW +!USE TPM_TRANS +USE TPM_GEOMETRY ,ONLY : G +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T +USE TPMALD_FFT , ONLY : TALD +#endif +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +USE TPM_DIM ,ONLY : R +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(INOUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTDIRAD_MOD:EFTDIRAD',0,ZHOOK_HANDLE) + +ITYPE = 1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG) +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO +DO JJ=1,1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = 2.0_JPRB * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +#ifdef WITH_FFT992 +IF( TALD%LFFT992 )THEN + + CALL FFT992(PREEL(1,IOFF),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + +ELSE +#endif + + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,TW%LALL_FFTW,PREEL) + +#ifdef WITH_FFT992 +ENDIF +#endif + + + ! Change of metric (not in forward routine) +ZNORM=1.0_JPRB/(2.0_JPRB*REAL(ILOEN,JPRB)) +DO JJ=1,ILOEN + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = ZNORM * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EFTDIRAD_MOD:EFTDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTDIRAD +END MODULE EFTDIRAD_MOD diff --git a/src/etrans/cpu/internal/eftinv_ctl_mod.F90 b/src/etrans/cpu/internal/eftinv_ctl_mod.F90 new file mode 100644 index 000000000..70c747003 --- /dev/null +++ b/src/etrans/cpu/internal/eftinv_ctl_mod.F90 @@ -0,0 +1,284 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFTINV_CTL_MOD +CONTAINS +SUBROUTINE EFTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTINV_CTL - Inverse Fourier transform control + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINV_CTL(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! G. Hello : 03-10-14 old way of calling +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NERR ,NSTACK_MEMORY_TR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D + +USE FOURIER_IN_MOD ,ONLY : FOURIER_IN +USE EFSC_MOD ,ONLY : EFSC +USE FTINV_MOD ,ONLY : FTINV +USE TRLTOG_MOD ,ONLY : TRLTOG +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGP2(:,:,:) + +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER :: ZGTF(:,:) +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! 1. Copy Fourier data to local array + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD:EFTINV_CTL',0,ZHOOK_HANDLE) +CALL GSTATS(107,0) + +IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +ELSE + ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) +! Now, force the OS to allocate this shared array right now, not when it starts +! to be used which is an OPEN-MP loop, that would cause a threads synchronization lock : + IF (KF_FS > 0 .AND. D%NLENGTF > 0) THEN + ZGTF_HEAP(1,1)=HUGE(1._JPRB) + ENDIF + ZGTF => ZGTF_HEAP(:,:) +ENDIF + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +CALL GSTATS(1639,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + CALL FOURIER_IN(ZGTF,KF_OUT_LT,IGL) + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSC(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 3. Fourier transform + IF(KF_FS > 0) THEN + CALL FTINV(ZGTF,KF_FS,IGL) + ENDIF +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1639,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF +CALL GSTATS(107,1) + +! 4. Transposition + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSEIF(PRESENT(KVSETSC2).OR.PRESENT(KVSETSC3A)& + & .OR.PRESENT(KVSETSC3B)) THEN + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTL_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(157,0) +CALL TRLTOG(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(157,1) + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTL_MOD:EFTINV_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV_CTL +END MODULE EFTINV_CTL_MOD diff --git a/src/etrans/cpu/internal/eftinv_ctlad_mod.F90 b/src/etrans/cpu/internal/eftinv_ctlad_mod.F90 new file mode 100644 index 000000000..cf476b0a0 --- /dev/null +++ b/src/etrans/cpu/internal/eftinv_ctlad_mod.F90 @@ -0,0 +1,306 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFTINV_CTLAD_MOD +CONTAINS +SUBROUTINE EFTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,KVSETUV,KVSETSC,KPTRGP, & + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) + +!**** *EFTINV_CTLAD - Inverse Fourier transform control - adjoint + +! Purpose. Control routine for Fourier to Gridpoint transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINV_CTLAD(..) + +! Explicit arguments : +! -------------------- +! PGP - gridpoint array +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KVSETUV - "B" set in spectral/fourier space for +! u and v variables +! KVSETSC - "B" set in spectral/fourier space for +! scalar variables +! KPTRGP - pointer array to fi3elds in gridpoint space + +! Method. +! ------- + +! Externals. TRLTOG - transposition routine +! ---------- FOURIER_IN - copy fourier data from Fourier buffer +! FTINV - fourier transform +! FSC - Fourier space computations + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NERR ,NSTACK_MEMORY_TR +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF, LDIVGP, LSCDERS, LUVDER, LVORGP +USE TPM_DISTR ,ONLY : D + +USE FOURIER_INAD_MOD ,ONLY : FOURIER_INAD +USE EFSCAD_MOD ,ONLY : EFSCAD +USE EFTINVAD_MOD ,ONLY : EFTINVAD +USE TRGTOL_MOD ,ONLY : TRGTOL +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE EXTPER_MOD ,ONLY : EXTPER +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM) ,INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPTRGP(:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL , INTENT(IN) :: PGP2(:,:,:) + +! ------------------------------------------------------------------ + +REAL(KIND=JPRB),TARGET :: ZGTF_STACK(KF_FS*MIN(1,MAX(0,NSTACK_MEMORY_TR)),D%NLENGTF) +REAL(KIND=JPRB),TARGET, ALLOCATABLE :: ZGTF_HEAP(:,:) +REAL(KIND=JPRB),POINTER :: ZGTF(:,:) +REAL(KIND=JPRB),TARGET :: ZDUM(1,D%NLENGTF) +REAL(KIND=JPRB),POINTER :: ZUV(:,:) +REAL(KIND=JPRB),POINTER :: ZSCALAR(:,:) +REAL(KIND=JPRB),POINTER :: ZNSDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZEWDERS(:,:) +REAL(KIND=JPRB),POINTER :: ZUVDERS(:,:) + +INTEGER(KIND=JPIM) :: IST, IBLEN +INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) +INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) +INTEGER(KIND=JPIM) :: IVSET(KF_GP) +INTEGER(KIND=JPIM) :: J3,JGL,IGL,IOFF,IFGP2,IFGP3A,IFGP3B,IGP3APAR,IGP3BPAR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! 4. Transposition + +IF (LHOOK) CALL DR_HOOK('EFTINV_CTLAD_MOD:EFTINV_CTLAD',0,ZHOOK_HANDLE) + +IF (NSTACK_MEMORY_TR == 1) THEN + ZGTF => ZGTF_STACK(:,:) +ELSE + ALLOCATE(ZGTF_HEAP(KF_FS,D%NLENGTF)) + ZGTF => ZGTF_HEAP(:,:) +ENDIF + +ZGTF(:,:)=0._JPRB + +IF(PRESENT(KVSETUV)) THEN + IVSETUV(:) = KVSETUV(:) +ELSE + IVSETUV(:) = -1 +ENDIF + +IVSETSC(:)=-1 +IF(PRESENT(KVSETSC)) THEN + IVSETSC(:) = KVSETSC(:) +ELSE + IOFF=0 + IF(PRESENT(KVSETSC2)) THEN + IFGP2=UBOUND(KVSETSC2,1) + IVSETSC(1:IFGP2)=KVSETSC2(:) + IOFF=IOFF+IFGP2 + ENDIF + IF(PRESENT(KVSETSC3A)) THEN + IFGP3A=UBOUND(KVSETSC3A,1) + IGP3APAR=UBOUND(PGP3A,3) + IF(LSCDERS) IGP3APAR=IGP3APAR/3 + DO J3=1,IGP3APAR + IVSETSC(IOFF+1:IOFF+IFGP3A)=KVSETSC3A(:) + IOFF=IOFF+IFGP3A + ENDDO + ENDIF + IF(PRESENT(KVSETSC3B)) THEN + IFGP3B=UBOUND(KVSETSC3B,1) + IGP3BPAR=UBOUND(PGP3B,3) + IF(LSCDERS) IGP3BPAR=IGP3BPAR/3 + DO J3=1,IGP3BPAR + IVSETSC(IOFF+1:IOFF+IFGP3B)=KVSETSC3B(:) + IOFF=IOFF+IFGP3B + ENDDO + ENDIF + IF(IOFF /= KF_SCALARS_G ) THEN + WRITE(NERR,*)'FTINV_CTLAD:IOFF,KF_SCALARS_G ',IOFF,KF_SCALARS_G + CALL ABORT_TRANS('FTINV_CTLAD_MOD:IOFF /= KF_SCALARS_G') + ENDIF +ENDIF + +IST = 1 +IF(KF_UV_G > 0) THEN + IF( LVORGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IF( LDIVGP) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + ENDIF + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF +IF(KF_UV_G > 0 .AND. LUVDER) THEN + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G + IVSET(IST:IST+KF_UV_G-1) = IVSETUV(:) + IST = IST+KF_UV_G +ENDIF +IF(KF_SCALARS_G > 0) THEN + IF(LSCDERS) THEN + IVSET(IST:IST+KF_SCALARS_G-1) = IVSETSC(:) + IST = IST+KF_SCALARS_G + ENDIF +ENDIF + +CALL GSTATS(182,0) +CALL TRGTOL(ZGTF,KF_FS,KF_GP,KF_SCALARS_G,IVSET,KPTRGP,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2) +CALL GSTATS(182,1) + +! Periodization of auxiliary fields in x direction +IF(R%NNOEXTZL>0) THEN + CALL EXTPER(ZGTF,R%NDLON+R%NNOEXTZL,1,R%NDLON,KF_FS,D%NDGL_FS,INT(D%NSTAGTF,KIND=JPIM),0) +ENDIF + + +! 3. Fourier transform + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + IST = 1 + IF(LVORGP) THEN + IST = IST+KF_UV + ENDIF + IF(LDIVGP) THEN + IST = IST+KF_UV + ENDIF + ZUV => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ZSCALAR => ZGTF(IST:IST+KF_SCALARS-1,:) + IST = IST+KF_SCALARS + ZNSDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + IST = IST+KF_SCDERS + IF(LUVDER) THEN + ZUVDERS => ZGTF(IST:IST+2*KF_UV-1,:) + IST = IST+2*KF_UV + ELSE + ZUVDERS => ZDUM(1:1,:) + ENDIF + IF(KF_SCDERS > 0) THEN + ZEWDERS => ZGTF(IST:IST+KF_SCDERS-1,:) + ELSE + ZEWDERS => ZDUM(1:1,:) + ENDIF +ENDIF + +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here +ENDIF + +CALL GSTATS(132,0) + +CALL GSTATS(1641,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,IGL) +DO JGL=1,D%NDGL_FS + IGL = JGL + IF(KF_FS > 0) THEN + CALL EFTINVAD(ZGTF,KF_FS,IGL) + ENDIF + +! 2. Fourier space computations + + IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + CALL EFSCAD(IGL,KF_UV,KF_SCALARS,KF_SCDERS,& + & ZUV,ZSCALAR,ZNSDERS,ZEWDERS,ZUVDERS) + ENDIF + +! 1. Copy Fourier data to local array + + CALL FOURIER_INAD(ZGTF,KF_OUT_LT,IGL) + +ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1641,1) + +IF(KF_UV > 0 .OR. KF_SCDERS > 0) THEN + NULLIFY(ZUV) + NULLIFY(ZSCALAR) + NULLIFY(ZNSDERS) + NULLIFY(ZUVDERS) + NULLIFY(ZEWDERS) +ENDIF + +CALL GSTATS(132,1) +IF (LHOOK) CALL DR_HOOK('EFTINV_CTLAD_MOD:EFTINV_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINV_CTLAD +END MODULE EFTINV_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eftinvad_mod.F90 b/src/etrans/cpu/internal/eftinvad_mod.F90 new file mode 100644 index 000000000..606ded25f --- /dev/null +++ b/src/etrans/cpu/internal/eftinvad_mod.F90 @@ -0,0 +1,126 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EFTINVAD_MOD +CONTAINS +SUBROUTINE EFTINVAD(PREEL,KFIELDS,KGL) + +!**** *EFTINVAD - Inverse Fourier transform - adjoint + +! Purpose. Routine for Fourier to Grid-point transform +! -------- + +!** Interface. +! ---------- +! CALL EFTINVAD(..) + +! Explicit arguments : PREEL - Fourier/grid-point array +! -------------------- KFIELDS - number of fields + +! Method. +! ------- + +! Externals. FFT992 - FFT routine +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T +USE TPMALD_FFT, ONLY :: TALD +#endif +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS,KGL +REAL(KIND=JPRB), INTENT(OUT) :: PREEL(:,:) + +INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,IJUMP,JJ,JF,ILOEN +INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE + +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EFTINVAD_MOD:EFTINVAD',0,ZHOOK_HANDLE) + +ITYPE =-1 +IJUMP = 1 +IGLG = D%NPTRLS(MYSETW)+KGL-1 +ILOEN = G%NLOEN(IGLG)+R%NNOEXTZL +IST = 2*(G%NMEN(IGLG)+1)+1 +ILEN = ILOEN+3-IST +IOFF = D%NSTAGTF(KGL)+1 + +! ! Change of metric (not in forward routine) + +#ifdef WITH_FFT992 +IF( TALD%LFFT992 )THEN + + CALL FFT992(PREEL(1,IOFF),T%TRIGS(1,KGL),& + &T%NFAX(1,KGL),KFIELDS,IJUMP,ILOEN,KFIELDS,ITYPE) + +ELSE +#endif + + IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL + ICLEN=(IRLEN/2+1)*2 + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,TW%LALL_FFTW,PREEL) + +#ifdef WITH_FFT992 +ENDIF +#endif + +ZNORM=2.0_JPRB*REAL(ILOEN,JPRB) +DO JJ=1,1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = (ZNORM/2.0_JPRB) * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +DO JJ=3,ILOEN+1 + DO JF=1,KFIELDS + PREEL(JF,IOFF-1+JJ) = ZNORM * PREEL(JF,IOFF-1+JJ) + ENDDO +ENDDO + +DO JJ=1,ILEN + DO JF=1,KFIELDS + PREEL(JF,IST+IOFF-1+JJ-1) = 0.0_JPRB + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EFTINVAD_MOD:EFTINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EFTINVAD +END MODULE EFTINVAD_MOD diff --git a/src/etrans/cpu/internal/egath_spec_control_mod.F90 b/src/etrans/cpu/internal/egath_spec_control_mod.F90 new file mode 100644 index 000000000..c2ec2e256 --- /dev/null +++ b/src/etrans/cpu/internal/egath_spec_control_mod.F90 @@ -0,0 +1,212 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EGATH_SPEC_CONTROL_MOD +CONTAINS +SUBROUTINE EGATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& + & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,KCPL2M,LDZA0IP) + +!**** *GATH_SPEC_CONTROL* - Gather global spectral array from processors + +! Purpose. +! -------- +! Routine for gathering spectral array + +!** Interface. +! ---------- +! CALL GATH_SPEC_CONTROL(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be distributed +! KTO(:) - Processor responsible for distributing each field +! KVSET(:) - "B-Set" for each field +! PSPEC(:,:) - Local spectral array + +! ------------------------------------------------------------------ + + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD + +USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYPROC, NPROC +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE SET2PE_MOD ,ONLY : SET2PE + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KVSET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KCPL2M(0:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + +REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG) +REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND +INTEGER(KIND=JPIM) :: IRCV,ISP,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),IPOS0,JNM +INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS + +! ------------------------------------------------------------------ + + +CALL ABORT_TRANS('EGATH_SPEC_CONTROL:DEAD CODE') +!GATHER SPECTRAL ARRAY + +IF( NPROC == 1 ) THEN + CALL GSTATS(1644,0) + IF(LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JM=1,KSPEC2_G + DO JFLD=1,KFGATHG + PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) + DO JFLD=1,KFGATHG + DO JM=1,KSPEC2_G + PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + ENDDO + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) +ELSE + IMYFIELDS = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IMYFIELDS = IMYFIELDS+1 + ENDIF + ENDDO + IF(IMYFIELDS>0) THEN + ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) + II = 0 + CALL GSTATS(1804,0) + DO JM=0,KSMAX + DO JN=0,KCPL2M(JM)/2-1 + IDIST(II+1) = KDIM0G(JM)+4*JN + IDIST(II+2) = KDIM0G(JM)+4*JN+1 + IDIST(II+3) = KDIM0G(JM)+4*JN+2 + IDIST(II+4) = KDIM0G(JM)+4*JN+3 + II = II+4 + ENDDO + ENDDO + CALL GSTATS(1804,1) + ENDIF + + CALL GSTATS_BARRIER(788) + + !Send + CALL GSTATS(810,0) + IFLDS = 0 + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + + IFLDS = IFLDS+1 + ISND = KTO(JFLD) + ITAG = MTAGDISTSP+JFLD+17 + IF(LDIM1_IS_FLD) THEN + ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) + CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ELSE + CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& + &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& + &CDSTRING='GATH_SPEC_CONTROL') + ENDIF + ENDIF + ENDDO + ENDIF + + ! Recieve + IFLDR = 0 + DO JFLD=1,KFGATHG + IF(KTO(JFLD) == MYPROC) THEN + IBSET = KVSET(JFLD) + IFLDR = IFLDR+1 + DO JA=1,NPRTRW + ILEN = KPOSSP(JA+1)-KPOSSP(JA) + IF( ILEN > 0 )THEN + CALL SET2PE(IRCV,0,0,JA,IBSET) + ITAG = MTAGDISTSP+JFLD+17 + ISTA = KPOSSP(JA) + ISTP = ISTA+ILEN-1 + CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& + &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + &CDSTRING='GATH_SPEC_CONTROL') + IF( ILENR /= ILEN )THEN + WRITE(0,'("GATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& + &JFLD,JA,ILEN,ILENR + CALL ABORT_TRANS('GATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') + ENDIF + ENDIF + ENDDO + ENDIF + ENDDO + + ! Check for completion of sends + IF(KSPEC2 > 0 )THEN + DO JFLD=1,KFGATHG + IBSET = KVSET(JFLD) + IF( IBSET == MYSETV )THEN + CALL MPL_WAIT(ISENDREQ(JFLD), & + & CDSTRING='GATH_GRID_CTL: WAIT') + ENDIF + ENDDO + ENDIF + CALL GSTATS(810,1) + CALL GSTATS_BARRIER2(788) + + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN,ISP) + DO JFLD=1,IMYFIELDS + IF(LDIM1_IS_FLD) THEN + DO JNM=1,KSPEC2_G + PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ELSE + DO JNM=1,KSPEC2_G + PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + CALL GSTATS(1644,1) + IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) + + !Synchronize processors + CALL GSTATS(785,0) + CALL MPL_BARRIER(CDSTRING='GATH_SPEC_CONTROL:') + CALL GSTATS(785,1) +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC_CONTROL +END MODULE EGATH_SPEC_CONTROL_MOD + + diff --git a/src/etrans/cpu/internal/einv_trans_ctl_mod.F90 b/src/etrans/cpu/internal/einv_trans_ctl_mod.F90 new file mode 100644 index 000000000..56ad491a1 --- /dev/null +++ b/src/etrans/cpu/internal/einv_trans_ctl_mod.F90 @@ -0,0 +1,309 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EINV_TRANS_CTL_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,FSPGL_PROC,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PSPMEANU,PSPMEANV) + +!**** *EINV_TRANS_CTL* - Control routine for inverse spectral transform. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTL - control of Legendre transform +! FTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTINV_CTL_MOD ,ONLY : ELTINV_CTL +USE EFTINV_CTL_MOD ,ONLY : EFTINV_CTL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G + +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + CALL ELTINV_CTL(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV,FSPGL_PROC=FSPGL_PROC) + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTINV_CTL(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL ELTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PSPMEANU,PSPMEANV=PSPMEANV,FSPGL_PROC=FSPGL_PROC) + + CALL EFTINV_CTL(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + +ENDIF +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTL_MOD:EINV_TRANS_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTL +END MODULE EINV_TRANS_CTL_MOD diff --git a/src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 b/src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 new file mode 100644 index 000000000..68b35001b --- /dev/null +++ b/src/etrans/cpu/internal/einv_trans_ctlad_mod.F90 @@ -0,0 +1,303 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EINV_TRANS_CTLAD_MOD +CONTAINS +SUBROUTINE EINV_TRANS_CTLAD(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& + & KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,KVSETUV,KVSETSC,PGP,& + & PSPSC3A,PSPSC3B,PSPSC2,KVSETSC3A,KVSETSC3B,KVSETSC2,PGPUV,PGP3A,PGP3B,PGP2,& + & PMEANU,PMEANV) + +!**** *EINV_TRANS_CTLAD* - Control routine for inverse spectral transform adj. + +! Purpose. +! -------- +! Control routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTLAD(...) + +! Explicit arguments : +! -------------------- +! KF_UV_G - global number of spectral u-v fields +! KF_SCALARS_G - global number of scalar spectral fields +! KF_GP - total number of output gridpoint fields +! KF_FS - total number of fields in fourier space +! KF_OUT_LT - total number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! PGP(:,:,:) - gridpoint fields (output) + +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): + +! vorticity : KF_UV_G fields +! divergence : KF_UV_G fields +! u : KF_UV_G fields +! v : KF_UV_G fields +! scalar fields : KF_SCALARS_G fields +! N-S derivative of scalar fields : KF_SCALARS_G fields +! E-W derivative of u : KF_UV_G fields +! E-W derivative of v : KF_UV_G fields +! E-W derivative of scalar fields : KF_SCALARS_G fields + +! Method. +! ------- + +! Externals. SHUFFLE - reshuffle fields for load balancing +! ---------- FIELD_SPLIT - split fields in NPROMATR packets +! LTINV_CTLAD - control of Legendre transform +! FTINV_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 01-01-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NPROMATR +USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP +!USE TPM_DISTR + +USE SHUFFLE_MOD ,ONLY : SHUFFLE +USE FIELD_SPLIT_MOD ,ONLY : FIELD_SPLIT +USE ELTINV_CTLAD_MOD ,ONLY : ELTINV_CTLAD +USE EFTINV_CTLAD_MOD ,ONLY : EFTINV_CTLAD +! + +IMPLICIT NONE + +! Declaration of arguments +! +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS_G +INTEGER(KIND=JPIM), INTENT(IN) :: KF_GP +INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PMEANV(:) + +! Local variables + +INTEGER(KIND=JPIM) :: IPTRGP(KF_GP),IPTRSPUV(NPROMATR),IPTRSPSC(NPROMATR) +INTEGER(KIND=JPIM) :: ISHFUV_G(KF_GP),ISHFSC_G(KF_GP) +INTEGER(KIND=JPIM) :: IVSETUV(KF_GP),IVSETSC(KF_GP) +INTEGER(KIND=JPIM) :: IBLKS,JBLK,ISTUV_G,IENUV_G +INTEGER(KIND=JPIM) :: IF_UV_G,IF_UV,ISTUV,IF_SCALARS,IF_SCALARS_G,IF_FS,IF_GP +INTEGER(KIND=JPIM) :: JFLD,ISTSC_G,IENSC_G,ISTSC,IENSC,IENUV,IF_SCDERS,IF_OUT_LT +INTEGER(KIND=JPIM) :: IOFFD,IOFFU,IOFFV,IOFFUVD,IOFFSC,IOFFSCNS,IOFFSCEW,IOFF,IF_GPB +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Perform transform + +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTLAD_MOD:EINV_TRANS_CTLAD',0,ZHOOK_HANDLE) +IF_GPB = 2*KF_UV_G+KF_SCALARS_G +IF(NPROMATR > 0 .AND. IF_GPB > NPROMATR) THEN + + ! Fields to be split into packets + + CALL SHUFFLE(KF_UV_G,KF_SCALARS_G,ISHFUV_G,IVSETUV,ISHFSC_G,IVSETSC, & + & KVSETUV,KVSETSC) + + IBLKS=(IF_GPB-1)/NPROMATR+1 + + DO JBLK=1,IBLKS + + CALL FIELD_SPLIT(JBLK,IF_GPB,KF_UV_G,IVSETUV,IVSETSC,& + & ISTUV_G,IENUV_G,IF_UV_G,ISTSC_G,IENSC_G,IF_SCALARS_G,& + & ISTUV,IENUV,IF_UV,ISTSC,IENSC,IF_SCALARS) + + IF(LSCDERS) THEN + IF_SCDERS = IF_SCALARS + ELSE + IF_SCDERS = 0 + ENDIF + + IF_OUT_LT = 2*IF_UV + IF_SCALARS+IF_SCDERS + IF(LVORGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF(LDIVGP) THEN + IF_OUT_LT = IF_OUT_LT+IF_UV + ENDIF + IF_FS = IF_OUT_LT+IF_SCDERS + IF(LUVDER) THEN + IF_FS = IF_FS+2*IF_UV + ENDIF + + IF_GP = 2*IF_UV_G+IF_SCALARS_G + IOFFD = 0 + IOFFU = 0 + IOFFV = KF_UV_G + IOFFUVD = 2*KF_UV_G+KF_SCALARS_G + IOFFSC = 2*KF_UV_G + IF(LVORGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFD = KF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LDIVGP) THEN + IF_GP = IF_GP+IF_UV_G + IOFFU = IOFFU+KF_UV_G + IOFFV = IOFFV+KF_UV_G + IOFFUVD =IOFFUVD+KF_UV_G + IOFFSC = IOFFSC+KF_UV_G + ENDIF + IF(LSCDERS) THEN + IF_GP = IF_GP+2*IF_SCALARS_G + IOFFUVD =IOFFUVD+KF_SCALARS_G + IOFFSCNS = IOFFSC+KF_SCALARS_G + IOFFSCEW = IOFFSC+2*KF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IF_GP = IF_GP+2*IF_UV_G + IOFFSCEW = IOFFSCEW+2*KF_UV_G + ENDIF + + DO JFLD=1,IF_UV_G + IOFF = 0 + IF(LVORGP) THEN + IPTRGP(JFLD+IOFF) = ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IF(LDIVGP) THEN + IPTRGP(JFLD+IOFF) = IOFFD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFU+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFV+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G+IF_SCALARS_G + IF(LSCDERS) THEN + IOFF = IOFF+IF_SCALARS_G + ENDIF + IF(LUVDER) THEN + IPTRGP(JFLD+IOFF) = IOFFUVD+ISHFUV_G(ISTUV_G+JFLD-1) + IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFUVD+KF_UV_G+ISHFUV_G(ISTUV_G+JFLD-1) + ENDIF + ENDDO + + DO JFLD=1,IF_SCALARS_G + IOFF = 2*IF_UV_G + IF (LVORGP) IOFF = IOFF+IF_UV_G + IF (LDIVGP) IOFF = IOFF+IF_UV_G + IPTRGP(JFLD+IOFF) = IOFFSC+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LSCDERS) THEN + IPTRGP(JFLD+IOFF) = IOFFSCNS+ISHFSC_G(ISTSC_G+JFLD-1) + IOFF = IOFF+IF_SCALARS_G + IF(LUVDER) THEN + IOFF = IOFF+2*IF_UV_G + ENDIF + IPTRGP(JFLD+IOFF) = IOFFSCEW+ISHFSC_G(ISTSC_G+JFLD-1) + ENDIF + ENDDO + DO JFLD=1,IF_UV + IPTRSPUV(JFLD) = ISTUV+JFLD-1 + ENDDO + DO JFLD=1,IF_SCALARS + IPTRSPSC(JFLD) = ISTSC+JFLD-1 + ENDDO + + IF(IF_UV_G > 0 .AND. IF_SCALARS_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_UV_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETUV=IVSETUV(ISTUV_G:IENUV_G),& + & KPTRGP=IPTRGP,& + & PGP=PGP) + ELSEIF(IF_SCALARS_G > 0) THEN + CALL EFTINV_CTLAD(IF_UV_G,IF_SCALARS_G,& + & IF_UV,IF_SCALARS,IF_SCDERS,IF_GP,IF_FS,IF_OUT_LT,& + & KVSETSC=IVSETSC(ISTSC_G:IENSC_G),KPTRGP=IPTRGP,& + & PGP=PGP) + ENDIF + CALL ELTINV_CTLAD(IF_OUT_LT,IF_UV,IF_SCALARS,IF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & KFLDPTRUV=IPTRSPUV,KFLDPTRSC=IPTRSPSC,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV) + ENDDO + +ELSE + + ! No splitting of fields, transform done in one go + + CALL EFTINV_CTLAD(KF_UV_G,KF_SCALARS_G,& + & KF_UV,KF_SCALARS,KF_SCDERS,KF_GP,KF_FS,KF_OUT_LT,& + & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& + & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& + & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + + CALL ELTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS, & + & PSPVOR=PSPVOR,PSPDIV=PSPDIV,PSPSCALAR=PSPSCALAR,& + & PSPSC3A=PSPSC3A,PSPSC3B=PSPSC3B,PSPSC2=PSPSC2,& + & PSPMEANU=PMEANU,PSPMEANV=PMEANV ) +ENDIF +IF (LHOOK) CALL DR_HOOK('EINV_TRANS_CTLAD_MOD:EINV_TRANS_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EINV_TRANS_CTLAD +END MODULE EINV_TRANS_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eledir_mod.F90 b/src/etrans/cpu/internal/eledir_mod.F90 new file mode 100644 index 000000000..ae9596edd --- /dev/null +++ b/src/etrans/cpu/internal/eledir_mod.F90 @@ -0,0 +1,109 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELEDIR_MOD +CONTAINS +SUBROUTINE ELEDIR(KM,KFC,KLED2,PFFT) + +!**** *ELEDIR* - Direct meridional transform. + +! Purpose. +! -------- +! Direct meridional tranform of state variables. + +!** Interface. +! ---------- +! CALL ELEDIR(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +#ifdef WITH_FFT992 +USE TPMALD_FFT ,ONLY : TALD +#endif +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +USE TPMALD_DIM ,ONLY : RALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KFC,KLED2 +REAL(KIND=JPRB) , INTENT(INOUT) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +! ------------------------------------------------------------------ + +!* 1. PERFORM FOURIER TRANFORM. +! -------------------------- + +IF (KFC>0) THEN + ITYPE=-1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG +#ifdef WITH_FFT992 + IF( TALD%LFFT992 )THEN + CALL FFT992(PFFT,TALD%TRIGSE,TALD%NFAXE,1,ICLEN,IRLEN,KFC,ITYPE) +#endif + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,TW%LALL_FFTW,PFFT) +#ifdef WITH_FFT992 + ENDIF +#endif +ENDIF + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEDIR +END MODULE ELEDIR_MOD diff --git a/src/etrans/cpu/internal/eledirad_mod.F90 b/src/etrans/cpu/internal/eledirad_mod.F90 new file mode 100644 index 000000000..738dc4b75 --- /dev/null +++ b/src/etrans/cpu/internal/eledirad_mod.F90 @@ -0,0 +1,129 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELEDIRAD_MOD +CONTAINS +SUBROUTINE ELEDIRAD(KM,KFC,KLED2,PFFT) + +!**** *ELEDIRAD* - Direct Legendre transform. + +! Purpose. +! -------- +! Direct Legendre tranform of state variables. + +!** Interface. +! ---------- +! CALL ELEDIRAD(...) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFC - number of field to transform +! PAIA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSIA - symmetric part of Fourier +! fields for zonal wavenumber KM +! POA1 - spectral +! fields for zonal wavenumber KM +! PLEPO - Legendre polonomials + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - matrix multiply +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-01-28 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - NTMAX instead of NSMAX +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib : fix missing support for FFTW +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +#ifdef WITH_FFT992 +USE TPMALD_FFT ,ONLY : TALD +#endif +USE TPMALD_DIM ,ONLY : RALD +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 + +REAL(KIND=JPRB), INTENT(INOUT) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +INTEGER(KIND=JPIM) :: JF, JJ +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELEDIRAD_MOD:ELEDIRAD',0,ZHOOK_HANDLE) + +IF (KFC>0) THEN + DO JJ=1,1 + DO JF=1,KFC + PFFT(JJ,JF) = 2.0_JPRB * PFFT(JJ,JF) + ENDDO + ENDDO + ITYPE=1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG +#ifdef WITH_FFT992 + IF( TALD%LFFT992 )THEN + CALL FFT992(PFFT,TALD%TRIGSE,TALD%NFAXE,1,RALD%NDGLSUR+R%NNOEXTZG,IRLEN,KFC,ITYPE) + ELSEIF ( ASSOCIATED(TW) )THEN +#endif + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,TW%LALL_FFTW,PFFT) +#ifdef WITH_FFT992 + ENDIF +#endif + ZNORM=1.0_JPRB/(2.0_JPRB*REAL(R%NDGL+R%NNOEXTZG,JPRB)) + DO JJ=1,R%NDGL+R%NNOEXTZG + DO JF=1,KFC + PFFT(JJ,JF) = ZNORM * PFFT(JJ,JF) + ENDDO + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELEDIRAD_MOD:ELEDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEDIRAD +END MODULE ELEDIRAD_MOD diff --git a/src/etrans/cpu/internal/eleinv_mod.F90 b/src/etrans/cpu/internal/eleinv_mod.F90 new file mode 100644 index 000000000..082acedab --- /dev/null +++ b/src/etrans/cpu/internal/eleinv_mod.F90 @@ -0,0 +1,114 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELEINV_MOD +CONTAINS +SUBROUTINE ELEINV(KM,KFC,KF_OUT_LT,PIA) + +!**** *LEINV* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL LEINV(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM, JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +USE TPMALD_DIM ,ONLY : RALD +#ifdef WITH_FFT992 +USE TPMALD_FFT ,ONLY : TALD +#endif +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(INOUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PERFORM LEGENDRE TRANFORM. +! -------------------------- + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',0,ZHOOK_HANDLE) + +IF (KFC>0) THEN + ITYPE=1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG +#ifdef WITH_FFT992 + IF( TALD%LFFT992 )THEN + CALL FFT992(PIA,TALD%TRIGSE,TALD%NFAXE,1,RALD%NDGLSUR+R%NNOEXTZG,IRLEN,KFC,ITYPE) + ELSE +#endif + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,TW%LALL_FFTW,PIA) +#ifdef WITH_FFT992 + ENDIF +#endif +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELEINV_MOD:ELEINV',1,ZHOOK_HANDLE) + +END SUBROUTINE ELEINV +END MODULE ELEINV_MOD diff --git a/src/etrans/cpu/internal/eleinvad_mod.F90 b/src/etrans/cpu/internal/eleinvad_mod.F90 new file mode 100644 index 000000000..04f5cfb1c --- /dev/null +++ b/src/etrans/cpu/internal/eleinvad_mod.F90 @@ -0,0 +1,125 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELEINVAD_MOD +CONTAINS +SUBROUTINE ELEINVAD(KM,KFC,KF_OUT_LT,PIA) + +!**** *ELEINVAD* - Inverse Legendre transform. + +! Purpose. +! -------- +! Inverse Legendre tranform of all variables(kernel). + +!** Interface. +! ---------- +! CALL ELEINVAD(...) + +! Explicit arguments : KM - zonal wavenumber (input-c) +! -------------------- KFC - number of fields to tranform (input-c) +! PIA - spectral fields +! for zonal wavenumber KM (input) +! PAOA1 - antisymmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PSOA1 - symmetric part of Fourier +! fields for zonal wavenumber KM (output) +! PLEPO - Legendre polonomials for zonal +! wavenumber KM (input-c) + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. MXMAOP - calls SGEMVX (matrix multiply) +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LEINVAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 01-Sep-2015 support for FFTW transforms +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_GEOMETRY +!USE TPM_TRANS +USE TPM_FFTW ,ONLY : TW, EXEC_EFFTW +USE TPMALD_DIM ,ONLY : RALD +#ifdef WITH_FFT992 +USE TPMALD_FFT ,ONLY : TALD +#endif +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KFC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +REAL(KIND=JPRB), INTENT(OUT) :: PIA(:,:) + +INTEGER(KIND=JPIM) :: IRLEN, ICLEN, IOFF, ITYPE +INTEGER(KIND=JPIM) :: JJ, JF +REAL(KIND=JPRB) :: ZNORM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELEINVAD_MOD:ELEINVAD',0,ZHOOK_HANDLE) + +IF (KFC>0) THEN + ITYPE=-1 + IRLEN=R%NDGL+R%NNOEXTZG + ICLEN=RALD%NDGLSUR+R%NNOEXTZG +#ifdef WITH_FFT992 + IF( TALD%LFFT992 )THEN + CALL FFT992(PIA,TALD%TRIGSE,TALD%NFAXE,1,ICLEN,IRLEN,KFC,ITYPE) + ELSE +#endif + IOFF=1 + CALL EXEC_EFFTW(ITYPE,IRLEN,ICLEN,IOFF,KFC,TW%LALL_FFTW,PIA) +#ifdef WITH_FFT992 + ENDIF +#endif + ZNORM=2.0_JPRB*REAL(R%NDGL+R%NNOEXTZG,JPRB) + DO JJ=1,1 + DO JF=1,KFC + PIA(JJ,JF) = (ZNORM/2.0_JPRB) * PIA(JJ,JF) + ENDDO + ENDDO + DO JJ=3,R%NDGL+R%NNOEXTZG+1 + DO JF=1,KFC + PIA(JJ,JF) = ZNORM * PIA(JJ,JF) + ENDDO + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('ELEINVAD_MOD:ELEINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELEINVAD +END MODULE ELEINVAD_MOD diff --git a/src/etrans/cpu/internal/ellips.F90 b/src/etrans/cpu/internal/ellips.F90 new file mode 100644 index 000000000..55682502d --- /dev/null +++ b/src/etrans/cpu/internal/ellips.F90 @@ -0,0 +1,100 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +! Jan-2011 P. Marguinaud Interface to thread-safe FA +SUBROUTINE ELLIPS (KSMAX,KMSMAX,KNTMP,KMTMP) +USE PARKIND1, ONLY : JPRD, JPIM +USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +IMPLICIT NONE +! +! ***ELLIPS*** - General routine for computing elliptic truncation +! +! Purpose. +! -------- +! Computation of zonal and meridional limit wavenumbers within the ellipse +! Interface: +! ---------- +! *CALL* *ELLIPS * +! +! Explicit arguments : +! -------------------- +! +! Implicit arguments : +! -------------------- +! +! +! Method. +! ------- +! See documentation +! +! Externals. NONE. +! ---------- +! +! Reference. +! ---------- +! ARPEGE/ALADIN documentation +! +! Author. +! ------- +! G. Radnoti LACE 97/04/04 +! +! Modifications. +! +!------------------------------------------------------------- +! J.Vivoda, 99/05/19 treating NSMAX=0 and NMSMAX=0 +! O.Nuissier, 23/09/01 Change type of real (simple --> +! double precision) +! +! +INTEGER (KIND=JPIM) KSMAX, KMSMAX +INTEGER (KIND=JPIM) KNTMP(0:KMSMAX),KMTMP(0:KSMAX) +! +INTEGER (KIND=JPIM) JM, JN +! +REAL (KIND=JPRD) ZEPS, ZKN, ZKM, ZAUXIL +! +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('ELLIPS',0,ZHOOK_HANDLE) +ZEPS=1.E-10 +ZAUXIL=0. +! +! 1. Computing meridional limit wavenumbers along zonal wavenumbers +! +DO JM=1,KMSMAX-1 +ZKN = REAL(KSMAX,JPRD)/REAL(KMSMAX,JPRD)* & +& SQRT(MAX(ZAUXIL,REAL(KMSMAX**2-JM**2,JPRD))) + KNTMP(JM)=INT(ZKN+ZEPS, JPIM) +ENDDO + +IF( KMSMAX.EQ.0 )THEN + KNTMP(0)=KSMAX +ELSE + KNTMP(0)=KSMAX + KNTMP(KMSMAX)=0 +ENDIF +! +! 2. Computing zonal limit wavenumbers along meridional wavenumbers +! +DO JN=1,KSMAX-1 +ZKM = REAL(KMSMAX,JPRD)/REAL(KSMAX,JPRD)* & + & SQRT(MAX(ZAUXIL,REAL(KSMAX**2-JN**2,JPRD))) + KMTMP(JN)=INT(ZKM+ZEPS, JPIM) +ENDDO + +IF( KSMAX.EQ.0 )THEN + KMTMP(0)=KMSMAX +ELSE + KMTMP(0)=KMSMAX + KMTMP(KSMAX)=0 +ENDIF +! +IF (LHOOK) CALL DR_HOOK('ELLIPS',1,ZHOOK_HANDLE) +END SUBROUTINE ELLIPS diff --git a/src/etrans/cpu/internal/eltdir_ctl_mod.F90 b/src/etrans/cpu/internal/eltdir_ctl_mod.F90 new file mode 100644 index 000000000..97796a36c --- /dev/null +++ b/src/etrans/cpu/internal/eltdir_ctl_mod.F90 @@ -0,0 +1,128 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTDIR_CTL_MOD +CONTAINS +SUBROUTINE ELTDIR_CTL(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV,AUX_PROC) + +!**** *ELTDIR_CTL* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL ELTDIR_CTL(...) + +! Explicit arguments : +! -------------------- +! KF_FS - number of fields in Fourier space +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! KFLDPTRUV(:) - field pointer for vorticity and divergence (input) +! KFLDPTRSC(:) - field pointer for scalarvalued fields (input) +! PSPMEANU(:),PSPMEANV(:) - mean winds + +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTDIR_MOD ,ONLY : ELTDIR +USE EUVTVD_COMM_MOD , ONLY : EUVTVD_COMM +USE TRLTOM_MOD ,ONLY : TRLTOM +USE MPL_MODULE + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2,INUL +REAL(KIND=JPRB) :: ZDUM +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTL_MOD:ELTDIR_CTL',0,ZHOOK_HANDLE) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! enforce allocation here +ENDIF +CALL GSTATS(153,0) +CALL TRLTOM(FOUBUF_IN,FOUBUF,2*KF_FS) +CALL GSTATS(153,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) + +! Periodization of auxiliary fields in y direction + +IF (PRESENT(AUX_PROC)) THEN + CALL AUX_PROC(ZDUM,FOUBUF,2*KF_FS,1,IBLEN,0,D%NUMP,.FALSE.,& + & INUL,D%NPROCL,D%NSTAGT0B,D%NPNTGTB1) +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1645,0) +IF (KF_FS>0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTDIR(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO + IF (KF_UV > 0) THEN + CALL EUVTVD_COMM(KF_UV,PSPMEANU,PSPMEANV,KFLDPTRUV) + ENDIF +ENDIF +CALL GSTATS(1645,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTL_MOD:ELTDIR_CTL',1,ZHOOK_HANDLE) + +! ----------------------------------------------------------------- + +END SUBROUTINE ELTDIR_CTL +END MODULE ELTDIR_CTL_MOD diff --git a/src/etrans/cpu/internal/eltdir_ctlad_mod.F90 b/src/etrans/cpu/internal/eltdir_ctlad_mod.F90 new file mode 100644 index 000000000..ea19442f7 --- /dev/null +++ b/src/etrans/cpu/internal/eltdir_ctlad_mod.F90 @@ -0,0 +1,120 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTDIR_CTLAD_MOD +CONTAINS +SUBROUTINE ELTDIR_CTLAD(KF_FS,KF_UV,KF_SCALARS, & + & PSPVOR,PSPDIV,PSPSCALAR, & + & PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +!**** *ELTDIR_CTLAD* - Control routine for direct Legendre transform + +! Purpose. +! -------- +! Direct Legendre transform + +!** Interface. +! ---------- +! CALL LTDIR_CTLAD(...) + +! Explicit arguments : +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) + +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTDIRAD_MOD ,ONLY : ELTDIRAD +USE TRMTOL_MOD ,ONLY : TRMTOL + + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILED2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +! Transposition from Fourier space distribution to spectral space distribution + +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTLAD_MOD:ELTDIR_CTLAD',0,ZHOOK_HANDLE) +IBLEN = D%NLENGT0B*2*KF_FS +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here +ENDIF +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! force allocation here +ENDIF + +! Direct Legendre transform + +ILED2 = 2*KF_FS +CALL GSTATS(1646,0) +IF(KF_FS > 0) THEN +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTDIRAD(IM,JM,KF_FS,KF_UV,KF_SCALARS,ILED2, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC, PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO +ENDIF +CALL GSTATS(1646,1) + +CALL GSTATS(181,0) +CALL TRMTOL(FOUBUF,FOUBUF_IN,2*KF_FS) +CALL GSTATS(181,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) +IF (LHOOK) CALL DR_HOOK('ELTDIR_CTLAD_MOD:ELTDIR_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR_CTLAD +END MODULE ELTDIR_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eltdir_mod.F90 b/src/etrans/cpu/internal/eltdir_mod.F90 new file mode 100644 index 000000000..89932a7eb --- /dev/null +++ b/src/etrans/cpu/internal/eltdir_mod.F90 @@ -0,0 +1,195 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTDIR_MOD +CONTAINS +SUBROUTINE ELTDIR(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD + +USE EPRFI2_MOD ,ONLY : EPRFI2 +USE ELEDIR_MOD ,ONLY : ELEDIR +USE EUVTVD_MOD +USE EUPDSP_MOD ,ONLY : EUPDSP +USE EXTPER_MOD ,ONLY : EXTPER + +! +!**** *ELTDIR* - Control of Direct Legendre transform step + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIR(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2 - prepares the Fourier work arrays for model variables +! ELEDIR - direct Legendre transform +! EUVTVD - +! EUPDSP - updating of spectral arrays (fields) +! EUVTVD_COMM - +! EXTPER - + + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-03-14 G. Radnoti aladin version +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IFC, IINDEX(2*KF_FS), JF, JDIM +INTEGER(KIND=JPIM) :: IFLD, IR, J +INTEGER(KIND=JPIM) :: IUS,IVS,IVORS,IDIVS + +REAL(KIND=JPRB) :: ZFFT(RALD%NDGLSUR+R%NNOEXTZG,KLED2,D%NUMP) +REAL(KIND=JPRB) :: ZVODI(RALD%NDGLSUR+R%NNOEXTZG,MAX(4*KF_UV,1),D%NUMP) + +! Only if R%NNOEXTZG > 0 : +REAL(KIND=JPRB) :: ZFFT2(KLED2,(RALD%NDGLSUR+R%NNOEXTZG)*MIN(1,R%NNOEXTZG)) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',0,ZHOOK_HANDLE) + +IUS = 1 +IVS = 2*KF_UV+1 +IVORS = IUS +IDIVS = IVS +IFC = 2*KF_FS + +!* 1. PREPARE WORK ARRAYS. +! -------------------- + +CALL EPRFI2(KM,KMLOC,KF_FS,ZFFT(:,:,KMLOC)) + +!* 2. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + +IF(R%NNOEXTZG>0) THEN + DO JF = 1,IFC + DO JDIM = 1,R%NDGL + ZFFT2(JF,JDIM)=ZFFT(JDIM,JF,KMLOC) + ENDDO + ENDDO + IINDEX(1)=0 + CALL EXTPER(ZFFT2(:,:),R%NDGL+R%NNOEXTZG,1,R%NDGL,IFC,1,IINDEX,0) + DO JF = 1,IFC + DO JDIM = 1,R%NDGL+R%NNOEXTZG + ZFFT(JDIM,JF,KMLOC) = ZFFT2(JF,JDIM) + ENDDO + ENDDO +ENDIF + +!* 3. DIRECT LEGENDRE TRANSFORM. +! -------------------------- + +CALL ELEDIR(KM,IFC,KLED2,ZFFT(:,:,KMLOC)) + +!* 4. COMPUTE VORTICITY AND DIVERGENCE AND STORE MEAN WIND ON TASK OWNING WAVE 0 +! -------------------------------------------------------------------------- + +IF( KF_UV > 0 ) THEN + CALL EUVTVD(KM,KMLOC,KF_UV,ZFFT(:,IUS:,KMLOC),ZFFT(:,IVS:,KMLOC),& + & ZVODI(:,IVORS:,KMLOC),ZVODI(:,IDIVS:,KMLOC)) + IF (KM == 0) THEN + IF (PRESENT(KFLDPTRUV)) THEN + DO J = 1, KF_UV + IR = 2*J-1 + IFLD=KFLDPTRUV(J) + PSPMEANU(IFLD)=ZFFT(1,IUS-1+IR,KMLOC) + PSPMEANV(IFLD)=ZFFT(1,IVS-1+IR,KMLOC) + ENDDO + ELSE + DO J = 1, KF_UV + IR = 2*J-1 + PSPMEANU(J)=ZFFT(1,IUS-1+IR,KMLOC) + PSPMEANV(J)=ZFFT(1,IVS-1+IR,KMLOC) + ENDDO + ENDIF + ENDIF +ENDIF + +!* 5. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSP(KM,KF_UV,KF_SCALARS,ZFFT(:,:,KMLOC),ZVODI(:,:,KMLOC), & + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,KFLDPTRUV,KFLDPTRSC) + +IF (LHOOK) CALL DR_HOOK('ELTDIR_MOD:ELTDIR',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIR +END MODULE ELTDIR_MOD diff --git a/src/etrans/cpu/internal/eltdirad_mod.F90 b/src/etrans/cpu/internal/eltdirad_mod.F90 new file mode 100644 index 000000000..fc8457faf --- /dev/null +++ b/src/etrans/cpu/internal/eltdirad_mod.F90 @@ -0,0 +1,177 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTDIRAD_MOD +CONTAINS +SUBROUTINE ELTDIRAD(KM,KMLOC,KF_FS,KF_UV,KF_SCALARS,KLED2,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD + +USE EPRFI2AD_MOD ,ONLY : EPRFI2AD +USE ELEDIRAD_MOD ,ONLY : ELEDIRAD +USE EUVTVDAD_MOD +USE EUPDSPAD_MOD ,ONLY : EUPDSPAD + + +!**** *ELTDIRAD* - Control of Direct Legendre transform step - adjoint + +! Purpose. +! -------- +! Tranform from Fourier space to spectral space, compute +! vorticity and divergence. + +!** Interface. +! ---------- +! *CALL* *ELTDIRAD(...)* + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- +! EPRFI2AD - prepares the Fourier work arrays for model variables. +! ELEDIRAD - direct Legendre transform +! EUVTVDAD - +! EUPDSPAD - updating of spectral arrays (fields) + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-24 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified 93-03-19 D. Giard - CDCONF='T' for tendencies +! Modified 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified 94-04-06 R. El khatib Full-POS implementation +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group : 95-10-01 Support for Distributed Memory version +! K. YESSAD (AUGUST 1996): +! - Legendre transforms for transmission coefficients. +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ +! +IMPLICIT NONE +! +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_UV,KF_SCALARS,KLED2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM) :: IFC +INTEGER(KIND=JPIM) :: IUS,IUE,IVS,IVE,IVORS,IVORE,IDIVS,IDIVE + +REAL(KIND=JPRB) :: ZFFT(RALD%NDGLSUR+R%NNOEXTZG,KLED2) +REAL(KIND=JPRB) :: ZVODI(RALD%NDGLSUR+R%NNOEXTZG,MAX(4*KF_UV,1)) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. PREPARE LEGENDRE POLONOMIALS AND EPSNM +! -------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTDIRAD_MOD:ELTDIRAD',0,ZHOOK_HANDLE) +ZFFT=0.0_JPRB +ZVODI=0.0_JPRB + +! ------------------------------------------------------------------ + +!* 6. UPDATE SPECTRAL ARRAYS. +! ----------------------- + +CALL EUPDSPAD(KM,KF_UV,KF_SCALARS,ZFFT,ZVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +! ------------------------------------------------------------------ + +!* 5. COMPUTE VORTICITY AND DIVERGENCE. +! --------------------------------- +IF( KF_UV > 0 ) THEN + IUS = 1 + IUE = 2*KF_UV + IVS = 2*KF_UV+1 + IVE = 4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV +! SET PART OF ZFFT CONTAINING U AND V TO 0. + ZFFT(:,IUS:IVE) = 0.0_JPRB + CALL EUVTVDAD(KM,KMLOC,KF_UV,KFLDPTRUV,ZFFT(:,IUS:IUE),ZFFT(:,IVS:IVE),& + & ZVODI(:,IVORS:IVORE),ZVODI(:,IDIVS:IDIVE),PSPMEANU,PSPMEANV) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. DIRECT LEGENDRE TRANSFORM. +! -------------------------- +IFC = 2*KF_FS +CALL ELEDIRAD(KM,IFC,KLED2,ZFFT) + +! ------------------------------------------------------------------ + +!* 3. FOURIER SPACE COMPUTATIONS. +! --------------------------- + +! ------------------------------------------------------------------ + +!* 2. PREPARE WORK ARRAYS. +! -------------------- + +CALL EPRFI2AD(KM,KMLOC,KF_FS,ZFFT) +IF (LHOOK) CALL DR_HOOK('ELTDIRAD_MOD:ELTDIRAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTDIRAD +END MODULE ELTDIRAD_MOD + diff --git a/src/etrans/cpu/internal/eltinv_ctl_mod.F90 b/src/etrans/cpu/internal/eltinv_ctl_mod.F90 new file mode 100644 index 000000000..f79ba89b5 --- /dev/null +++ b/src/etrans/cpu/internal/eltinv_ctl_mod.F90 @@ -0,0 +1,140 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTINV_CTL_MOD +CONTAINS +SUBROUTINE ELTINV_CTL(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,PSPMEANU,PSPMEANV,FSPGL_PROC) + +!**** *ELTINV_CTL* - Control routine for inverse Legandre transform. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D + +USE ELTINV_MOD ,ONLY : ELTINV +USE TRMTOL_MOD ,ONLY : TRMTOL +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: JM,IM,IBLEN,ILEI2,IDIM1 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTL_MOD:ELTINV_CTL',0,ZHOOK_HANDLE) +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IDIM1 = 2*KF_OUT_LT +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF)) THEN + DEALLOCATE(FOUBUF) + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! to force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF(MAX(1,IBLEN))) + FOUBUF(1)=0._JPRB ! to force allocation here +ENDIF +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! to force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! to force allocation here +ENDIF + +IF(KF_OUT_LT > 0) THEN +CALL GSTATS(1647,0) +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM) + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL ELTINV(IM,JM,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,IDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR ,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + ENDDO +!$OMP END PARALLEL DO +CALL GSTATS(1647,1) +ENDIF + +CALL GSTATS(152,0) +CALL TRMTOL(FOUBUF_IN,FOUBUF,2*KF_OUT_LT) +CALL GSTATS(152,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +IF (LHOOK) CALL DR_HOOK('ELTINV_CTL_MOD:ELTINV_CTL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV_CTL +END MODULE ELTINV_CTL_MOD diff --git a/src/etrans/cpu/internal/eltinv_ctlad_mod.F90 b/src/etrans/cpu/internal/eltinv_ctlad_mod.F90 new file mode 100644 index 000000000..8cca104ee --- /dev/null +++ b/src/etrans/cpu/internal/eltinv_ctlad_mod.F90 @@ -0,0 +1,127 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTINV_CTLAD_MOD +CONTAINS +SUBROUTINE ELTINV_CTLAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +!**** *ELTINV_CTLAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL EINV_TRANS_CTL(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-06-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN ,ONLY : LALLOPERM +!USE TPM_DIM +USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN +USE TPM_DISTR ,ONLY : D +USE ELTINVAD_MOD ,ONLY : ELTINVAD +USE TRLTOM_MOD ,ONLY : TRLTOM +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCDERS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPMEANV(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +INTEGER(KIND=JPIM) :: IBLEN, ILEI2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINV_CTLAD_MOD:ELTINV_CTLAD',0,ZHOOK_HANDLE) + +ILEI2 = 8*KF_UV + 2*KF_SCALARS + 2*KF_SCDERS +IBLEN = D%NLENGT0B*2*KF_OUT_LT +IF (ALLOCATED(FOUBUF_IN)) THEN + IF (MAX(1,IBLEN) > SIZE(FOUBUF_IN)) THEN + DEALLOCATE(FOUBUF_IN) + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here + ENDIF +ELSE + ALLOCATE(FOUBUF_IN(MAX(1,IBLEN))) + FOUBUF_IN(1)=0._JPRB ! force allocation here +ENDIF +CALL GSTATS(180,0) +CALL TRLTOM(FOUBUF,FOUBUF_IN,2*KF_OUT_LT) +CALL GSTATS(180,1) +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF) + +CALL GSTATS(1648,0) +IF(KF_OUT_LT > 0) THEN + CALL ELTINVAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,ILEI2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2, & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) +ENDIF +CALL GSTATS(1648,1) + +IF (.NOT.LALLOPERM) DEALLOCATE(FOUBUF_IN) +IF (LHOOK) CALL DR_HOOK('ELTINV_CTLAD_MOD:ELTINV_CTLAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV_CTLAD +END MODULE ELTINV_CTLAD_MOD diff --git a/src/etrans/cpu/internal/eltinv_mod.F90 b/src/etrans/cpu/internal/eltinv_mod.F90 new file mode 100644 index 000000000..183d9187e --- /dev/null +++ b/src/etrans/cpu/internal/eltinv_mod.F90 @@ -0,0 +1,224 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTINV_MOD +CONTAINS +SUBROUTINE ELTINV(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPMALD_DIM ,ONLY : RALD +USE EPRFI1B_MOD ,ONLY : EPRFI1B +USE EVDTUV_MOD ,ONLY : EVDTUV +USE ESPNSDE_MOD ,ONLY : ESPNSDE +USE ELEINV_MOD ,ONLY : ELEINV +USE EASRE1B_MOD ,ONLY : EASRE1B +USE FSPGL_INT_MOD ,ONLY : FSPGL_INT +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +!**** *LTINV* - Inverse Legendre transform + +! Purpose. +! -------- +! Tranform from Laplace space to Fourier space, compute U and V +! and north/south derivatives of state variables. + +!** Interface. +! ---------- +! *CALL* *LTINV(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : The Laplace arrays of the model. +! -------------------- The values of the Legendre polynomials +! The grid point arrays of the model +! Method. +! ------- + +! Externals. +! ---------- + +! PREPSNM - prepare REPSNM for wavenumber KM +! PRFI1B - prepares the spectral fields +! VDTUV - compute u and v from vorticity and divergence +! SPNSDE - compute north-south derivatives +! LEINV - Inverse Legendre transform +! ASRE1 - recombination of symmetric/antisymmetric part + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINV in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! R. El Khatib 26-Aug-2021 Optimization for EASRE1B +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 +INTEGER(KIND=JPIM), INTENT(IN) :: KDIM1 + +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPMEANV(:) +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(RALD%NDGLSUR+R%NNOEXTZG,KLEI2) + +INTEGER(KIND=JPIM) :: IFC, ISTA +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: IFIRST, ILAST,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + + +! ------------------------------------------------------------------ + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV +ZIA=0.0_JPRB +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV + CALL EPRFI1B(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1B(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + CALL EVDTUV(KM,KF_UV,KFLDPTRUV,ZIA(:,IVORL:IVORU),ZIA(:,IDIVL:IDIVU),& + & ZIA(:,IUL:IUU),ZIA(:,IVL:IVU),PSPMEANU,PSPMEANV) + +ENDIF + +IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1B(KM,ZIA(:,IFIRST:ILAST),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + IF(ILAST /= 8*KF_UV+2*KF_SCALARS) THEN + WRITE(0,*) 'LTINV:KF_UV,KF_SCALARS,ILAST ',KF_UV,KF_SCALARS,ILAST + CALL ABORT_TRANS('LTINV_MOD:ILAST /= 8*KF_UV+2*KF_SCALARS') + ENDIF +ENDIF + +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 + CALL ESPNSDE(KM,KF_SCALARS,ZIA(:,ISL:ISU),ZIA(:,IDL:IDU)) +ENDIF + +! ------------------------------------------------------------------ + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF + +CALL ELEINV(KM,IFC,KF_OUT_LT,ZIA(:,ISTA:ISTA+IFC-1)) + +! ------------------------------------------------------------------ + +!* 5. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + +CALL EASRE1B(IFC,KM,KMLOC,ZIA(:,ISTA:ISTA+IFC-1)) +! ------------------------------------------------------------------ + +! 6. OPTIONAL COMPUTATIONS IN FOURIER SPACE + +IF(PRESENT(FSPGL_PROC)) THEN + CALL FSPGL_INT(KM,KMLOC,KF_UV,KF_SCALARS,KF_SCDERS,KF_OUT_LT,FSPGL_PROC,& + & KFLDPTRUV,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('ELTINV_MOD:ELTINV',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINV +END MODULE ELTINV_MOD + diff --git a/src/etrans/cpu/internal/eltinvad_mod.F90 b/src/etrans/cpu/internal/eltinvad_mod.F90 new file mode 100644 index 000000000..fc1d354fb --- /dev/null +++ b/src/etrans/cpu/internal/eltinvad_mod.F90 @@ -0,0 +1,263 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ELTINVAD_MOD +CONTAINS +SUBROUTINE ELTINVAD(KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,& + & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & KFLDPTRUV,KFLDPTRSC,FSPGL_PROC,PSPMEANU,PSPMEANV) + +!**** *ELTINVAD* - Control routine for inverse Legandre transform - adj. + +! Purpose. +! -------- +! Control routine for the inverse LEGENDRE transform + +!** Interface. +! ---------- +! CALL ELTINVAD(...) +! KF_OUT_LT - number of fields coming out from inverse LT +! KF_UV - local number of spectral u-v fields +! KF_SCALARS - local number of scalar spectral fields +! KF_SCDERS - local number of derivatives of scalar spectral fields +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! KFLDPTRUV(:) - field pointer array for vor./div. +! KFLDPTRSC(:) - field pointer array for PSPSCALAR +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition + +! Method. +! ------- + +! Externals. +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From LTINVAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn add KMLOC to EVDTUVAD call +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR + +USE EASRE1BAD_MOD ,ONLY : EASRE1BAD +USE ELEINVAD_MOD ,ONLY : ELEINVAD +USE EPRFI1BAD_MOD ,ONLY : EPRFI1BAD +USE ESPNSDEAD_MOD ,ONLY : ESPNSDEAD +USE EVDTUVAD_MOD ,ONLY : EVDTUVAD +USE EVDTUVAD_COMM_MOD +USE EXTPER_MOD ,ONLY : EXTPER + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KF_OUT_LT +INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCDERS +INTEGER(KIND=JPIM), INTENT(IN) :: KLEI2 + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANU(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPMEANV(:) + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC + +REAL(KIND=JPRB) :: ZIA(RALD%NDGLSUR+R%NNOEXTZG,KLEI2,D%NUMP) +REAL(KIND=JPRB) :: ZIA2(KLEI2,RALD%NDGLSUR+R%NNOEXTZG) + +INTEGER(KIND=JPIM) :: IFC, ISTA, IINDEX(2*KF_OUT_LT), JF, JDIM, IM, JM +INTEGER(KIND=JPIM) :: IVORL,IVORU,IDIVL,IDIVU,IUL,IUU,IVL,IVU,ISL,ISU,IDL,IDU +INTEGER(KIND=JPIM) :: ILAST,IFIRST,IDIM1,IDIM3,J3 + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ELTINVAD_MOD:ELTINVAD',0,ZHOOK_HANDLE) + +IF (KF_UV > 0) THEN + IVORL = 1 + IVORU = 2*KF_UV + IDIVL = 2*KF_UV+1 + IDIVU = 4*KF_UV + IUL = 4*KF_UV+1 + IUU = 6*KF_UV + IVL = 6*KF_UV+1 + IVU = 8*KF_UV +ENDIF +ISTA = 1 +IFC = 2*KF_OUT_LT +IF(KF_UV > 0 .AND. .NOT. LVORGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF(KF_UV > 0 .AND. .NOT. LDIVGP) THEN + ISTA = ISTA+2*KF_UV +ENDIF +IF (KF_SCDERS > 0) THEN + ISL = 2*(4*KF_UV)+1 + ISU = ISL+2*KF_SCALARS-1 + IDL = 2*(4*KF_UV+KF_SCALARS)+1 + IDU = IDL+2*KF_SCDERS-1 +ENDIF + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM,JF,JDIM,IINDEX,ZIA2) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + +! 7. OPTIONAL COMPUTATIONS IN FOURIER SPACE +! -------------------------------------- + +!commented IF(PRESENT(FSPGL_PROC)) THEN +!commented CALL FSPGL_INT(IM,JM,FSPGL_PROC) +!commented ENDIF + + +!* 6. RECOMBINATION SYMMETRIC/ANTISYMMETRIC PART. +! -------------------------------------------- + + ZIA(:,:,JM)=0.0_JPRB + CALL EASRE1BAD(IFC,IM,JM,ZIA(:,ISTA:ISTA+IFC-1,JM)) + + +!* 5. PERIODICIZATION IN Y DIRECTION +! ------------------------------ + + IF(R%NNOEXTZG>0) THEN + DO JF = 1,IFC + DO JDIM = 1,R%NDGL + ZIA2(JF,JDIM)=ZIA(JDIM,JF,JM) + ENDDO + ENDDO + IINDEX(1)=0 + CALL EXTPER(ZIA2(:,:),R%NDGL+R%NNOEXTZG,1,R%NDGL,IFC,1,IINDEX,0) + DO JF = 1,IFC + DO JDIM = 1,R%NDGL+R%NNOEXTZG + ZIA(JDIM,JF,JM) = ZIA2(JF,JDIM) + ENDDO + ENDDO + ENDIF + +!* 4. INVERSE LEGENDRE TRANSFORM. +! --------------------------- + + CALL ELEINVAD(IM,IFC,KF_OUT_LT,ZIA(:,ISTA:ISTA+IFC-1,JM)) + + +!* 3. SPECTRAL COMPUTATIONS FOR U,V AND DERIVATIVES. +! ---------------------------------------------- + + ZIA(:,1:ISTA-1,JM) = 0.0_JPRB + + IF (KF_UV > 0) THEN + CALL EVDTUVAD(IM,JM,KF_UV,KFLDPTRUV,ZIA(:,IVORL:IVORU,JM),ZIA(:,IDIVL:IDIVU,JM),& + & ZIA(:,IUL:IUU,JM),ZIA(:,IVL:IVU,JM),PSPMEANU,PSPMEANV) + ENDIF + + +ENDDO +!$OMP END PARALLEL DO + +!* 2. COMMUNICATION OF MEAN WIND +! -------------------------- + +IF (KF_UV > 0) THEN + DO JM=1,D%NUMP + IM = D%MYMS(JM) + CALL EVDTUVAD_COMM(IM,JM,KF_UV,KFLDPTRUV,PSPMEANU,PSPMEANV) + ENDDO +ENDIF + +!* 2. PREPARE SPECTRAL FIELDS +! ----------------------- + +!$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JM,IM,IFIRST,ILAST,IDIM1,IDIM3) +DO JM=1,D%NUMP + IM = D%MYMS(JM) + + IFIRST = 1 + ILAST = 4*KF_UV + IF (KF_UV > 0) THEN + CALL EPRFI1BAD(IM,ZIA(:,IVORL:IVORU,JM),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1BAD(IM,ZIA(:,IDIVL:IDIVU,JM),PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV + ENDIF + + IF (KF_SCDERS > 0) THEN + CALL ESPNSDEAD(IM,KF_SCALARS,ZIA(:,ISL:ISU,JM),ZIA(:,IDL:IDU,JM)) + ENDIF + + IF(KF_SCALARS > 0)THEN + IF(PRESENT(PSPSCALAR)) THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*NF_SC2 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC2(:,:),NF_SC2) + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC3A(:,:,J3),IDIM1) + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IFIRST = ILAST+1 + ILAST = IFIRST-1+2*IDIM1 + CALL EPRFI1BAD(IM,ZIA(:,IFIRST:ILAST,JM),PSPSC3B(:,:,J3),IDIM1) + ENDDO + ENDIF + ENDIF + ENDIF + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ELTINVAD_MOD:ELTINVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ELTINVAD +END MODULE ELTINVAD_MOD diff --git a/src/etrans/cpu/internal/eprfi1_mod.F90 b/src/etrans/cpu/internal/eprfi1_mod.F90 new file mode 100644 index 000000000..afbf9b259 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1_mod.F90 @@ -0,0 +1,116 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI1_MOD +CONTAINS +SUBROUTINE EPRFI1(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DISTR +!USE TPM_TRANS + +USE EPRFI1B_MOD ,ONLY : EPRFI1B + +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1 in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM +INTEGER(KIND=JPIM),INTENT(IN) :: KF_UV +INTEGER(KIND=JPIM),INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI1_MOD:EPRFI1',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL EPRFI1B(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1B(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1B(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1_MOD:EPRFI1',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1 +END MODULE EPRFI1_MOD + diff --git a/src/etrans/cpu/internal/eprfi1ad_mod.F90 b/src/etrans/cpu/internal/eprfi1ad_mod.F90 new file mode 100644 index 000000000..e89caa9ac --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1ad_mod.F90 @@ -0,0 +1,114 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI1AD_MOD +CONTAINS +SUBROUTINE EPRFI1AD(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& + & KFLDPTRUV,KFLDPTRSC) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DISTR +!USE TPM_TRANS + +USE EPRFI1BAD_MOD ,ONLY : EPRFI1BAD + +!**** *EPRFI1AD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1AD(KM,PIA,PSPVOR,PSPDIV,PSPSCALAR + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPVOR - vorticity +! PSPDIV - divergence +! PSPSCALAR - scalar variables + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1AD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ +! +IMPLICIT NONE +! +! +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IDIV, IFIRST, ILAST, IVOR +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! ------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI1AD_MOD:EPRFI1AD',0,ZHOOK_HANDLE) +IFIRST = 1 +ILAST = 4*KF_UV + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF(KF_UV > 0)THEN + IVOR = 1 + IDIV = 2*KF_UV+1 + CALL EPRFI1BAD(KM,PIA(:,IVOR:IDIV-1),PSPVOR,KF_UV,KFLDPTRUV) + CALL EPRFI1BAD(KM,PIA(:,IDIV:ILAST) ,PSPDIV,KF_UV,KFLDPTRUV) + ILAST = ILAST+4*KF_UV +ENDIF + +!* 1.2 SCALAR VARIABLES. + +IF(KF_SCALARS > 0)THEN + IFIRST = ILAST+1 + ILAST = IFIRST - 1 + 2*KF_SCALARS + CALL EPRFI1BAD(KM,PIA(:,IFIRST:ILAST),PSPSCALAR(:,:),KF_SCALARS,KFLDPTRSC) +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1AD_MOD:EPRFI1AD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1AD +END MODULE EPRFI1AD_MOD diff --git a/src/etrans/cpu/internal/eprfi1b_mod.F90 b/src/etrans/cpu/internal/eprfi1b_mod.F90 new file mode 100644 index 000000000..d0a6d7858 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1b_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI1B_MOD +CONTAINS +SUBROUTINE EPRFI1B(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_DISTR +USE TPMALD_DISTR ,ONLY : DALD +! +!**** *PRFI1* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *PRFI1B(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1B in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',0,ZHOOK_HANDLE) +ILCM = DALD%NCPL2M(KM) +IOFF = DALD%NESM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + PIA(J ,IR) = PSPEC(IFLD,INM ) + PIA(J+1,IR) = PSPEC(IFLD,INM+1) + PIA(J ,II) = PSPEC(IFLD,INM+2) + PIA(J+1,II) = PSPEC(IFLD,INM+3) + ENDDO + ENDDO + +ELSE + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + !DIR$ IVDEP + !OCL NOVREC + !cdir unroll=4 + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + PIA(J ,IR) = PSPEC(JFLD,INM ) + PIA(J+1,IR) = PSPEC(JFLD,INM+1) + PIA(J ,II) = PSPEC(JFLD,INM+2) + PIA(J+1,II) = PSPEC(JFLD,INM+3) + ENDDO + ENDDO + +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1B_MOD:EPRFI1B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1B +END MODULE EPRFI1B_MOD diff --git a/src/etrans/cpu/internal/eprfi1bad_mod.F90 b/src/etrans/cpu/internal/eprfi1bad_mod.F90 new file mode 100644 index 000000000..e59ccb5ab --- /dev/null +++ b/src/etrans/cpu/internal/eprfi1bad_mod.F90 @@ -0,0 +1,121 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI1BAD_MOD +CONTAINS +SUBROUTINE EPRFI1BAD(KM,PIA,PSPEC,KFIELDS,KFLDPTR) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPMALD_DISTR ,ONLY : DALD + +!**** *EPRFI1BAD* - Prepare spectral fields for inverse Legendre transform + +! Purpose. +! -------- +! To extract the spectral fields for a specific zonal wavenumber +! and put them in an order suitable for the inverse Legendre . +! tranforms.The ordering is from NSMAX to KM for better conditioning. +! Elements 1,2 and NLCM(KM)+1 are zeroed in preparation for computing +! u,v and derivatives in spectral space. + +!** Interface. +! ---------- +! *CALL* *EPRFI1BAD(...)* + +! Explicit arguments : KM - zonal wavenumber +! ------------------ PIA - spectral components for transform +! PSPEC - spectral array +! KFIELDS - number of fields + +! Implicit arguments : None. +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From PRFI1BAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELDS +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PIA(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, J, JFLD, ILCM, IOFF, IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. +! -------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('EPRFI1BAD_MOD:EPRFI1BAD',0,ZHOOK_HANDLE) +ILCM=DALD%NCPL2M(KM) + +IOFF = DALD%NESM0(KM) + +IF(PRESENT(KFLDPTR)) THEN + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + IFLD = KFLDPTR(JFLD) + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 + + PSPEC(IFLD,INM ) = PSPEC(IFLD,INM ) + PIA(J ,IR) + PSPEC(IFLD,INM+1) = PSPEC(IFLD,INM+1) + PIA(J+1,IR) + PSPEC(IFLD,INM+2) = PSPEC(IFLD,INM+2) + PIA(J ,II) + PSPEC(IFLD,INM+3) = PSPEC(IFLD,INM+3) + PIA(J+1,II) + + ENDDO + ENDDO +ELSE + DO J=1,ILCM,2 + INM = IOFF+(J-1)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELDS + IR = 2*(JFLD-1)+1 + II = IR+1 + + PSPEC(JFLD,INM ) = PSPEC(JFLD,INM ) + PIA(J ,IR) + PSPEC(JFLD,INM+1) = PSPEC(JFLD,INM+1) + PIA(J+1,IR) + PSPEC(JFLD,INM+2) = PSPEC(JFLD,INM+2) + PIA(J ,II) + PSPEC(JFLD,INM+3) = PSPEC(JFLD,INM+3) + PIA(J+1,II) + + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EPRFI1BAD_MOD:EPRFI1BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI1BAD +END MODULE EPRFI1BAD_MOD diff --git a/src/etrans/cpu/internal/eprfi2_mod.F90 b/src/etrans/cpu/internal/eprfi2_mod.F90 new file mode 100644 index 000000000..8fa01d25e --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2_mod.F90 @@ -0,0 +1,96 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI2_MOD +CONTAINS +SUBROUTINE EPRFI2(KM,KMLOC,KF_FS,PFFT) + +!**** *EPRFI2* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. PRFI2B - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK + +!USE TPM_TRANS + +USE EPRFI2B_MOD ,ONLY : EPRFI2B +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL EPRFI2B(KF_FS,KM,KMLOC,PFFT) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2 +END MODULE EPRFI2_MOD diff --git a/src/etrans/cpu/internal/eprfi2ad_mod.F90 b/src/etrans/cpu/internal/eprfi2ad_mod.F90 new file mode 100644 index 000000000..ccd279f7f --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2ad_mod.F90 @@ -0,0 +1,93 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI2AD_MOD +CONTAINS +SUBROUTINE EPRFI2AD(KM,KMLOC,KF_FS,PFFT) + +!**** *EPRFI2AD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2AD(..) + +! Explicit arguments : +! -------------------- KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAIA - antisymmetric part of Fourier +! components for KM (output) +! PSIA - symmetric part of Fourier +! components for KM (output) + +! Implicit arguments : The Grid point arrays of the model. +! -------------------- + +! Method. +! ------- + +! Externals. EPRFI2BAD - basic copying routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 87-11-25 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 93-03-19 D. Giard - CDCONF='T' +! Modified : 93-??-?? ???????? - CDCONF='C'--> bug if CDCONF='T' +! Modified : 93-05-13 D. Giard - correction of the previous bug +! Modified : 93-11-18 M. Hamrud - use only one Fourier buffer +! Modified : 94-04-06 R. El Khatib - Full-POS configuration 'P' +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +USE EPRFI2BAD_MOD ,ONLY : EPRFI2BAD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) , INTENT(IN) :: KM +INTEGER(KIND=JPIM) , INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM) , INTENT(IN) :: KF_FS + +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) + +! ------------------------------------------------------------------ + +!* 2. EXTRACT SYM./ANTISYM. FIELDS FROM TIME T+1. +! ------------------------------------------- + +CALL EPRFI2BAD(KF_FS,KM,KMLOC,PFFT) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2AD +END MODULE EPRFI2AD_MOD diff --git a/src/etrans/cpu/internal/eprfi2b_mod.F90 b/src/etrans/cpu/internal/eprfi2b_mod.F90 new file mode 100644 index 000000000..0555bed1b --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2b_mod.F90 @@ -0,0 +1,103 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI2B_MOD +CONTAINS +SUBROUTINE EPRFI2B(KFIELD,KM,KMLOC,PFFT) + +!**** *EPRFI2B* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2B(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_TRANS ,ONLY : FOUBUF +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +!USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL +INTEGER(KIND=JPIM) :: IJR,IJI +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',0,ZHOOK_HANDLE) + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ + +!DIR$ IVDEP +!OCL NOVREC +DO JGL=1,R%NDGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + DO JF =1,KFIELD + IJR = 2*(JF-1)+1 + IJI = IJR+1 + PFFT(JGL,IJR) = FOUBUF(ISTAN+IJR) + PFFT(JGL,IJI) = FOUBUF(ISTAN+IJI) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EPRFI2B_MOD:EPRFI2B',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2B +END MODULE EPRFI2B_MOD diff --git a/src/etrans/cpu/internal/eprfi2bad_mod.F90 b/src/etrans/cpu/internal/eprfi2bad_mod.F90 new file mode 100644 index 000000000..bf8d38952 --- /dev/null +++ b/src/etrans/cpu/internal/eprfi2bad_mod.F90 @@ -0,0 +1,101 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EPRFI2BAD_MOD +CONTAINS +SUBROUTINE EPRFI2BAD(KFIELD,KM,KMLOC,PFFT) + +!**** *EPRFI2BAD* - Prepare input work arrays for direct transform + +! Purpose. +! -------- +! To extract the Fourier fields for a specific zonal wavenumber +! and put them in an order suitable for the direct Legendre +! tranforms, i.e. split into symmetric and anti-symmetric part. + +!** Interface. +! ---------- +! *CALL* *EPRFI2BAD(..) + +! Explicit arguments : +! ------------------- KFIELD - number of fields +! KM - zonal wavenumber +! KMLOC - local zonal wavenumber +! PAOA - antisymmetric part of Fourier +! fields for zonal wavenumber KM +! PSOA - symmetric part of Fourier +! fields for zonal wavenumber KM + +! Implicit arguments : FOUBUF in TPM_TRANS +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 90-07-01 +! MPP Group: 95-10-01 Support for Distributed Memory version +! Modified : 04/06/99 D.Salmond : change order of AIA and SIA +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPMALD_DIM ,ONLY : RALD +USE TPM_TRANS ,ONLY : FOUBUF +!USE TPM_GEOMETRY +USE TPM_DISTR ,ONLY : D +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD,KM,KMLOC +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) + +INTEGER(KIND=JPIM) :: ISTAN, JF, JGL + +INTEGER(KIND=JPIM) :: IJR,IJI +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. EXTRACT SYM./ANTISYM. FIELDS FROM FOURIER ARRAY. +! ------------------------------------------------ +IF (LHOOK) CALL DR_HOOK('EPRFI2BAD_MOD:EPRFI2BAD',0,ZHOOK_HANDLE) +DO JGL=1,R%NDGL + ISTAN = (D%NSTAGT1B(D%NPROCL(JGL) )+D%NPNTGTB1(KMLOC,JGL ))*2*KFIELD + DO JF =1,KFIELD + IJR = 2*(JF-1)+1 + IJI = IJR+1 + FOUBUF(ISTAN+IJR) = PFFT(JGL,IJR) + FOUBUF(ISTAN+IJI) = PFFT(JGL,IJI) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EPRFI2BAD_MOD:EPRFI2BAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EPRFI2BAD +END MODULE EPRFI2BAD_MOD diff --git a/src/etrans/cpu/internal/eset_resol_mod.F90 b/src/etrans/cpu/internal/eset_resol_mod.F90 new file mode 100644 index 000000000..c76bfc467 --- /dev/null +++ b/src/etrans/cpu/internal/eset_resol_mod.F90 @@ -0,0 +1,85 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESET_RESOL_MOD +CONTAINS +SUBROUTINE ESET_RESOL(KRESOL) +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL +USE TPM_DIM ,ONLY : R, DIM_RESOL +!USE TPM_TRANS +USE TPM_DISTR ,ONLY : D, DISTR_RESOL +USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL +USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T, FFT_RESOL +#endif +USE TPM_FFTW ,ONLY : TW, FFTW_RESOL + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS + +USE TPMALD_DIM ,ONLY : RALD, ALDDIM_RESOL +USE TPMALD_DISTR ,ONLY : DALD, ALDDISTR_RESOL +#ifdef WITH_FFT992 +USE TPMALD_FFT ,ONLY : TALD, ALDFFT_RESOL +#endif +USE TPMALD_FIELDS ,ONLY : FALD, ALDFIELDS_RESOL +USE TPMALD_GEO ,ONLY : GALD, ALDGEO_RESOL +! + +IMPLICIT NONE + +! Declaration of arguments + +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! Local varaibles +INTEGER(KIND=JPIM) :: IRESOL +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',0,ZHOOK_HANDLE) +IF(MSETUP0 == 0) CALL ABORT_TRANS('ESET_RESOL:TRANS NOT SETUP') +IRESOL = 1 +IF(PRESENT(KRESOL)) THEN + IRESOL = KRESOL + IF(KRESOL < 1 .OR. KRESOL > NMAX_RESOL) THEN + WRITE(NOUT,*)'ESET_RESOL: UNKNOWN RESOLUTION ',KRESOL,NMAX_RESOL + CALL ABORT_TRANS('ESET_RESOL:KRESOL < 1 .OR. KRESOL > NMAX_RESOL') + ENDIF +ENDIF +IF(IRESOL /= NCUR_RESOL) THEN + NCUR_RESOL = IRESOL + R => DIM_RESOL(NCUR_RESOL) + F => FIELDS_RESOL(NCUR_RESOL) + G => GEOM_RESOL(NCUR_RESOL) + D => DISTR_RESOL(NCUR_RESOL) +#ifdef WITH_FFT992 + T => FFT_RESOL(NCUR_RESOL) +#endif + TW => FFTW_RESOL(NCUR_RESOL) + + RALD => ALDDIM_RESOL(NCUR_RESOL) + DALD => ALDDISTR_RESOL(NCUR_RESOL) +#ifdef WITH_FFT992 + TALD => ALDFFT_RESOL(NCUR_RESOL) +#endif + FALD => ALDFIELDS_RESOL(NCUR_RESOL) + GALD => ALDGEO_RESOL(NCUR_RESOL) + +ENDIF +IF (LHOOK) CALL DR_HOOK('ESET_RESOL_MOD:ESET_RESOL',1,ZHOOK_HANDLE) + +END SUBROUTINE ESET_RESOL +END MODULE ESET_RESOL_MOD diff --git a/src/etrans/cpu/internal/esetup_dims_mod.F90 b/src/etrans/cpu/internal/esetup_dims_mod.F90 new file mode 100644 index 000000000..b5b1a2271 --- /dev/null +++ b/src/etrans/cpu/internal/esetup_dims_mod.F90 @@ -0,0 +1,57 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESETUP_DIMS_MOD +CONTAINS +SUBROUTINE ESETUP_DIMS + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JN,ISPOLEG +INTEGER(KIND=JPIM) :: ISMAX(0:R%NSMAX),ISNAX(0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_DIMS_MOD:ESETUP_DIMS',0,ZHOOK_HANDLE) +ISPOLEG = 0 +DO JM=0,R%NSMAX + DO JN=JM,R%NTMAX+1 + ISPOLEG = ISPOLEG+1 + ENDDO +ENDDO +R%NSPOLEG = ISPOLEG +CALL ELLIPS(R%NSMAX,RALD%NMSMAX,ISNAX,ISMAX) +R%NSPEC_G=0 +DO JM=0,RALD%NMSMAX + R%NSPEC_G=R%NSPEC_G+2*(ISNAX(JM)+1) +ENDDO +R%NSPEC2_G = R%NSPEC_G*2 + +R%NDGNH = (R%NDGL+1)/2 + +R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) +R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) + +R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) +R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) +IF (LHOOK) CALL DR_HOOK('ESETUP_DIMS_MOD:ESETUP_DIMS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESETUP_DIMS +END MODULE ESETUP_DIMS_MOD diff --git a/src/etrans/cpu/internal/esetup_geom_mod.F90 b/src/etrans/cpu/internal/esetup_geom_mod.F90 new file mode 100644 index 000000000..e61f9b6b9 --- /dev/null +++ b/src/etrans/cpu/internal/esetup_geom_mod.F90 @@ -0,0 +1,77 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESETUP_GEOM_MOD +CONTAINS +SUBROUTINE ESETUP_GEOM + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D +USE TPMALD_DIM ,ONLY : RALD +!USE TPM_FIELDS +USE TPM_GEOMETRY ,ONLY : G +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: IDGLU(0:RALD%NMSMAX,R%NDGNH) +INTEGER(KIND=JPIM) :: JGL,JM + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESETUP_GEOM_MOD:ESETUP_GEOM',0,ZHOOK_HANDLE) +IF(.NOT.D%LGRIDONLY) THEN +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_GEOM ===' + +ALLOCATE (G%NMEN(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'G%NMEN ',SIZE(G%NMEN ),SHAPE(G%NMEN ) +G%NMEN(:)=RALD%NMSMAX +IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JGL,G%NLOEN,G%NMEN) '')') + WRITE(NOUT,FMT='(8(1X,''('',I4,I4,I4,'')''))')& + & (JGL,G%NLOEN(JGL),G%NMEN(JGL),JGL=1,R%NDGL) +ENDIF +ALLOCATE(G%NDGLU(0:RALD%NMSMAX)) +IF(LLP2)WRITE(NOUT,9) 'G%NDGLU ',SIZE(G%NDGLU ),SHAPE(G%NDGLU ) +IDGLU(:,:) = 0 +G%NDGLU(:) = 0 +DO JGL=1,R%NDGNH + DO JM=0,G%NMEN(JGL) + IDGLU(JM,JGL) = 1 + ENDDO +ENDDO +DO JM=0,RALD%NMSMAX + DO JGL=1,R%NDGNH + G%NDGLU(JM) = G%NDGLU(JM)+IDGLU(JM,JGL) + ENDDO +ENDDO +IF(LLP1) THEN + WRITE(NOUT,FMT='('' (JM,G%NDGLU) '')') + WRITE(NOUT,FMT='(10(1X,''('',I4,I4,'')''))')& + & (JM,G%NDGLU(JM),JM=0,RALD%NMSMAX) +ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('ESETUP_GEOM_MOD:ESETUP_GEOM',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE ESETUP_GEOM +END MODULE ESETUP_GEOM_MOD diff --git a/src/etrans/cpu/internal/espnorm_ctl_mod.F90 b/src/etrans/cpu/internal/espnorm_ctl_mod.F90 new file mode 100644 index 000000000..163059ff0 --- /dev/null +++ b/src/etrans/cpu/internal/espnorm_ctl_mod.F90 @@ -0,0 +1,75 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESPNORM_CTL_MOD +CONTAINS +SUBROUTINE ESPNORM_CTL(PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET,PNORM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, MYSETV, MYPROC + +USE ESPNORMD_MOD ,ONLY : ESPNORMD +USE SPNORMC_MOD ,ONLY : SPNORMC + +USE TPMALD_DIM ,ONLY : RALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFLD,KFLD_G +INTEGER(KIND=JPIM) :: IVSET(KFLD_G) + +REAL(KIND=JPRB) :: ZMET(0:R%NSPEC_G) + +REAL(KIND=JPRB) :: ZSM(KFLD,D%NUMP) + +REAL(KIND=JPRB) :: ZGM(KFLD_G,0:RALD%NMSMAX) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE1 + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',0,ZHOOK_HANDLE) +IF(PRESENT(KVSET)) THEN + IVSET(:) = KVSET(:) +ELSE + IVSET(:) = MYSETV +ENDIF + +IF(PRESENT(PMET)) THEN + ZMET(:) = PMET(:) +ELSE + ZMET(:) = 1.0_JPRB +ENDIF + +CALL ESPNORMD(PSPEC,KFLD,ZMET,ZSM) + +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:SPNORMC',0,ZHOOK_HANDLE1) +CALL SPNORMC(ZSM,KFLD_G,IVSET,KMASTER,RALD%NMSMAX,ZGM) +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:SPNORMC',1,ZHOOK_HANDLE1) + +IF(MYPROC == KMASTER) THEN + PNORM(1:KFLD_G) = SUM(ZGM,DIM=2) + PNORM(1:KFLD_G) = SQRT(PNORM(1:KFLD_G)) +ENDIF +IF (LHOOK) CALL DR_HOOK('ESPNORM_CTL_MOD:ESPNORM_CTL',1,ZHOOK_HANDLE) +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORM_CTL +END MODULE ESPNORM_CTL_MOD diff --git a/src/etrans/cpu/internal/espnormc_mod.F90 b/src/etrans/cpu/internal/espnormc_mod.F90 new file mode 100644 index 000000000..f802ac553 --- /dev/null +++ b/src/etrans/cpu/internal/espnormc_mod.F90 @@ -0,0 +1,14 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESPNORMC_MOD + ! dead code +END MODULE ESPNORMC_MOD diff --git a/src/etrans/cpu/internal/espnormd_mod.F90 b/src/etrans/cpu/internal/espnormd_mod.F90 new file mode 100644 index 000000000..a17b11698 --- /dev/null +++ b/src/etrans/cpu/internal/espnormd_mod.F90 @@ -0,0 +1,66 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESPNORMD_MOD +CONTAINS +SUBROUTINE ESPNORMD(PSPEC,KFLD,PMET,PSM) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +REAL(KIND=JPRB) ,INTENT(IN) :: PSPEC(:,:) +REAL(KIND=JPRB) ,INTENT(IN) :: PMET(0:R%NSPEC_G) +INTEGER(KIND=JPIM) ,INTENT(IN) :: KFLD +REAL(KIND=JPRB) ,INTENT(OUT) :: PSM(:,:) +INTEGER(KIND=JPIM) :: JM ,JFLD ,JN ,IM ,ISP +INTEGER(KIND=JPIM) :: IN,ISPE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',0,ZHOOK_HANDLE) + +!$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JM,IM,JN,ISP,JFLD,IN,ISPE) +DO JM=1,D%NUMP + PSM(:,JM) = 0.0_JPRB + IM = D%MYMS(JM) + + IN=DALD%NCPL2M(IM)/2 - 1 + DO JN=0,IN + ISP=DALD%NESM0(IM) + (JN)*4 + ISPE=DALD%NPME (IM) + JN + DO JFLD=1,KFLD + PSM(JFLD,JM) =PSM(JFLD,JM)& + & + PMET(ISPE) *& + & ( PSPEC(JFLD,ISP )**2 + PSPEC(JFLD,ISP+1)**2 +& + & PSPEC(JFLD,ISP+2)**2 + PSPEC(JFLD,ISP+3)**2 ) + + ENDDO + ENDDO + +ENDDO +!$OMP END PARALLEL DO + +IF (LHOOK) CALL DR_HOOK('ESPNORMD_MOD:ESPNORMD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNORMD +END MODULE ESPNORMD_MOD + diff --git a/src/etrans/cpu/internal/espnsde_mod.F90 b/src/etrans/cpu/internal/espnsde_mod.F90 new file mode 100644 index 000000000..48918d755 --- /dev/null +++ b/src/etrans/cpu/internal/espnsde_mod.F90 @@ -0,0 +1,112 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESPNSDE_MOD +CONTAINS +SUBROUTINE ESPNSDE(KM,KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_TRANS +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_GEO ,ONLY : GALD + + +!**** *SPNSDE* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL SPNSDE(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDE in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(IN) :: PF(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PNSD(:,:) + +INTEGER(KIND=JPIM) :: J, JN,IN +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',0,ZHOOK_HANDLE) +DO JN=1,DALD%NCPL2M(KM),2 + IN =(JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + DO J=1,2*KF_SCALARS + PNSD(JN ,J) = -ZIN*PF(JN+1,J) + PNSD(JN+1,J) = ZIN*PF(JN,J) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ESPNSDE_MOD:ESPNSDE',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDE +END MODULE ESPNSDE_MOD diff --git a/src/etrans/cpu/internal/espnsdead_mod.F90 b/src/etrans/cpu/internal/espnsdead_mod.F90 new file mode 100644 index 000000000..66fabc53a --- /dev/null +++ b/src/etrans/cpu/internal/espnsdead_mod.F90 @@ -0,0 +1,123 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE ESPNSDEAD_MOD +CONTAINS +SUBROUTINE ESPNSDEAD(KM,KF_SCALARS,PF,PNSD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_GEN +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_TRANS + +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_GEO ,ONLY : GALD + + +!**** *ESPNSDEAD* - Compute North-South derivative in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the North-south derivative + +!** Interface. +! ---------- +! CALL ESPNSDEAD(...) + +! Explicit arguments : +! -------------------- +! KM -zonal wavenumber (input-c) +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PF (NLEI1,2*KF_SCALARS) - input field (input) +! PNSD(NLEI1,2*KF_SCALARS) - N-S derivative (output) + +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : YOMLAP +! -------------------- + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From SPNSDEAD in IFS CY22R1 +! M.Hamrud 01-Oct-2003 CY28 Cleaning + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KF_SCALARS +REAL(KIND=JPRB), INTENT(INOUT) :: PF(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PNSD(:,:) +INTEGER(KIND=JPIM) :: ISKIP, J, JN +INTEGER(KIND=JPIM) :: IN +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. COMPUTE NORTH SOUTH DERIVATIVE. +! ------------------------------- + +!* 1.1 COMPUTE + +IF (LHOOK) CALL DR_HOOK('ESPNSDEAD_MOD:ESPNSDEAD',0,ZHOOK_HANDLE) +IF(KM == 0) THEN + ISKIP = 1 +ELSE + ISKIP = 1 +ENDIF + +DO JN=1,DALD%NCPL2M(KM),2 + + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + + DO J=1,2*KF_SCALARS,ISKIP + + PF(JN+1,J) = PF(JN+1,J)-ZIN*PNSD(JN ,J) + PF(JN ,J) = PF(JN ,J)+ZIN*PNSD(JN+1,J) + + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('ESPNSDEAD_MOD:ESPNSDEAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPNSDEAD +END MODULE ESPNSDEAD_MOD diff --git a/src/etrans/cpu/internal/eupdsp_mod.F90 b/src/etrans/cpu/internal/eupdsp_mod.F90 new file mode 100644 index 000000000..ee336eee0 --- /dev/null +++ b/src/etrans/cpu/internal/eupdsp_mod.F90 @@ -0,0 +1,152 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUPDSP_MOD +CONTAINS +SUBROUTINE EUPDSP(KM,KF_UV,KF_SCALARS,PFFT,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSP* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSP(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPB_MOD ,ONLY : EUPDSPB +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS +REAL(KIND=JPRB) , INTENT(IN) :: PFFT(:,:) +REAL(KIND=JPRB) , INTENT(IN) :: PVODI(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND,IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',0,ZHOOK_HANDLE) +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPB(KM,KF_UV,PVODI(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPB(KM,KF_UV,PVODI(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPB(KM,KF_SCALARS,PFFT(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(KM,IDIM1,PFFT(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPB(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSP_MOD:EUPDSP',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSP +END MODULE EUPDSP_MOD diff --git a/src/etrans/cpu/internal/eupdspad_mod.F90 b/src/etrans/cpu/internal/eupdspad_mod.F90 new file mode 100644 index 000000000..9f50dea0d --- /dev/null +++ b/src/etrans/cpu/internal/eupdspad_mod.F90 @@ -0,0 +1,156 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUPDSPAD_MOD +CONTAINS +SUBROUTINE EUPDSPAD(KM,KF_UV,KF_SCALARS,PFFT,PVODI, & + & PSPVOR,PSPDIV,PSPSCALAR,& + & PSPSC3A,PSPSC3B,PSPSC2 , & + & KFLDPTRUV,KFLDPTRSC) + +!**** *EUPDSPAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update the spectral arrays for a fixed zonal wave-number +! from values in POA1 and POA2. + +!** Interface. +! ---------- +! CALL EUPDSPAD(...) + +! Explicit arguments : +! -------------------- +! KM - zonal wave-number +! POA1 - spectral fields for zonal wavenumber KM (basic var.) +! POA2 - spectral fields for zonal wavenumber KM (vor. div.) +! PSPVOR - spectral vorticity +! PSPDIV - spectral divergence +! PSPSCALAR - spectral scalar variables + +! Implicit arguments : +! -------------------- + +! Method. +! ------- + +! Externals. UPDSPADB - basic transfer routine +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! Modified : 91-07-01 Philippe Courtier/Mats Hamrud - Rewrite +! for uv formulation +! Modified : 94-08-02 R. El Khatib - interface to UPDSPADB +! M.Hamrud : 94-11-01 New conf 'G' - vor,div->vor,div +! instead of u,v->vor,div +! MPP Group: 95-10-01 Support for Distributed Memory version +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B +!USE TPM_DISTR + +USE EUPDSPBAD_MOD ,ONLY : EUPDSPBAD +! + +IMPLICIT NONE + + +INTEGER(KIND=JPIM), INTENT(IN) :: KM,KF_UV,KF_SCALARS + +REAL(KIND=JPRB) , INTENT(OUT) :: PFFT(:,:) +REAL(KIND=JPRB) , INTENT(OUT) :: PVODI(:,:) + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRUV(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KFLDPTRSC(:) + +INTEGER(KIND=JPIM) :: IVORS, IVORE, IDIVS, IDIVE, IST ,IEND +INTEGER(KIND=JPIM) :: IDIM1,IDIM3,J3 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE FIELDS +! ------------- + +!* 1.1 VORTICITY AND DIVERGENCE. + +IF (LHOOK) CALL DR_HOOK('EUPDSPAD_MOD:EUPDSPAD',0,ZHOOK_HANDLE) +IST = 1 +IF (KF_UV > 0) THEN + IST = IST+4*KF_UV + IVORS = 1 + IVORE = 2*KF_UV + IDIVS = 2*KF_UV+1 + IDIVE = 4*KF_UV + CALL EUPDSPBAD(KM,KF_UV,PVODI(:,IVORS:IVORE),PSPVOR,KFLDPTRUV) + CALL EUPDSPBAD(KM,KF_UV,PVODI(:,IDIVS:IDIVE),PSPDIV,KFLDPTRUV) +ENDIF + +!* 1.2 SCALARS + +IF (KF_SCALARS > 0) THEN + IF(PRESENT(PSPSCALAR)) THEN + IEND = IST+2*KF_SCALARS-1 + CALL EUPDSPBAD(KM,KF_SCALARS,PFFT(:,IST:IEND),PSPSCALAR,KFLDPTRSC) + ELSE + IF(PRESENT(PSPSC2) .AND. NF_SC2 > 0) THEN + IDIM1 = NF_SC2 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC2) + IST=IST+2*IDIM1 + ENDIF + IF(PRESENT(PSPSC3A) .AND. NF_SC3A > 0) THEN + IDIM1=NF_SC3A + IDIM3=UBOUND(PSPSC3A,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3A(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + IF(PRESENT(PSPSC3B) .AND. NF_SC3B > 0) THEN + IDIM1=NF_SC3B + IDIM3=UBOUND(PSPSC3B,3) + DO J3=1,IDIM3 + IEND = IST+2*IDIM1-1 + CALL EUPDSPBAD(KM,IDIM1,PFFT(:,IST:IEND),PSPSC3B(:,:,J3)) + IST=IST+2*IDIM1 + ENDDO + ENDIF + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPAD_MOD:EUPDSPAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSPAD +END MODULE EUPDSPAD_MOD diff --git a/src/etrans/cpu/internal/eupdspb_mod.F90 b/src/etrans/cpu/internal/eupdspb_mod.F90 new file mode 100644 index 000000000..17c0be09b --- /dev/null +++ b/src/etrans/cpu/internal/eupdspb_mod.F90 @@ -0,0 +1,116 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUPDSPB_MOD +CONTAINS +SUBROUTINE EUPDSPB(KM,KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPB* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPB(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_DISTR +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(IN) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 1. UPDATE SPECTRAL FIELDS. +! ----------------------- +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',0,ZHOOK_HANDLE) +IF(PRESENT(KFLDPTR)) THEN + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + IFLD = KFLDPTR(JFLD) + PSPEC(IFLD,INM) =POA(JN,IR) + PSPEC(IFLD,INM+1) =POA(JN+1,IR) + PSPEC(IFLD,INM+2) =POA(JN,II) + PSPEC(IFLD,INM+3) =POA(JN+1,II) + ENDDO + ENDDO +ELSE + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 +! use unroll to provoke vectorization of outer loop +!cdir unroll=4 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + PSPEC(JFLD,INM) =POA(JN,IR) + PSPEC(JFLD,INM+1) =POA(JN+1,IR) + PSPEC(JFLD,INM+2) =POA(JN,II) + PSPEC(JFLD,INM+3) =POA(JN+1,II) + ENDDO + ENDDO +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPB_MOD:EUPDSPB',1,ZHOOK_HANDLE) + +END SUBROUTINE EUPDSPB +END MODULE EUPDSPB_MOD diff --git a/src/etrans/cpu/internal/eupdspbad_mod.F90 b/src/etrans/cpu/internal/eupdspbad_mod.F90 new file mode 100644 index 000000000..8f85b2846 --- /dev/null +++ b/src/etrans/cpu/internal/eupdspbad_mod.F90 @@ -0,0 +1,144 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUPDSPBAD_MOD +CONTAINS +SUBROUTINE EUPDSPBAD(KM,KFIELD,POA,PSPEC,KFLDPTR) + +!**** *EUPDSPBAD* - Update spectral arrays after direct Legendre transform + +! Purpose. +! -------- +! To update spectral arrays for a fixed zonal wave-number +! from values in POA. + +!** Interface. +! ---------- +! CALL EUPDSPBAD(....) + +! Explicit arguments : KM - zonal wavenumber +! -------------------- KFIELD - number of fields +! POA - work array +! PSPEC - spectral array + +! Implicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 88-02-02 +! D. Giard : 93-03-19 truncations NSMAX and NTMAX (see NOTE) +! R. El Khatib : 94-08-02 Replace number of fields by indexes of the +! first and last field +! L. Isaksen : 95-06-06 Reordering of spectral arrays +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +!USE TPM_DISTR + +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KM,KFIELD +REAL(KIND=JPRB) ,INTENT(OUT) :: POA(:,:) +REAL(KIND=JPRB) ,INTENT(INOUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KFLDPTR(:) + +INTEGER(KIND=JPIM) :: II, INM, IR, JFLD, JN,IFLD +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +!* 0. NOTE. +! ----- + +! The following transfer reads : +! SPEC(k,NASM0(m)+NLTN(n)*2) =POA(nn,2*k-1) (real part) +! SPEC(k,NASM0(m)+NLTN(n)*2+1)=POA(nn,2*k ) (imaginary part) +! with n from m to NSMAX +! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. +! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) +! nn is the loop index. + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUPDSPBAD_MOD:EUPDSPBAD',0,ZHOOK_HANDLE) +POA(:,:) = 0.0_JPRB + +IF(PRESENT(KFLDPTR)) THEN + + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + IFLD = KFLDPTR(JFLD) +!DIR$ IVDEP +!OCL NOVREC + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 + POA(JN,IR) = PSPEC(IFLD,INM) + POA(JN+1,IR) = PSPEC(IFLD,INM+1) + POA(JN,II) = PSPEC(IFLD,INM+2) + POA(JN+1,II) = PSPEC(IFLD,INM+3) + PSPEC(IFLD,INM )= 0.0_JPRB + PSPEC(IFLD,INM+1)= 0.0_JPRB + PSPEC(IFLD,INM+2)= 0.0_JPRB + PSPEC(IFLD,INM+3)= 0.0_JPRB + ENDDO + ENDDO + +ELSE + + DO JN=1,DALD%NCPL2M(KM),2 + INM=DALD%NESM0(KM)+(JN-1)*2 +!DIR$ IVDEP +!OCL NOVREC + DO JFLD=1,KFIELD + IR= 2*JFLD-1 + II=IR+1 + POA(JN,IR) = PSPEC(JFLD,INM) + POA(JN+1,IR) = PSPEC(JFLD,INM+1) + POA(JN,II) = PSPEC(JFLD,INM+2) + POA(JN+1,II) = PSPEC(JFLD,INM+3) + PSPEC(JFLD,INM )= 0.0_JPRB + PSPEC(JFLD,INM+1)= 0.0_JPRB + PSPEC(JFLD,INM+2)= 0.0_JPRB + PSPEC(JFLD,INM+3)= 0.0_JPRB + ENDDO + ENDDO + +ENDIF +IF (LHOOK) CALL DR_HOOK('EUPDSPBAD_MOD:EUPDSPBAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUPDSPBAD +END MODULE EUPDSPBAD_MOD diff --git a/src/etrans/cpu/internal/euvtvd_comm_mod.F90 b/src/etrans/cpu/internal/euvtvd_comm_mod.F90 new file mode 100644 index 000000000..14c2b0091 --- /dev/null +++ b/src/etrans/cpu/internal/euvtvd_comm_mod.F90 @@ -0,0 +1,138 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUVTVD_COMM_MOD +CONTAINS +SUBROUTINE EUVTVD_COMM(KFIELD,PSPMEANU,PSPMEANV,KFLDPTR) + +!**** *EUVTVD_COMM* - Communicate mean wind + +! Purpose. +! -------- + +!** Interface. +! ---------- +! CALL EUVTVD_COMM(KFIELD,PSPMEANU,PSPMEANV,KFLDPTR) + +! Explicit arguments : +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 12-Jan-2020 Fix missing finalization of communications +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR +USE TPMALD_GEO +USE TPMALD_DISTR +USE MPL_MODULE +USE SET2PE_MOD +USE ABORT_TRANS_MOD +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(INOUT) :: PSPMEANU(KFIELD) +REAL(KIND=JPRB), INTENT(INOUT) :: PSPMEANV(KFIELD) +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(KFIELD) + +INTEGER(KIND=JPIM) :: J, JA,ITAG,ILEN,IFLD,ISND, IM, JM + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',0,ZHOOK_HANDLE) + +!* 1. COMMUNICATE MEAN WIND +! --------------------- + +IF (D%NPROCM(0) == MYSETW) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=1 + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVD_COMM:') + ENDIF + ENDDO + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVD_COMM:') + ENDIF + ENDDO +ELSE + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ITAG=1 + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=NPRCIDS(ISND),KTAG=ITAG,KOUNT=ILEN, CDSTRING='EUVTVD_COMM:') + IF (ILEN /= 2*KFIELD) CALL ABORT_TRANS('EUVTVD_COMM: RECV INVALID RECEIVE MESSAGE LENGHT') + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EUVTVD_COMM_MOD:EUVTVD_COMM',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD_COMM +END MODULE EUVTVD_COMM_MOD diff --git a/src/etrans/cpu/internal/euvtvd_mod.F90 b/src/etrans/cpu/internal/euvtvd_mod.F90 new file mode 100644 index 000000000..4d8895d39 --- /dev/null +++ b/src/etrans/cpu/internal/euvtvd_mod.F90 @@ -0,0 +1,122 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUVTVD_MOD +CONTAINS +SUBROUTINE EUVTVD(KM,KMLOC,KFIELD,PU,PV,PVOR,PDIV) + +!**** *EUVTVD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX - calculation part. + +!** Interface. +! ---------- +! CALL EUVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 : G. Radnoti: b-level conform mean-wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! F. Vana + NEC 28-Apr-2009 MPI-OpenMP fix +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM +INTEGER(KIND=JPIM), INTENT(IN) :: KMLOC +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PU(:,:) +REAL(KIND=JPRB), INTENT(IN) :: PV(:,:) +REAL(KIND=JPRB), INTENT(OUT):: PVOR(:,:) +REAL(KIND=JPRB), INTENT(OUT):: PDIV(:,:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN + +REAL(KIND=JPRB) :: ZKM, ZIN + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',0,ZHOOK_HANDLE) + +!* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. +! ------------------------------------------ + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR=2*J-1 + II=IR+1 + DO JN=1,R%NDGL+R%NNOEXTZG + PDIV(JN,IR)=-ZKM*PU(JN,II) + PDIV(JN,II)= ZKM*PU(JN,IR) + PVOR(JN,IR)=-ZKM*PV(JN,II) + PVOR(JN,II)= ZKM*PV(JN,IR) + ENDDO +ENDDO +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PVOR(JN,J )=PVOR(JN ,J)+ZIN*PU(JN+1,J) + PVOR(JN+1,J)=PVOR(JN+1,J)-ZIN*PU(JN ,J) + PDIV(JN,J )=PDIV(JN ,J)-ZIN*PV(JN+1,J) + PDIV(JN+1,J)=PDIV(JN+1,J)+ZIN*PV(JN ,J) + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('EUVTVD_MOD:EUVTVD',1,ZHOOK_HANDLE) + +END SUBROUTINE EUVTVD +END MODULE EUVTVD_MOD diff --git a/src/etrans/cpu/internal/euvtvdad_mod.F90 b/src/etrans/cpu/internal/euvtvdad_mod.F90 new file mode 100644 index 000000000..e6a36eac3 --- /dev/null +++ b/src/etrans/cpu/internal/euvtvdad_mod.F90 @@ -0,0 +1,139 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EUVTVDAD_MOD +CONTAINS +SUBROUTINE EUVTVDAD(KM,KMLOC,KFIELD,KFLDPTR,PU,PV,PVOR,PDIV,PSPMEANU,PSPMEANV) + +!**** *EUVTVDAD* - Compute vor/div from u and v in spectral space + +! Purpose. +! -------- +! To compute vorticity and divergence from u and v in spectral +! space. Input u and v from KM to NTMAX+1, output vorticity and +! divergence from KM to NTMAX. + +!** Interface. +! ---------- +! CALL EUVTVDAD() + +! Explicit arguments : KM - zonal wave-number +! -------------------- KFIELD - number of fields (levels) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM +! PU - u wind component for zonal +! wavenumber KM +! PV - v wind component for zonal +! wavenumber KM +! PVOR - vorticity for zonal +! wavenumber KM +! PDIV - divergence for zonal +! wavenumber KM + +! Method. See ref. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 91-07-01 +! D. Giard : NTMAX instead of NSMAX +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! 03-03-03 G. Radnoti: b-level conform mean wind distribution +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn removed erasing of mean wind +! D. Degrauwe (Feb 2012): Alternative extension zone (E') +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +!USE TPM_FIELDS + +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KFIELD, KM, KMLOC +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(INOUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, IFLD + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EUVTVDAD_MOD:EUVTVDAD',0,ZHOOK_HANDLE) + +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IR=2*J-1 + IFLD=KFLDPTR(J) + PU(1,IR)=PSPMEANU(IFLD) + PV(1,IR)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + IR=2*J-1 + PU(1,IR)=PSPMEANU(J) + PV(1,IR)=PSPMEANV(J) + ENDDO + ENDIF +ENDIF + +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN=(JN-1)/2 + ZIN=REAL(IN,JPRB)*GALD%EYWN + PU(JN+1,J) = PU(JN+1,J) + ZIN * PVOR(JN ,J) + PU(JN ,J) = PU(JN ,J) - ZIN * PVOR(JN+1,J) + PV(JN+1,J) = PV(JN+1,J) - ZIN * PDIV(JN ,J) + PV(JN ,J) = PV(JN ,J) + ZIN * PDIV(JN+1,J) + ENDDO +ENDDO + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR=2*J-1 + II=IR+1 + DO JN=1,R%NDGL+R%NNOEXTZG + PU(JN,II) = PU(JN,II) - ZKM * PDIV(JN,IR) + PU(JN,IR) = PU(JN,IR) + ZKM * PDIV(JN,II) + PV(JN,II) = PV(JN,II) - ZKM * PVOR(JN,IR) + PV(JN,IR) = PV(JN,IR) + ZKM * PVOR(JN,II) + ENDDO +ENDDO +IF (LHOOK) CALL DR_HOOK('EUVTVDAD_MOD:EUVTVDAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EUVTVDAD +END MODULE EUVTVDAD_MOD diff --git a/src/etrans/cpu/internal/evdtuv_mod.F90 b/src/etrans/cpu/internal/evdtuv_mod.F90 new file mode 100644 index 000000000..184d6cb2b --- /dev/null +++ b/src/etrans/cpu/internal/evdtuv_mod.F90 @@ -0,0 +1,136 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EVDTUV_MOD +CONTAINS +SUBROUTINE EVDTUV(KM,KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +!**** *VDTUV* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL VDTUV(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUV in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD +REAL(KIND=JPRB), INTENT(IN) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(OUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(IN) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IN, IFLD + +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',0,ZHOOK_HANDLE) +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PU(JN ,J) = -ZIN*PVOR(JN+1,J) + PU(JN+1,J) = ZIN*PVOR(JN,J) + PV(JN ,J) = -ZIN*PDIV(JN+1,J) + PV(JN+1,J) = ZIN*PDIV(JN,J) + ENDDO +ENDDO +DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JN=1,DALD%NCPL2M(KM) + IJ=(JN-1)/2 + PU(JN,IR)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*(-ZKM*PDIV(JN,II)-PU(JN,IR)) + PU(JN,II)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*( ZKM*PDIV(JN,IR)-PU(JN,II)) + PV(JN,IR)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*(-ZKM*PVOR(JN,II)+PV(JN,IR)) + PV(JN,II)= FALD%RLEPINM(DALD%NPME(KM)+IJ)*( ZKM*PVOR(JN,IR)+PV(JN,II)) + ENDDO +ENDDO +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PU(1,IR)=PSPMEANU(IFLD) + PV(1,IR)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J = 1, KFIELD + IR = 2*J-1 + PU(1,IR)=PSPMEANU(J) + PV(1,IR)=PSPMEANV(J) + ENDDO + ENDIF +ENDIF +IF (LHOOK) CALL DR_HOOK('EVDTUV_MOD:EVDTUV',1,ZHOOK_HANDLE) + +END SUBROUTINE EVDTUV +END MODULE EVDTUV_MOD diff --git a/src/etrans/cpu/internal/evdtuvad_comm_mod.F90 b/src/etrans/cpu/internal/evdtuvad_comm_mod.F90 new file mode 100644 index 000000000..63fd32814 --- /dev/null +++ b/src/etrans/cpu/internal/evdtuvad_comm_mod.F90 @@ -0,0 +1,174 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EVDTUVAD_COMM_MOD +CONTAINS +SUBROUTINE EVDTUVAD_COMM(KM,KMLOC,KFIELD,KFLDPTR,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM +USE TPM_FIELDS +USE TPM_DISTR + +USE TPMALD_FIELDS +USE TPMALD_GEO +USE TPMALD_DISTR + +USE MPL_MODULE +USE ABORT_TRANS_MOD +USE SET2PE_MOD + + +!**** *EVDTUVAD_COMM* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space communicate the mean winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL EVDTUVAD_COMM(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn Fix mean wind for NPRTRW > 1 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! R. El Khatib 12-Jan-2020 Fix missing finalization of communications +! R. El Khatib 02-Jun-2022 Optimization/Cleaning +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD, KMLOC + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IFLD + +INTEGER(KIND=JPIM) :: IN +INTEGER(KIND=JPIM) :: ISND, JA, ITAG, ILEN + +INTEGER(KIND=JPIM) :: ISENDREQ(NPRTRW) + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_COMM_MOD:EVDTUVAD_COMM',0,ZHOOK_HANDLE) + +IF (NPRTRW > 1 .AND. KFIELD > 0) THEN + IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + ZSPU(J)=PSPMEANU(IFLD) + ZSPU(KFIELD+J)=PSPMEANV(IFLD) + ENDDO + ELSE + DO J=1,KFIELD + ZSPU(J)=PSPMEANU(J) + ZSPU(KFIELD+J)=PSPMEANV(J) + ENDDO + ENDIF + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL SET2PE(ISND,0,0,JA,MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+ISND + CALL MPL_SEND(ZSPU(1:2*KFIELD),KDEST=ISND,KTAG=ITAG, & + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JA), & + & CDSTRING='EVDTUVAD_COMM:') + ENDIF + ENDDO + ELSE + IF (KMLOC == 1) THEN + IF (D%NPROCM(0) /= MYSETW) THEN + CALL SET2PE(ISND,0,0,D%NPROCM(0),MYSETV) + ISND=NPRCIDS(ISND) + ITAG=300000+KFIELD*NPROC+MYPROC + CALL MPL_RECV(ZSPU(1:2*KFIELD),KSOURCE=ISND,KTAG=ITAG,KOUNT=ILEN,CDSTRING='EVDTUVAD_COMM:') + IF (ILEN /= 2*KFIELD) THEN + CALL ABORT_TRANS('EVDTUVAD_COMM: RECV INVALID RECEIVE MESSAGE LENGTH') + ENDIF + IF (PRESENT(KFLDPTR)) THEN + DO J=1,KFIELD + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=ZSPU(J) + PSPMEANV(IFLD)=ZSPU(KFIELD+J) + ENDDO + ELSE + DO J=1,KFIELD + PSPMEANU(J)=ZSPU(J) + PSPMEANV(J)=ZSPU(KFIELD+J) + ENDDO + ENDIF + ENDIF + ENDIF + ENDIF + IF (KM == 0) THEN + DO JA=1,NPRTRW + IF (JA /= MYSETW) THEN + CALL MPL_WAIT(KREQUEST=ISENDREQ(JA),CDSTRING='EUVTVDAD_COMM:') + ENDIF + ENDDO + ENDIF +ENDIF + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_COMM_MOD:EVDTUVAD_COMM',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EVDTUVAD_COMM +END MODULE EVDTUVAD_COMM_MOD diff --git a/src/etrans/cpu/internal/evdtuvad_mod.F90 b/src/etrans/cpu/internal/evdtuvad_mod.F90 new file mode 100644 index 000000000..e0a8a2749 --- /dev/null +++ b/src/etrans/cpu/internal/evdtuvad_mod.F90 @@ -0,0 +1,162 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE EVDTUVAD_MOD +CONTAINS +SUBROUTINE EVDTUVAD(KM,KMLOC,KFIELD,KFLDPTR,PVOR,PDIV,PU,PV,PSPMEANU,PSPMEANV) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +!USE TPM_DIM +!USE TPM_FIELDS +USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC + +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD +USE TPMALD_DISTR ,ONLY : DALD + +!**** *EVDTUVAD* - Compute U,V in spectral space + +! Purpose. +! -------- +! In Laplace space compute the the winds +! from vorticity and divergence. + +!** Interface. +! ---------- +! CALL EVDTUVAD(...) + +! Explicit arguments : KM -zonal wavenumber (input-c) +! -------------------- KFIELD - number of fields (input-c) +! KFLDPTR - fields pointers +! PEPSNM - REPSNM for wavenumber KM (input-c) +! PVOR(NLEI1,2*KFIELD) - vorticity (input) +! PDIV(NLEI1,2*KFIELD) - divergence (input) +! PU(NLEI1,2*KFIELD) - u wind (output) +! PV(NLEI1,2*KFIELD) - v wind (output) +! Organisation within NLEI1: +! NLEI1 = NSMAX+4+mod(NSMAX+4+1,2) +! overdimensioning +! 1 : n=NSMAX+2 +! 2 : n=NSMAX+1 +! 3 : n=NSMAX +! . : +! . : +! NSMAX+3 : n=0 +! NSMAX+4 : n=-1 + +! Implicit arguments : Eigenvalues of inverse Laplace operator +! -------------------- from YOMLAP + +! Method. +! ------- + +! Externals. None. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS +! Temperton, 1991, MWR 119 p1303 + +! Author. +! ------- +! Mats Hamrud and Philippe Courtier *ECMWF* + +! Modifications. +! -------------- +! Original : 00-02-01 From VDTUVAD in IFS CY22R1 +! 01-08-27 : R. El Khatib Fix for NPROMATR /= 0 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! 01-Dec-2004 A. Deckmyn Fix mean wind for NPRTRW > 1 +! N. Lopes & R. El Khatib 15-Jun-2012 Scalability enhancement + +! thread-safety +! ------------------------------------------------------------------ + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KM, KFIELD, KMLOC +REAL(KIND=JPRB), INTENT(INOUT) :: PVOR(:,:),PDIV(:,:) +REAL(KIND=JPRB), INTENT(INOUT) :: PU (:,:),PV (:,:) + +INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: KFLDPTR(:) +REAL(KIND=JPRB), OPTIONAL, INTENT(OUT) :: PSPMEANU(:),PSPMEANV(:) + +INTEGER(KIND=JPIM) :: II, IJ, IR, J, JN, IFLD + +INTEGER(KIND=JPIM) :: IN +INTEGER(KIND=JPIM) :: ISND, JA, ITAG, ILEN + +REAL(KIND=JPRB) :: ZSPU(2*KFIELD) +REAL(KIND=JPRB) :: ZKM +REAL(KIND=JPRB) :: ZIN +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_MOD:EVDTUVAD',0,ZHOOK_HANDLE) + +IF (KM == 0) THEN + IF (PRESENT(KFLDPTR)) THEN + DO J = 1, KFIELD + IR = 2*J-1 + IFLD=KFLDPTR(J) + PSPMEANU(IFLD)=PU(1,IR) + PSPMEANV(IFLD)=PV(1,IR) + ENDDO + ELSE + DO J = 1, KFIELD + IR = 2*J-1 + PSPMEANU(J)=PU(1,IR) + PSPMEANV(J)=PV(1,IR) + ENDDO + ENDIF +ENDIF + +ZKM=REAL(KM,JPRB)*GALD%EXWN +DO J=1,KFIELD + IR = 2*J-1 + II = IR+1 + DO JN=1,DALD%NCPL2M(KM) + IJ=(JN-1)/2 + PDIV(JN,II)=PDIV(JN,II)-ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,IR) + PU(JN,IR)=-FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,IR) + + PDIV(JN,IR)=PDIV(JN,IR)+ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,II) + PU(JN,II)=-FALD%RLEPINM(DALD%NPME(KM)+IJ)*PU(JN,II) + + PVOR(JN,II)=PVOR(JN,II)-ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,IR) + PV(JN,IR)=FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,IR) + + PVOR(JN,IR)=PVOR(JN,IR)+ZKM*FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,II) + PV(JN,II)=FALD%RLEPINM(DALD%NPME(KM)+IJ)*PV(JN,II) + + ENDDO +ENDDO + +DO J=1,2*KFIELD + DO JN=1,DALD%NCPL2M(KM),2 + IN = (JN-1)/2 + ZIN = REAL(IN,JPRB)*GALD%EYWN + PVOR(JN+1,J) = PVOR(JN+1,J)-ZIN*PU(JN ,J) + PVOR(JN ,J) = PVOR(JN ,J)+ZIN*PU(JN+1,J) + PDIV(JN+1,J) = PDIV(JN+1,J)-ZIN*PV(JN ,J) + PDIV(JN ,J) = PDIV(JN ,J)+ZIN*PV(JN+1,J) + ENDDO +ENDDO + +IF (LHOOK) CALL DR_HOOK('EVDTUVAD_MOD:EVDTUVAD',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE EVDTUVAD +END MODULE EVDTUVAD_MOD diff --git a/src/etrans/cpu/internal/suefft_mod.F90 b/src/etrans/cpu/internal/suefft_mod.F90 new file mode 100644 index 000000000..670b38d52 --- /dev/null +++ b/src/etrans/cpu/internal/suefft_mod.F90 @@ -0,0 +1,101 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE SUEFFT_MOD +CONTAINS +SUBROUTINE SUEFFT + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_DIM ,ONLY : R +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DISTR ,ONLY : D, MYSETW +USE TPM_GEOMETRY ,ONLY : G +#ifdef WITH_FFT992 +USE TPM_FFT ,ONLY : T +USE TPMALD_FFT ,ONLY : TALD +#endif +USE TPM_FFTW ,ONLY : TW, INIT_PLANS_FFTW +! + +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JGL,IGLG, ILATS +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEFFT_MOD:SUEFFT',0,ZHOOK_HANDLE) + +IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEFFT ===' + +#ifdef WITH_FFT992 + IF( TALD%LFFT992 )THEN + + NULLIFY(TW%FFTW_PLANS) + + ALLOCATE(T%TRIGS(R%NDLON+R%NNOEXTZL,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%TRIGS ',SIZE(T%TRIGS),SHAPE(T%TRIGS) + ALLOCATE(T%NFAX(19,D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%NFAX ',SIZE(T%NFAX),SHAPE(T%NFAX) + ALLOCATE(T%LUSEFFT992(D%NDGL_FS)) + IF(LLP2)WRITE(NOUT,9) 'T%LUSEFFT992',SIZE(T%LUSEFFT992),SHAPE(T%LUSEFFT992) + + ! + ! create TRIGS and NFAX for latitude lengths supported by FFT992, + ! that is just with factors 2, 3 or 5 + ! + + ILATS=0 + DO JGL=1,D%NDGL_FS + IGLG = D%NPTRLS(MYSETW)+JGL-1 + IF (G%NLOEN(IGLG)>1) THEN + CALL SET99B(T%TRIGS(1,JGL),T%NFAX(1,JGL),G%NLOEN(IGLG)+R%NNOEXTZL,T%LUSEFFT992(JGL)) + IF( .NOT.T%LUSEFFT992(JGL) )THEN + ILATS=ILATS+1 + ENDIF + ENDIF + ENDDO + + ALLOCATE(TALD%TRIGSE(R%NDGL+R%NNOEXTZG)) + IF(LLP2)WRITE(NOUT,9) 'TALD%TRIGSE ',SIZE(TALD%TRIGSE),SHAPE(TALD%TRIGSE) + ALLOCATE(TALD%NFAXE(19)) + IF(LLP2)WRITE(NOUT,9) 'TALD%NFAXE ',SIZE(TALD%NFAXE),SHAPE(TALD%NFAXE) + CALL SET99(TALD%TRIGSE,TALD%NFAXE,R%NDGL+R%NNOEXTZG) + + + ELSE +#endif + + CALL INIT_PLANS_FFTW(MAX(R%NDLON+R%NNOEXTZL,R%NDGL+R%NNOEXTZG)) + +#ifdef WITH_FFT992 + ENDIF +#endif + +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEFFT_MOD:SUEFFT',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEFFT +END MODULE SUEFFT_MOD diff --git a/src/etrans/cpu/internal/suemp_trans_mod.F90 b/src/etrans/cpu/internal/suemp_trans_mod.F90 new file mode 100644 index 000000000..17bb28870 --- /dev/null +++ b/src/etrans/cpu/internal/suemp_trans_mod.F90 @@ -0,0 +1,278 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE SUEMP_TRANS_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS + +! Set up distributed environment for the transform package (part 2) +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, NPRTRNS, NPRTRV, NPRTRW, MYSETW, NPROC, MYPROC +USE TPMALD_DIM ,ONLY : RALD +!USE TPMALD_DISTR +!USE SUWAVEDI_MOD +!USE PE2SET_MOD +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUEMPLAT_MOD ,ONLY : SUEMPLAT +USE SUESTAONL_MOD ,ONLY : SUESTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: JM,JMLOC +INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM +INTEGER(KIND=JPIM) :: I1,I2,I3,IAUX0,IAUX1,JA1 +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF +INTEGER(KIND=JPIM), ALLOCATABLE :: IGPTOTL(:,:) + +REAL(KIND=JPRD) :: ZMEDIAP + +LOGICAL :: LLP1,LLP2 +REAL(KIND=JPRD),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ------------------------------------------------------------------ + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',0,ZHOOK_HANDLE) +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS ===' + +IF(.NOT.D%LGRIDONLY) THEN + +ALLOCATE(D%NULTPP(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NULTPP ',SIZE(D%NULTPP ),SHAPE(D%NULTPP ) +ALLOCATE(D%NPTRLS(NPRTRNS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLS ',SIZE(D%NPTRLS ),SHAPE(D%NPTRLS ) +ALLOCATE(D%NPROCL(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCL ',SIZE(D%NPROCL ),SHAPE(D%NPROCL ) + +CALL SUMPLATF(R%NDGL,NPRTRNS,MYSETW,D%NULTPP,D%NPROCL,D%NPTRLS) +D%NDGL_FS = D%NULTPP(MYSETW) + +! Help arrays for spectral to fourier space transposition +ALLOCATE(D%NLTSGTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSGTB ',SIZE(D%NLTSGTB),SHAPE(D%NLTSGTB) +ALLOCATE(D%NLTSFTB (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NLTSFTB ',SIZE(D%NLTSFTB),SHAPE(D%NLTSFTB) +ALLOCATE(D%NSTAGT0B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT0B ',SIZE(D%NSTAGT0B),SHAPE(D%NSTAGT0B) +ALLOCATE(D%NSTAGT1B(NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGT1B ',SIZE(D%NSTAGT1B),SHAPE(D%NSTAGT1B) +ALLOCATE(D%MSTABF (NPRTRNS+1)) +IF(LLP2)WRITE(NOUT,9) 'D%MSTABF ',SIZE(D%MSTABF),SHAPE(D%MSTABF) + +D%NLTSGTB(:) = 0 +DO JGL=1,D%NDGL_FS + IGL = D%NPTRLS(MYSETW)+JGL-1 + DO JM=0,G%NMEN(IGL) + D%NLTSGTB(D%NPROCM(JM)) = D%NLTSGTB(D%NPROCM(JM))+1 + ENDDO +ENDDO +DO JA=1,NPRTRW + IPLAT = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA)+JGL-1 + DO JM=1,D%NUMP + IF(IGL > R%NDGNH-G%NDGLU(D%MYMS(JM)) .AND. IGL <= R%NDGNH+G%NDGLU(D%MYMS(JM))) THEN + IPLAT = IPLAT + 1 + ENDIF + ENDDO + ENDDO + D%NLTSFTB(JA) = IPLAT +ENDDO + +DO JA=1,NPRTRW-1 + ISENDSET = MYSENDSET(NPRTRW,MYSETW,JA) + IRECVSET = MYRECVSET(NPRTRW,MYSETW,JA) + D%MSTABF(IRECVSET) = ISENDSET +ENDDO +D%MSTABF(MYSETW) = MYSETW + +ALLOCATE(D%NPNTGTB0(0:RALD%NMSMAX,D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB0 ',SIZE(D%NPNTGTB0 ),SHAPE(D%NPNTGTB0 ) +ALLOCATE(D%NPNTGTB1(D%NUMP,R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPNTGTB1 ',SIZE(D%NPNTGTB1 ),SHAPE(D%NPNTGTB1 ) + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(MYSETW) + IGL = D%NPTRLS(MYSETW) + JGL - 1 + DO JML=D%NPTRMS(JA),D%NPTRMS(JA)+D%NUMPP(JA)-1 + IM = D%NALLMS(JML) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB0(IM,JGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB0(IM,JGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + +DO JA=1,NPRTRW + IPOS = 0 + DO JGL=1,D%NULTPP(JA) + IGL = D%NPTRLS(JA) + JGL - 1 + DO JM=1,D%NUMP + IM = D%MYMS(JM) + IF (IM <= G%NMEN(IGL)) THEN + D%NPNTGTB1(JM,IGL) = IPOS + IPOS = IPOS+1 + ELSE + D%NPNTGTB1(JM,IGL) = -99 + ENDIF + ENDDO + ENDDO +ENDDO + +IAUX0 = 0 +IAUX1 = 0 +DO JA=1,NPRTRNS-1 + I1 = MYSENDSET(NPRTRNS,MYSETW,JA) + I2 = MYRECVSET(NPRTRNS,MYSETW,JA) + DO JA1=1,NPRTRNS-1 + IF(MYSENDSET(NPRTRNS,MYSETW,JA1) == I2) I3 =MYRECVSET(NPRTRNS,MYSETW,JA1) + ENDDO + IAUX0 = MAX(D%NLTSFTB(I1),D%NLTSGTB(I2),IAUX0) + IAUX1 = MAX(D%NLTSGTB(I2),D%NLTSFTB(I3),IAUX1) +ENDDO +IAUX0 = MAX(D%NLTSGTB(MYSETW),IAUX0) +IAUX1 = MAX(D%NLTSGTB(MYSETW),IAUX1) +DO JA=1,NPRTRNS+1 + D%NSTAGT0B(JA) = (JA-1)*IAUX0 + D%NSTAGT1B(JA) = (JA-1)*IAUX1 +ENDDO +D%NLENGT0B = IAUX0*NPRTRNS +D%NLENGT1B = IAUX1*NPRTRNS + +ENDIF + +! GRIDPOINT SPACE + +ALLOCATE(D%NFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NFRSTLAT ',SIZE(D%NFRSTLAT ),SHAPE(D%NFRSTLAT ) +ALLOCATE(D%NLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NLSTLAT ',SIZE(D%NLSTLAT ),SHAPE(D%NLSTLAT ) +ALLOCATE(D%NPTRLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRLAT ',SIZE(D%NPTRLAT ),SHAPE(D%NPTRLAT ) +ALLOCATE(D%NPTRFRSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPTRFRSTLAT',SIZE(D%NPTRFRSTLAT),SHAPE(D%NPTRFRSTLAT) +ALLOCATE(D%NPTRLSTLAT(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9)'D%NPTRLSTLAT',SIZE(D%NPTRLSTLAT),SHAPE(D%NPTRLSTLAT) +ALLOCATE(D%LSPLITLAT(R%NDGL)) +IF(LLP2)WRITE(NOUT,9) 'D%LSPLITLAT',SIZE(D%LSPLITLAT),SHAPE(D%LSPLITLAT) +ALLOCATE(D%NPROCA_GP(N_REGIONS_NS)) +IF(LLP2)WRITE(NOUT,9) 'D%NPROCA_GP',SIZE(D%NPROCA_GP),SHAPE(D%NPROCA_GP) + + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + ALLOCATE(ZDUM(1)) + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & ZDUM,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ELSE + CALL SUEMPLAT(R%NDGL,NPROC,N_REGIONS_NS,MY_REGION_NS,D%LSPLIT, LEQ_REGIONS,& + & D%NFRSTLAT,D%NLSTLAT,D%NFRSTLOFF,D%NPTRLAT,& + & D%NPTRFRSTLAT,D%NPTRLSTLAT,D%NPTRFLOFF,& + & D%RWEIGHT,D%LWEIGHTED_DISTR,ZMEDIAP,D%NPROCA_GP,& + & IMEDIAP,IRESTM,D%LSPLITLAT,MYPROC,G%NLOEN,RALD%NDGUX) +ENDIF +D%NDGL_GP = D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF + +IF (LLP1) THEN + IF(.NOT.D%LGRIDONLY) THEN + WRITE(NOUT,FMT='(/'' OUTPUT FROM ROUTINE SUEMPLAT: ''/)') + WRITE(NOUT,FMT='('' D%NULTPP '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NULTPP(1:NPRTRNS) + WRITE(NOUT,FMT='('' D%NPROCL '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPROCL(1:R%NDGL) + ENDIF + WRITE(NOUT,FMT='('' D%NFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NFRSTLOFF D%NPTRFLOFF '')') + WRITE(NOUT,FMT='(2(1X,I6))') D%NFRSTLOFF, D%NPTRFLOFF + WRITE(NOUT,FMT='('' D%NPTRLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%LSPLITLAT '')') + WRITE(NOUT,FMT='(50(1X,L1))') D%LSPLITLAT(1:R%NDGL) + WRITE(NOUT,FMT='('' D%NPTRFRSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRFRSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='('' D%NPTRLSTLAT '')') + WRITE(NOUT,FMT='(20(1X,I4))') D%NPTRLSTLAT(1:N_REGIONS_NS) + WRITE(NOUT,FMT='(/)') +ENDIF +ALLOCATE(D%NSTA(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTA ',SIZE(D%NSTA ),SHAPE(D%NSTA ) +ALLOCATE(D%NONL(R%NDGL+N_REGIONS_NS-1,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NONL ',SIZE(D%NONL ),SHAPE(D%NONL ) + +IF(.NOT.D%LWEIGHTED_DISTR) THEN + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,ZDUM,ZMEDIAP,D%NPROCA_GP) +ELSE + CALL SUESTAONL(IMEDIAP,IRESTM,D%LWEIGHTED_DISTR,D%RWEIGHT,ZMEDIAP,D%NPROCA_GP) +ENDIF +! IGPTOTL is the number of grid points in each individual processor +ALLOCATE(IGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IGPTOTL(:,:)=0 +DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + IGPTOT = 0 + DO JGL=D%NPTRFRSTLAT(JA),D%NPTRLSTLAT(JA) + IGPTOT = IGPTOT+D%NONL(JGL,JB) + ENDDO + IGPTOTL(JA,JB) = IGPTOT + ENDDO +ENDDO +D%NGPTOT = IGPTOTL(MY_REGION_NS,MY_REGION_EW) +D%NGPTOTMX = MAXVAL(IGPTOTL) +D%NGPTOTG = SUM(IGPTOTL) +ALLOCATE(D%NGPTOTL(N_REGIONS_NS,N_REGIONS_EW)) +IF(LLP2)WRITE(NOUT,9) 'D%NGPTOTL ',SIZE(D%NGPTOTL ),SHAPE(D%NGPTOTL ) +D%NGPTOTL(:,:) = IGPTOTL(:,:) + +IF(.NOT.D%LGRIDONLY) THEN +ALLOCATE(D%NSTAGTF(D%NDGL_FS)) +IF(LLP2)WRITE(NOUT,9) 'D%NSTAGTF ',SIZE(D%NSTAGTF ),SHAPE(D%NSTAGTF ) +IOFF = 0 +DO JGL=1,D%NDGL_FS + D%NSTAGTF(JGL) = IOFF + IGL = D%NPTRLS(MYSETW) + JGL - 1 + IOFF = IOFF + G%NLOEN(IGL)+3+R%NNOEXTZL +ENDDO +D%NLENGTF = IOFF +ENDIF + +IF(ALLOCATED(ZDUM)) DEALLOCATE(ZDUM) +DEALLOCATE(IGPTOTL) +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_MOD:SUEMP_TRANS',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS +END MODULE SUEMP_TRANS_MOD + diff --git a/src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 b/src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 new file mode 100644 index 000000000..9ab000f16 --- /dev/null +++ b/src/etrans/cpu/internal/suemp_trans_preleg_mod.F90 @@ -0,0 +1,251 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE SUEMP_TRANS_PRELEG_MOD +CONTAINS +SUBROUTINE SUEMP_TRANS_PRELEG + +! Set up distributed environment for the transform package (part 1) + +USE PARKIND1 ,ONLY : JPIM ,JPRB +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW + +USE TPMALD_DISTR ,ONLY : DALD +USE TPMALD_DIM ,ONLY : RALD +USE TPMALD_FIELDS ,ONLY : FALD +USE TPMALD_GEO ,ONLY : GALD + +!USE SUWAVEDI_MOD +!USE ABORT_TRANS_MOD + +IMPLICIT NONE + + INTEGER(KIND=JPIM) :: JA,JM,JMLOC,JW,JV,ILATPP,IRESTL,IMLOC,IDT,INM,JN,IM,ILAST + + LOGICAL :: LLP1,LLP2 + + INTEGER(KIND=JPIM) :: ISPEC(NPRTRW),IMYMS(RALD%NMSMAX+1),IKNTMP(0:RALD%NMSMAX) + INTEGER(KIND=JPIM) :: IKMTMP(0:R%NSMAX),ISPEC2P + INTEGER(KIND=JPIM) :: IC(NPRTRW) + INTEGER(KIND=JPIM) :: IMDIM,IL,IND,IK,IPOS,IKM + REAL(KIND=JPRB) :: ZLEPDIM + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + ! ------------------------------------------------------------------ + + IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',0,ZHOOK_HANDLE) + + IF(.NOT.D%LGRIDONLY) THEN + + LLP1 = NPRINTLEV>0 + LLP2 = NPRINTLEV>1 + IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUEMP_TRANS_PRELEG ===' + + !* 1. Initialize partitioning of wave numbers to PEs ! + ! ---------------------------------------------- + + ALLOCATE(D%NASM0(0:R%NSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) + + ALLOCATE(DALD%NESM0(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NESM0 ',SIZE(DALD%NESM0 ),SHAPE(DALD%NESM0 ) + + ALLOCATE(D%NATM0(0:R%NTMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) + ALLOCATE(D%NUMPP(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) + ALLOCATE(D%NPOSSP(NPRTRW+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) + + ALLOCATE(D%NPROCM(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) + + ALLOCATE(DALD%NPME(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NPME',SIZE(DALD%NPME),SHAPE(DALD%NPME) + ALLOCATE(DALD%NCPL2M(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'DALD%NCPL2M',SIZE(DALD%NCPL2M),SHAPE(DALD%NCPL2M) + CALL ELLIPS(R%NSMAX,RALD%NMSMAX,IKNTMP,IKMTMP) + DALD%NPME(0)=1 + DO JM=1,RALD%NMSMAX + DALD%NPME(JM)=DALD%NPME(JM-1)+IKNTMP(JM-1)+1 + ENDDO + DO JM=0,RALD%NMSMAX + DALD%NCPL2M(JM) = 2*(IKNTMP(JM)+1) + ENDDO + ALLOCATE(FALD%RLEPINM(R%NSPEC_G/2)) + IF(LLP2)WRITE(NOUT,9) 'FALD%RLEPINM',SIZE(FALD%RLEPINM),SHAPE(FALD%RLEPINM) + DO JM=0,RALD%NMSMAX + DO JN=1,IKNTMP(JM) + ZLEPDIM=-((REAL(JM,JPRB)**2)*(GALD%EXWN**2)+& + & (REAL(JN,JPRB)**2)*(GALD%EYWN**2)) + FALD%RLEPINM(DALD%NPME(JM)+JN)=1./ZLEPDIM + ENDDO + ENDDO + DO JM=1,RALD%NMSMAX + ZLEPDIM=-(REAL(JM,JPRB)**2)*(GALD%EXWN**2) + FALD%RLEPINM(DALD%NPME(JM))=1./ZLEPDIM + ENDDO + FALD%RLEPINM(DALD%NPME(0))=0. + + D%NUMPP(:) = 0 + ISPEC(:) = 0 + DALD%NESM0(:)=-99 + + IMDIM = 0 + IL = 1 + IND = 1 + IK = 0 + IPOS = 1 + DO JM=0,RALD%NMSMAX + IK = IK + IND + IF (IK > NPRTRW) THEN + IK = NPRTRW + IND = -1 + ELSEIF (IK < 1) THEN + IK = 1 + IND = 1 + ENDIF + + IKM =DALD%NCPL2M(JM)/2 -1 + D%NPROCM(JM) = IK + ISPEC(IK) = ISPEC(IK)+IKM+1 + D%NUMPP(IK) = D%NUMPP(IK)+1 + IF (IK == MYSETW) THEN + IMDIM = IMDIM + IKM+1 + IMYMS(IL) = JM + DALD%NESM0(JM) = IPOS + IPOS = IPOS+(IKM+1)*4 + IL = IL+1 + ENDIF + ENDDO + D%NPOSSP(1) = 1 + ISPEC2P = 4*ISPEC(1) + D%NSPEC2MX = ISPEC2P + DO JA=2,NPRTRW + D%NPOSSP(JA) = D%NPOSSP(JA-1)+ISPEC2P + ISPEC2P = 4*ISPEC(JA) + D%NSPEC2MX=MAX(D%NSPEC2MX,ISPEC2P) + ENDDO + D%NPOSSP(NPRTRW+1) = D%NPOSSP(NPRTRW)+ISPEC2P + + D%NSPEC2 = 4*IMDIM + D%NSPEC=D%NSPEC2 + + D%NUMP = D%NUMPP (MYSETW) + ALLOCATE(D%MYMS(D%NUMP)) + IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) + D%MYMS(:) = IMYMS(1:D%NUMP) + D%NUMTP = D%NUMP + + ! pointer to the first wave number of a given wave-set in NALLMS array + ALLOCATE(D%NPTRMS(NPRTRW)) + IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) + D%NPTRMS(:) = 1 + DO JA=2,NPRTRW + D%NPTRMS(JA) = D%NPTRMS(JA-1)+D%NUMPP(JA-1) + ENDDO + ! D%NALLMS : wave numbers for all wave-set concatenated together to give all + ! wave numbers in wave-set order. + ALLOCATE(D%NALLMS(RALD%NMSMAX+1)) + IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) + IC(:) = 0 + DO JM=0,RALD%NMSMAX + D%NALLMS(IC(D%NPROCM(JM))+D%NPTRMS(D%NPROCM(JM))) = JM + IC(D%NPROCM(JM)) = IC(D%NPROCM(JM))+1 + ENDDO + ALLOCATE(D%NDIM0G(0:RALD%NMSMAX)) + IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) + IPOS = 1 + DO JA=1,NPRTRW + DO JMLOC=1,D%NUMPP(JA) + IM = D%NALLMS(D%NPTRMS(JA)+JMLOC-1) + D%NDIM0G(IM) = IPOS + IPOS = IPOS+2*DALD%NCPL2M(IM) + ENDDO + ENDDO + +ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) +IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) +ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) +IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) + +D%NLATLS(:,:) = 9999 +D%NLATLE(:,:) = -1 + +ILATPP = R%NDGL/NPRTRW +IRESTL = R%NDGL-NPRTRW*ILATPP +DO JW=1,NPRTRW + IF (JW > IRESTL) THEN + D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JA-IRESTL-1)*ILATPP+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 + ELSE + D%NLATLS(JW,1) = (JA-1)*(ILATPP+1)+1 + D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP + ENDIF +ENDDO +ILAST=0 +DO JW=1,NPRTRW + ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV + IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP + DO JV=1,NPRTRV + IF (JV > IRESTL) THEN + D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 + ELSE + D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST + D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP + ENDIF + ENDDO + ILAST=D%NLATLE(JW,NPRTRV) +ENDDO +IF (LLP1) THEN + DO JW=1,NPRTRW + DO JV=1,NPRTRV + WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& + & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) + ENDDO + ENDDO +ENDIF + +ALLOCATE(D%NPMT(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) +ALLOCATE(D%NPMS(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) +ALLOCATE(D%NPMG(0:R%NSMAX)) +IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) +IDT = R%NTMAX-R%NSMAX +INM = 0 +DO JMLOC=1,D%NUMP + IMLOC = D%MYMS(JMLOC) + + INM = INM+R%NTMAX+2-IMLOC +ENDDO +INM = 0 +DO JM=0,R%NSMAX + + INM = INM+R%NTMAX+2-JM +ENDDO + +D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 + +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMP_TRANS_PRELEG_MOD:SUEMP_TRANS_PRELEG',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ +9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) + +END SUBROUTINE SUEMP_TRANS_PRELEG +END MODULE SUEMP_TRANS_PRELEG_MOD diff --git a/src/etrans/cpu/internal/suemplat_mod.F90 b/src/etrans/cpu/internal/suemplat_mod.F90 new file mode 100644 index 000000000..7f1c1393e --- /dev/null +++ b/src/etrans/cpu/internal/suemplat_mod.F90 @@ -0,0 +1,265 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE SUEMPLAT_MOD +CONTAINS +SUBROUTINE SUEMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,LDSPLITLAT,KMYPROC,KLOEN,KDGUX) + +!**** *SUEMPLAT * - Initialize gridpoint distrbution in N-S direction + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *SUEMPLAT * + +! Explicit arguments - input : +! -------------------- +! KDGL -last latitude +! KPROC -total number of processors +! KPROCA -number of processors in A direction +! KMYSETA -process number in A direction +! LDSPLIT -true for latitudes shared between sets +! PWEIGHT -weight per grid-point if weighted +! distribution +! LDEQ_REGIONS -true if eq_regions partitioning +! LDWEIGHTED_DISTR -true if weighted distribution + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KRESTM -number of PEs with one extra point +! KFRSTLAT -first latitude row on processor +! KLSTLAT -last latitude row on processor +! KFRSTLOFF -offset for first latitude in set +! KPROCAGP -number of grid points per A set +! KPTRLAT -pointer to start of latitude +! KPTRFRSTLAT-pointer to first latitude +! KPTRLSTLAT -pointer to last latitude +! KPTRFLOFF -offset for pointer to first latitude +! LDSPLITLAT -true for latitudes which are split +! PMEDIAP -mean weight per PE if weighted +! distribution +! + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. SUMPLATB and SUEMPLATB. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! David Dent:97-06-02 parameters KFRSTLAT etc added +! JF. Estrade:97-11-13 Adaptation to ALADIN case +! J.Boutahar: 98-07-06 phasing with CY19 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings +! (correct computation of extrapolar latitudes for KPROCL). +! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. +! - merge old sumplat.F and suemplat.F +! - gather 'lelam' code and 'not lelam' code. +! - clean (useless duplication of variables, non doctor features). +! - remodularise according to lelam/not lelam +! -> lelam features in new routine suemplatb.F, +! not lelam features in new routine sumplatb.F +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 20-Sep-2010 Phasing cy37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV + +USE SUEMPLATB_MOD ,ONLY : SUEMPLATB +USE SUMPLATBEQ_MOD ,ONLY : SUMPLATBEQ +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) +REAL(KIND=JPRD),INTENT(OUT) :: PMEDIAP +REAL(KIND=JPRD),INTENT(IN) :: PWEIGHT(:) + +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(IN) :: LDEQ_REGIONS +LOGICAL,INTENT(OUT) :: LDSPLITLAT(:) +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(KDGL) +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX + +! === END OF INTERFACE BLOCK === +INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) + +INTEGER(KIND=JPIM) :: IPTRLATITUDE, JA, JGL + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +LOGICAL :: LLFOURIER +LOGICAL :: LLDEBUG=.FALSE. + +! ----------------------------------------------------------------- + +!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF +! KMEDIAP, KRESTM, INDIC, ILAST. +! ----------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUEMPLAT_MOD:SUEMPLAT',0,ZHOOK_HANDLE) + +INDIC(:)=0 +ILAST(:)=0 + +IF(LDWEIGHTED_DISTR.AND..NOT.LDEQ_REGIONS)THEN + CALL ABORT_TRANS ('SUEMPLAT: LDWEIGHTED_DISTR=T AND LDEQ_REGIONS=F NOT SUPPORTED') +ENDIF + + +IF( LDEQ_REGIONS )THEN + CALL SUMPLATBEQ(1,KDGL,KPROC,KPROCA,KLOEN,LDSPLIT,LDEQ_REGIONS,& + &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + &KMEDIAP,KRESTM,INDIC,ILAST) +ELSE + LLFOURIER=.FALSE. +!REK commented out for now ... monkey business to be done again, should lead to the use of sumplatb +!REK CALL SUMPLATB(1,KDGL,KPROCA,G%NLOEN,LDSPLIT,LLFOURIER,& +!REK &KMEDIAP,KRESTM,INDIC,ILAST) + CALL SUEMPLATB(1,KDGL,KPROCA,KLOEN,LDSPLIT,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,INDIC,ILAST,KDGUX) +ENDIF + +! ----------------------------------------------------------------- + +!* 2. CODE NOT DEPENDING ON 'LELAM': COMPUTATION OF +! KFRSTLAT TO LDSPLITLAT. +! --------------------------------------------- + +! * Computation of first and last latitude of processor sets +! ----------- in grid-point-space ----------------------- +IF(KMYPROC==1.AND.LLDEBUG)THEN + WRITE(0,'("")') + WRITE(0,'("SUEMPLAT_MOD:LDWEIGHTED_DISTR=",L1)')LDWEIGHTED_DISTR + WRITE(0,'("")') + DO JA=1,KPROCA + WRITE(0,'("SUEMPLAT_MOD: JA=",I3," ILAST=",I3," INDIC=",I3)')& + &JA,ILAST(JA),INDIC(JA) + ENDDO + WRITE(0,'("")') + IF( LDEQ_REGIONS .AND. LDSPLIT )THEN + DO JA=1,KPROCA + WRITE(0,'("SUEMPLAT_MOD: JA=",I3," KPROCAGP=",I8)')& + &JA,KPROCAGP(JA) + ENDDO + WRITE(0,'("")') + ENDIF +ENDIF +KFRSTLAT(1) = 1 +KLSTLAT(KPROCA) = KDGL +DO JA=1,KPROCA-1 + IF(KMYPROC==1 .AND. NPRINTLEV > 1)THEN + WRITE(NOUT,'("SUEMPLAT_MOD: JA=",I3," ILAST=",I3," INDIC=",I3)')& + &JA,ILAST(JA),INDIC(JA) + ENDIF + IF ((.NOT. LDSPLIT) .OR. INDIC(JA) == 0) THEN + KFRSTLAT(JA+1) = ILAST(JA) + 1 + KLSTLAT(JA) = ILAST(JA) + ELSE + KFRSTLAT(JA+1) = INDIC(JA) + KLSTLAT(JA) = INDIC(JA) + ENDIF +ENDDO +KFRSTLOFF=KFRSTLAT(KMYSETA)-1 + +! * Initialise following data structures:- +! NPTRLAT (pointer to the start of each latitude) +! LSPLITLAT (TRUE if latitude is split over two A sets) +! NPTRFRSTLAT (pointer to the first latitude of each A set) +! NPTRLSTLAT (pointer to the last latitude of each A set) + +DO JGL=1,KDGL + KPTRLAT (JGL)=-999 + LDSPLITLAT(JGL)=.FALSE. +ENDDO +IPTRLATITUDE=0 +DO JA=1,KPROCA + DO JGL=KFRSTLAT(JA),KLSTLAT(JA) + IPTRLATITUDE=IPTRLATITUDE+1 + LDSPLITLAT(JGL)=.TRUE. + IF( KPTRLAT(JGL) == -999 )THEN + KPTRLAT(JGL)=IPTRLATITUDE + LDSPLITLAT(JGL)=.FALSE. + ENDIF + ENDDO +ENDDO +DO JA=1,KPROCA + IF( LDSPLITLAT(KFRSTLAT(JA)) .AND. JA /= 1 )THEN + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA))+1 + ELSE + KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA)) + ENDIF + IF( LDSPLITLAT(KLSTLAT(JA)) .AND. JA == KPROCA)THEN + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA))+1 + ELSE + KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA)) + ENDIF +ENDDO +KPTRFLOFF=KPTRFRSTLAT(KMYSETA)-1 +IF(KMYPROC==1 .AND. NPRINTLEV > 1)THEN + DO JGL=1,KDGL + WRITE(NOUT,'("SUEMPLAT_MOD: JGL=",I3," KPTRLAT=",I3," LDSPLITLAT=",L4)')& + & JGL,KPTRLAT(JGL),LDSPLITLAT(JGL) + ENDDO + DO JA=1,KPROCA + WRITE(NOUT,'("SUEMPLAT_MOD: JA=",I3," KFRSTLAT=",I3," KLSTLAT=",I3,& + & " KPTRFRSTLAT=",I3," KPTRLSTLAT=",I3)')& + & JA,KFRSTLAT(JA),KLSTLAT(JA),KPTRFRSTLAT(JA),KPTRLSTLAT(JA) + ENDDO +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMPLAT_MOD:SUEMPLAT',1,ZHOOK_HANDLE) +END SUBROUTINE SUEMPLAT +END MODULE SUEMPLAT_MOD + diff --git a/src/etrans/cpu/internal/suemplatb_mod.F90 b/src/etrans/cpu/internal/suemplatb_mod.F90 new file mode 100644 index 000000000..66c275599 --- /dev/null +++ b/src/etrans/cpu/internal/suemplatb_mod.F90 @@ -0,0 +1,247 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE SUEMPLATB_MOD +CONTAINS +SUBROUTINE SUEMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,& + & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& + & KMEDIAP,KRESTM,KINDIC,KLAST,KDGUX) + +!**** *SUMPLATB * - Routine to initialize parallel environment + +! Purpose. +! -------- + +!** Interface. +! ---------- +! *CALL* *SUMPLATB * + +! Explicit arguments - input : +! -------------------- +! KDGSA -first latitude (grid-space) +! (may be different from NDGSAG) +! KDGL -last latitude +! KPROCA -number of processors in A direction +! KLOENG -actual number of longitudes per latitude. +! LDSPLIT -true for latitudes shared between sets +! KDGUX -last latitude for meaningful computations +! (suggested to pass NDGUX in gp-space, NDGL in Fourier space +! for having a good load-balance) +! PWEIGHT -weight per grid-point if weighted distribution +! LDWEIGHTED_DISTR -true if weighted distribution` + +! Explicit arguments - output: +! -------------------- +! KMEDIAP -mean number of grid points per PE +! KPROCAGP -number of grid points per A set +! KRESTM -number of PEs with one extra point +! KINDIC -intermediate quantity for 'sumplat' +! KLAST -intermediate quantity for 'sumplat' +! PMEDIAP -mean weight per PE if weighted distribution + +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! K. YESSAD (after old version of sumplat.F). + +! Modifications. +! -------------- +! Original : 98-12-07 +! G. Radnoti: 03-03-03: Semi-merge with sumplatb, only difference: +! NS-partitioning according to NDGUX +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Bogatchev 21-Sep-2010 phasing CY37 +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +! * DUMMY: +INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA +INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) +REAL(KIND=JPRD),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDSPLIT +LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM +INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) +INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(KPROCA) +REAL(KIND=JPRD),INTENT(IN) :: PMEDIAP + +INTEGER(KIND=JPIM) :: IPP1(KPROCA),ILAST1(KPROCA) +INTEGER(KIND=JPIM) :: IPP(KPROCA) +INTEGER(KIND=JPIM) :: IFIRST(KPROCA) + +INTEGER(KIND=JPIM) :: ICOMP, IGL, IMAXI, IMAXIOL, IMEDIA, ITOT, JA, JGL,& + & ILAST,IREST,ILIMIT,IFRST +LOGICAL :: LLDONE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +! ----------------------------------------------------------------- + +!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. +! ---------------------------------------------- + +! * Computation of KMEDIAP and KRESTM. + +IF (LHOOK) CALL DR_HOOK('SUEMPLATB_MOD:SUEMPLATB',0,ZHOOK_HANDLE) +IF (LDWEIGHTED_DISTR) THEN + CALL ABORT_TRANS ('SUMPLATBEQ: ALADIN CODE IS NOT PREPARED FOR WEIGHTED DISTRIBUTION') +ENDIF +IMEDIA = SUM(KLOENG(KDGSA:KDGUX)) +KMEDIAP = IMEDIA / KPROCA +IF (KMEDIAP < KLOENG(KDGL/2)) THEN + CALL ABORT_TRANS ('SUMPLATB: KPROCA TOO BIG FOR THIS RESOLUTION') +ENDIF +KRESTM = IMEDIA - KMEDIAP * KPROCA +IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 + +! * Computation of intermediate quantities KINDIC and KLAST + +IF (LDSPLIT) THEN + + IREST = 0 + ILAST =0 + DO JA=1,KPROCA + IF (JA <= KRESTM .OR. KRESTM == 0) THEN + ICOMP = KMEDIAP + ELSE + ICOMP = KMEDIAP - 1 + ENDIF + ITOT = IREST + IGL = ILAST+1 + DO JGL=IGL,KDGUX + ILAST = JGL + IF(ITOT+KLOENG(JGL) < ICOMP) THEN + ITOT = ITOT+KLOENG(JGL) + ELSEIF(ITOT+KLOENG(JGL) == ICOMP) THEN + IREST = 0 + KLAST(JA) = JGL + KINDIC(JA) = 0 + EXIT + ELSE + IREST = KLOENG(JGL) -(ICOMP-ITOT) + KLAST(JA) = JGL + KINDIC(JA) = JGL + EXIT + ENDIF + ENDDO + ENDDO + KLAST(KPROCA)=KDGL + KINDIC(KPROCA)=0 +ELSE + + KINDIC(:) = 0 + + IMAXI = KMEDIAP-1 + IMAXIOL = HUGE(IMAXIOL) + DO + ILIMIT = IMAXI + IMAXI = 0 + IFRST = KDGUX + ILAST1(:) = 0 + IPP1(:) = 0 + DO JA=KPROCA,1,-1 + IGL = IFRST + LATS:DO JGL=IGL,1,-1 + IF (IPP1(JA) < ILIMIT .OR. JA == 1) THEN + IFRST = JGL-1 + IPP1(JA) = IPP1(JA) + KLOENG(JGL) + IF(ILAST1(JA) == 0) ILAST1(JA) = JGL + ELSE + EXIT LATS + ENDIF + ENDDO LATS + IMAXI = MAX (IMAXI,IPP1(JA)) + ENDDO + IF(IMAXI >= IMAXIOL) EXIT + KLAST(:) = ILAST1(:) + IPP(:) = IPP1(:) + IMAXIOL = IMAXI + ENDDO + +! make the distribution more uniform +! ---------------------------------- + + IFIRST(1) = 0 + IF (KLAST(1) > 0) IFIRST(1) = 1 + DO JA=2,KPROCA + IF (IPP(JA) > 0) THEN + IFIRST(JA) = KLAST(JA-1)+1 + ELSE + IFIRST(JA) = 0 + ENDIF + ENDDO + + LLDONE = .FALSE. + DO WHILE( .NOT.LLDONE ) + LLDONE = .TRUE. + + DO JA=1,KPROCA-1 + IF (IPP(JA) > IPP(JA+1)) THEN + IF (IPP(JA)-IPP(JA+1) > IPP(JA+1) + 2 *& + & KLOENG(KLAST(JA)) -IPP(JA) ) THEN + IPP(JA) = IPP(JA) - KLOENG(KLAST(JA)) + IPP(JA+1) = IPP(JA+1) + KLOENG(KLAST(JA)) + IF (KLAST(JA+1) == 0) KLAST(JA+1) = KLAST(JA) + IFIRST(JA+1) = KLAST(JA) + KLAST(JA) = KLAST(JA) - 1 + IF (KLAST(JA) == 0) IFIRST(JA) = 0 + LLDONE = .FALSE. + ENDIF + ELSE + IF( IFIRST(JA+1) > 0 )THEN + IF (IPP(JA+1)-IPP(JA) >= IPP(JA) + 2 *& + & KLOENG(IFIRST(JA+1)) -IPP(JA+1) ) THEN + IPP(JA) = IPP(JA) + KLOENG(IFIRST(JA+1)) + IPP(JA+1) = IPP(JA+1) - KLOENG(IFIRST(JA+1)) + KLAST(JA) = IFIRST(JA+1) + IF (IFIRST(JA) == 0) IFIRST(JA) = KLAST(JA) + IF (KLAST(JA+1) == KLAST(JA)) THEN + KLAST(JA+1) = 0 + IFIRST(JA+1) = 0 + ELSE + IFIRST(JA+1) = IFIRST(JA+1) + 1 + ENDIF + LLDONE = .FALSE. + ENDIF + ENDIF + ENDIF + ENDDO + ENDDO + KLAST(KPROCA)=KDGL +ENDIF + +IF (LHOOK) CALL DR_HOOK('SUEMPLATB_MOD:SUEMPLATB',1,ZHOOK_HANDLE) +END SUBROUTINE SUEMPLATB +END MODULE SUEMPLATB_MOD diff --git a/src/etrans/cpu/internal/suestaonl_mod.F90 b/src/etrans/cpu/internal/suestaonl_mod.F90 new file mode 100644 index 000000000..e69dcd946 --- /dev/null +++ b/src/etrans/cpu/internal/suestaonl_mod.F90 @@ -0,0 +1,462 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE SUESTAONL_MOD +CONTAINS +SUBROUTINE SUESTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) + +!**** *SUESTAONL * - Routine to initialize parallel environment, TAL + +! Purpose. +! -------- +! Initialize D%NSTA and D%NONL. +! Calculation of distribution of grid points to processors : +! Splitting of grid in B direction + +!** Interface. +! ---------- +! *CALL* *SUESTAONL * + +! Explicit arguments : +! -------------------- +! KMEDIAP - mean number of grid points per PE +! KRESTM - number of PEs with one extra point +! LDWEIGHTED_DISTR -true if weighted distribution +! PWEIGHT -weight per grid-point if weighted +! distribution +! PMEDIAP -mean weight per PE if weighted +! distribution +! KPROCAGP -number of grid points per A set +! Implicit arguments : +! -------------------- + +! Method. +! ------- +! See documentation + +! Externals. NONE. +! ---------- + +! Reference. +! ---------- +! ECMWF Research Department documentation of the IFS + +! Author. +! ------- +! MPP Group *ECMWF* + +! Modifications. +! -------------- +! Original : 95-10-01 +! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. +! - removal of LRPOLE in YOMCT0. +! - removal of code under LRPOLE. +! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) +! 03-03-03 G. Radnoti: no merge: only difference with +! sustaonl: ezone added to last a-set +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! O.Spaniel Oct-2004 phasing for AL29 +! A.Bogatchev Sep-2010 phasing for AL37 +! R. El Khatib 09-Aug-2013 Allow LEQ_REGIONS +! R. El Khatib 26-Apr-2018 vectorization +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC +USE TPMALD_DIM ,ONLY : RALD +USE SET2PE_MOD ,ONLY : SET2PE +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & + & N_REGIONS, N_REGIONS_NS, N_REGIONS_EW +USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +! + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM +REAL(KIND=JPRD),INTENT(IN) :: PWEIGHT(:) +LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR +REAL(KIND=JPRD),INTENT(IN) :: PMEDIAP +INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) + +INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL) +INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) + +INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, IDWIDE, & + & IGL, IGL1, IGL2, IGLOFF, IGPTA, & + & IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & + & ILSEND, INPLAT, INXLAT, IPOS, & + & IPROCB, IPTSRE, IRECV, & + & IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & + & ILAT, ILON, ILOEN +INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) +REAL(KIND=JPRB),ALLOCATABLE :: ZWEIGHT(:,:) +INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) + +LOGICAL :: LLABORT +LOGICAL :: LLP1,LLP2 + +REAL(KIND=JPRB) :: ZLAT, ZLAT1(R%NDGL), ZCOMP +REAL(KIND=JPRB) :: ZDIVID(R%NDGL),ZXPTLAT(R%NDGL) + +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +! ----------------------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',0,ZHOOK_HANDLE) +IXPTLAT (:)=999999 +ILSTPTLAT(:)=999999 +LLP1 = NPRINTLEV>0 +LLP2 = NPRINTLEV>1 + +IDWIDE = R%NDGL/2 +IBUFLEN = R%NDGL*N_REGIONS_EW*2 +IDGLG = R%NDGL + +I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) +I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) + +ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 + +IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) + + +IF (D%LSPLIT) THEN + IF( LEQ_REGIONS )THEN + IGPTA=0 + DO JA=1,MY_REGION_NS-1 + IGPTA = IGPTA + KPROCAGP(JA) + ENDDO + IGPTS = KPROCAGP(MY_REGION_NS) + ELSE + IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ELSE + IGPTS = KMEDIAP+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*(MY_REGION_NS-1) + ENDIF + ELSE + IF (MY_REGION_NS < N_REGIONS_NS) THEN + IGPTS = KMEDIAP-1 + IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) + ELSE + IGPTS = KMEDIAP-1+SUM(G%NLOEN(RALD%NDGUX+1:R%NDGL)) + IGPTA = KMEDIAP*KRESTM+(KMEDIAP-1)*(MY_REGION_NS-1-KRESTM) + ENDIF + ENDIF + ENDIF +ELSE + IGPTA = IGPTPRSETS + IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) +ENDIF +IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) +IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP +IXPTLAT(1) = IGPTA-IGPTPRSETS+1 +ZXPTLAT(1) = REAL(IXPTLAT(1)) +ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) +INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 +DO JGL=2,ILEN + IXPTLAT(JGL) = 1 + ZXPTLAT(JGL) = 1.0_JPRB + ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) + INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) +ENDDO +ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS + +DO JB=1,N_REGIONS_EW + DO JGL=1,R%NDGL+N_REGIONS_NS-1 + D%NSTA(JGL,JB) = 0 + D%NONL(JGL,JB) = 0 + ENDDO +ENDDO + +! grid point decomposition +! --------------------------------------- +DO JGL=1,ILEN + ZDIVID(JGL)=1._JPRB/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRB) +ENDDO +IF( LDWEIGHTED_DISTR )THEN + ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) + IGL=0 + DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IGL=IGL+1 + ZWEIGHT(JL,JGL)=PWEIGHT(IGL) + ENDDO + ENDDO + ZCOMP=0 + IGPTS=0 +ENDIF +DO JB=1,N_REGIONS(MY_REGION_NS) + + IF( .NOT.LDWEIGHTED_DISTR )THEN + + IF (JB <= IREST) THEN + IPTSRE = IGPTSP+1 + ELSE + IPTSRE = IGPTSP + ENDIF + + DO JNPTSRE=1,IPTSRE + ZLAT = 1._JPRB + DO JGL=1,ILEN + ZLAT1(JGL) = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + ENDDO + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + IF (ZLAT1(JGL) < ZLAT) THEN + ZLAT=ZLAT1(JGL) + INXLAT = JGL + ENDIF + ENDIF + ENDDO + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ELSE + DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & + & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) + + IGPTS = IGPTS + 1 + ZLAT = 1._JPRB + DO JGL=1,ILEN + ZLAT1(JGL) = (ZXPTLAT(JGL)-1.0_JPRB)*ZDIVID(JGL) + ENDDO + DO JGL=1,ILEN + IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN + IF (ZLAT1(JGL) < ZLAT) THEN + ZLAT = ZLAT1(JGL) + INXLAT = JGL + ENDIF + ENDIF + ENDDO + + IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN + IGL=D%NPTRFLOFF+INXLAT + IF (D%NSTA(IGL,JB) == 0) THEN + D%NSTA(IGL,JB) = IXPTLAT(INXLAT) + ENDIF + D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 + IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN + CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') + ENDIF + ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 + ILOEN=G%NLOEN(ILAT) + IF(ILON<1.OR.ILON>ILOEN)THEN + CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') + ENDIF + ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) + ENDIF + IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 + ZXPTLAT(INXLAT) = REAL(IXPTLAT(INXLAT),JPRB) + ENDDO + + ZCOMP = ZCOMP - PMEDIAP + + ENDIF + +ENDDO + +IF( LDWEIGHTED_DISTR )THEN + DEALLOCATE(ZWEIGHT) +ENDIF +! Exchange local partitioning info to produce global view + +IF( NPROC > 1 )THEN + IF( LEQ_REGIONS )THEN + + ITAG = MTAGPART + IPOS = 0 + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + ILENG(NPRCIDS(IRECV))=ILEN + ENDDO + ENDDO + IOFF(1)=0 + DO JJ=2,NPROC + IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) + ENDDO + ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) + CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') + DO JA=1,N_REGIONS_NS + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JB=1,N_REGIONS(JA) + CALL SET2PE(IRECV,JA,JB,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 + IPOS = IOFF(NPRCIDS(IRECV)) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUFG(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUFG(IPOS) + ENDDO + ENDIF + ENDDO + ENDDO + DEALLOCATE(ICOMBUFG) + + ELSE + + ITAG = MTAGPART + IPOS = 0 + DO JB=1,N_REGIONS(MY_REGION_NS) + DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) + IPOS = IPOS+1 + ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) + ENDDO + ENDDO + IF( IPOS > IBUFLEN )THEN + CALL ABORT_TRANS(' SUESTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') + ENDIF + ILSEND = IPOS + + DO JA=1,N_REGIONS_NS + CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) + IF(ISEND /= MYPROC) THEN + CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & + & CDSTRING='SUESTAONL:') + ENDIF + ENDDO + DO JA=1,N_REGIONS_NS + CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) + IF(IRECV /= MYPROC) THEN + ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 + CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & + & KOUNT=ILRECV,CDSTRING='SUESTAONL:') + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + IPOS = 0 + DO JB=1,N_REGIONS(JA) + DO JGL=IGL1,IGL2 + IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 + IPOS = IPOS+1 + D%NSTA(IGL,JB) = ICOMBUF(IPOS) + IPOS = IPOS+1 + D%NONL(IGL,JB) = ICOMBUF(IPOS) + ENDDO + ENDDO + ENDIF + ENDDO + + ENDIF +ENDIF + +! Confirm consistency of global partitioning, specifically testing for +! multiple assignments of same grid point and unassigned grid points + +LLABORT = .FALSE. +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + ICHK(JL,JGL) = 1 + ENDDO +ENDDO +DO JA=1,N_REGIONS_NS + IGLOFF = D%NPTRFRSTLAT(JA) + DO JB=1,N_REGIONS(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL = IGLOFF+JGL-IGL1 + DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 + IF( ICHK(JL,JGL) /= 1 )THEN + WRITE(NOUT,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " row=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + WRITE(0,'(" SUESTAONL : seta=",i4," setb=",i4,& + & " ROW=",I4," sta=",I4," INVALID GRID POINT")')& + & JA,JB,JGL,JL + LLABORT = .TRUE. + ENDIF + ICHK(JL,JGL) = 2 + ENDDO + ENDDO + ENDDO +ENDDO +DO JGL=1,R%NDGL + DO JL=1,G%NLOEN(JGL) + IF( ICHK(JL,JGL) /= 2 )THEN + WRITE(NOUT,'(" SUESTAONL : row=",i4," sta=",i4,& + & " GRID POINT NOT ASSIGNED")') JGL,JL + LLABORT = .TRUE. + ENDIF + ENDDO +ENDDO +IF( LLABORT )THEN + WRITE(NOUT,'(" SUESTAONL : inconsistent partitioning")') + CALL ABORT_TRANS(' SUESTAONL: inconsistent partitioning') +ENDIF + +IF (LLP1) THEN + WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUESTAONL '')') + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') + WRITE(UNIT=NOUT,FMT='('' '')') + IPROCB = MIN(32,N_REGIONS_EW) + WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I3))') (JB,JB=1,IPROCB) + DO JA=1,N_REGIONS_NS + IPROCB = MIN(32,N_REGIONS(JA)) + WRITE(UNIT=NOUT,FMT='('' '')') + IGLOFF = D%NPTRFRSTLAT(JA) + IGL1 = D%NFRSTLAT(JA) + IGL2 = D%NLSTLAT(JA) + DO JGL=IGL1,IGL2 + IGL=IGLOFF+JGL-IGL1 + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," NSTA=",& + & 32(1X,I3))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='(" SETA=",I3," LAT=",I3," D%NONL=",& + & 32(1X,I3))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + ENDDO + WRITE(UNIT=NOUT,FMT='('' '')') + WRITE(UNIT=NOUT,FMT='('' '')') +ENDIF +IF (LHOOK) CALL DR_HOOK('SUESTAONL_MOD:SUESTAONL',1,ZHOOK_HANDLE) + +! ------------------------------------------------------------------ + +END SUBROUTINE SUESTAONL +END MODULE SUESTAONL_MOD diff --git a/src/etrans/cpu/internal/tpmald_dim.F90 b/src/etrans/cpu/internal/tpmald_dim.F90 new file mode 100644 index 000000000..188f6ebc6 --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_dim.F90 @@ -0,0 +1,34 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE TPMALD_DIM + +! Module for dimensions. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDDIM_TYPE + +! COLLOCATION GRID DIMENSIONS + +INTEGER(KIND=JPIM) :: NDGLSUR ! Number of rows of latitudes+... +INTEGER(KIND=JPIM) :: NMSMAX ! Zonal truncation +INTEGER(KIND=JPIM) :: NDGUX ! Number of rows in zone C+I +END TYPE ALDDIM_TYPE + +TYPE(ALDDIM_TYPE),ALLOCATABLE,TARGET :: ALDDIM_RESOL(:) +TYPE(ALDDIM_TYPE),POINTER :: RALD + +END MODULE TPMALD_DIM diff --git a/src/etrans/cpu/internal/tpmald_distr.F90 b/src/etrans/cpu/internal/tpmald_distr.F90 new file mode 100644 index 000000000..2d9cc0a79 --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_distr.F90 @@ -0,0 +1,34 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE TPMALD_DISTR + +! Module for distributed memory environment. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDDISTR_TYPE + +INTEGER(KIND=JPIM) ,POINTER :: NESM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,POINTER :: NCPL2M(:) ! Number of complex Laplace coefficient for m given +INTEGER(KIND=JPIM) ,POINTER :: NPME(:) ! Address for the Laplace operator and its inverse + +END TYPE ALDDISTR_TYPE + +TYPE(ALDDISTR_TYPE),ALLOCATABLE,TARGET :: ALDDISTR_RESOL(:) +TYPE(ALDDISTR_TYPE),POINTER :: DALD + +END MODULE TPMALD_DISTR + diff --git a/src/etrans/cpu/internal/tpmald_fft.F90 b/src/etrans/cpu/internal/tpmald_fft.F90 new file mode 100644 index 000000000..10d7f70fc --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_fft.F90 @@ -0,0 +1,31 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE TPMALD_FFT + +! Module for Fourier transforms. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDFFT_TYPE +REAL(KIND=JPRB) ,POINTER :: TRIGSE(:) ! list of trigonometric function values +INTEGER(KIND=JPIM),POINTER :: NFAXE(:) ! list of factors of truncation +LOGICAL :: LFFT992=.FALSE. +END TYPE ALDFFT_TYPE + +TYPE(ALDFFT_TYPE),ALLOCATABLE,TARGET :: ALDFFT_RESOL(:) +TYPE(ALDFFT_TYPE),POINTER :: TALD + +END MODULE TPMALD_FFT diff --git a/src/etrans/cpu/internal/tpmald_fields.F90 b/src/etrans/cpu/internal/tpmald_fields.F90 new file mode 100644 index 000000000..0d9cec7de --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_fields.F90 @@ -0,0 +1,28 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE TPMALD_FIELDS + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDFIELDS_TYPE + +REAL(KIND=JPRB) ,POINTER :: RLEPINM(:) ! eigen-values of the inverse Laplace operator +END TYPE ALDFIELDS_TYPE + +TYPE(ALDFIELDS_TYPE),ALLOCATABLE,TARGET :: ALDFIELDS_RESOL(:) +TYPE(ALDFIELDS_TYPE),POINTER :: FALD + +END MODULE TPMALD_FIELDS diff --git a/src/etrans/cpu/internal/tpmald_geo.F90 b/src/etrans/cpu/internal/tpmald_geo.F90 new file mode 100644 index 000000000..2f720c8fe --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_geo.F90 @@ -0,0 +1,33 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE TPMALD_GEO + +! Module containing data describing plane projection grid. + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +TYPE ALDGEO_TYPE + +! GEOGRAPHY + +REAL(KIND=JPRB) :: EYWN ! Y-reso +REAL(KIND=JPRB) :: EXWN ! X-reso +END TYPE ALDGEO_TYPE + +TYPE(ALDGEO_TYPE),ALLOCATABLE,TARGET :: ALDGEO_RESOL(:) +TYPE(ALDGEO_TYPE),POINTER :: GALD + +END MODULE TPMALD_GEO diff --git a/src/etrans/cpu/internal/tpmald_tcdis.F90 b/src/etrans/cpu/internal/tpmald_tcdis.F90 new file mode 100644 index 000000000..539677efa --- /dev/null +++ b/src/etrans/cpu/internal/tpmald_tcdis.F90 @@ -0,0 +1,24 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +MODULE TPMALD_TCDIS + +! useless + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +SAVE + +REAL(KIND=JPRB) :: TCDIS + +END MODULE TPMALD_TCDIS diff --git a/src/etrans/include/etrans/edir_trans.h b/src/etrans/include/etrans/edir_trans.h new file mode 100644 index 000000000..6b00892ae --- /dev/null +++ b/src/etrans/include/etrans/edir_trans.h @@ -0,0 +1,146 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EDIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV,AUX_PROC) + + +!**** *EDIR_TRANS* - Direct spectral transform (from grid-point to spectral). + +! Purpose. +! -------- +! Interface routine for the direct spectral transform + +!** Interface. +! ---------- +! CALL EDIR_TRANS(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! PMEANU(:),PMEANV(:) - mean wind +! AUX_PROC - optional external procedure for biperiodization of +! aux.fields + +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- LTDIR_CTL - control of Legendre transform +! FTDIR_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANV(:) +EXTERNAL AUX_PROC +OPTIONAL AUX_PROC + + +END SUBROUTINE EDIR_TRANS + +END INTERFACE diff --git a/src/etrans/include/etrans/edir_transad.h b/src/etrans/include/etrans/edir_transad.h new file mode 100644 index 000000000..7bc4a99f4 --- /dev/null +++ b/src/etrans/include/etrans/edir_transad.h @@ -0,0 +1,142 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EDIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& +& KPROMA,KVSETUV,KVSETSC,KRESOL,KVSETSC3A,KVSETSC3B,KVSETSC2,& +& PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + + +!**** *EDIR_TRANSAD* - Direct spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the direct spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EDIR_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (output) +! PSPDIV(:,:) - spectral divergence (output) +! PSPSCALAR(:,:) - spectral scalarvalued fields (output) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (input) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split +! +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling DIR_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A ) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 ) +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIR_TRANS_CTLAD - control routine +! + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(INOUT) :: PSPSC2(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANV(:) + + +END SUBROUTINE EDIR_TRANSAD + + +END INTERFACE diff --git a/src/etrans/include/etrans/edist_grid.h b/src/etrans/include/etrans/edist_grid.h new file mode 100644 index 000000000..0bdbd8fb7 --- /dev/null +++ b/src/etrans/include/etrans/edist_grid.h @@ -0,0 +1,68 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EDIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) + +!**** *EDIST_GRID* - Distribute global gridpoint array among processors + +! Purpose. +! -------- +! Interface routine for distributing gridpoint array + +!** Interface. +! ---------- +! CALL EDIST_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KPROMA - required blocking factor for gridpoint input +! KFROM(:) - Processor resposible for distributing each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- DIST_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(OUT) :: PGP(:,:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_GRID +END INTERFACE diff --git a/src/etrans/include/etrans/edist_spec.h b/src/etrans/include/etrans/edist_spec.h new file mode 100644 index 000000000..11616cd8f --- /dev/null +++ b/src/etrans/include/etrans/edist_spec.h @@ -0,0 +1,70 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EDIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& + & LDIM1_IS_FLD,KSORT) + +!**** *EDIST_SPEC* - Distribute global spectral array among processors + +! Purpose. +! -------- +! Interface routine for distributing spectral array + +!** Interface. +! ---------- +! CALL EDIST__SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFDISTG - Global number of fields to be distributed +! KFROM(:) - Processor resposible for distributing each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- EDIST_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFDISTG +INTEGER(KIND=JPIM) , INTENT(IN) :: KFROM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSORT (:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE EDIST_SPEC +END INTERFACE diff --git a/src/etrans/include/etrans/egath_grid.h b/src/etrans/include/etrans/egath_grid.h new file mode 100644 index 000000000..be7853cd9 --- /dev/null +++ b/src/etrans/include/etrans/egath_grid.h @@ -0,0 +1,67 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EGATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) + +!**** *EGATH_GRID* - Gather global gridpoint array from processors + +! Purpose. +! -------- +! Interface routine for gathering gripoint array + +!** Interface. +! ---------- +! CALL EGATH_GRID(...) + +! Explicit arguments : +! -------------------- +! PGPG(:,:) - Global gridpoint array +! KFGATHG - Global number of fields to be gathered +! KPROMA - blocking factor for gridpoint input +! KTO(:) - Processor responsible for gathering each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - Local spectral array +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- GATH_GRID_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGPG(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) , INTENT(IN) :: PGP(:,:,:) + + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_GRID +END INTERFACE diff --git a/src/etrans/include/etrans/egath_spec.h b/src/etrans/include/etrans/egath_spec.h new file mode 100644 index 000000000..e6ba990f6 --- /dev/null +++ b/src/etrans/include/etrans/egath_spec.h @@ -0,0 +1,75 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EGATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,KMSMAX,LDZA0IP) + +!**** *EGATH_SPEC* - Gather global spectral array from processors + +! Purpose. +! -------- +! Interface routine for gathering spectral array + +!** Interface. +! ---------- +! CALL EGATH_SPEC(...) + +! Explicit arguments : +! -------------------- +! PSPECG(:,:) - Global spectral array +! KFGATHG - Global number of fields to be gathered +! KTO(:) - Processor responsible for gathering each field +! KVSET(:) - "B-Set" for each field +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PSPEC(:,:) - Local spectral array +! LDIM1_IS_FLD - If TRUE first dimension of PSCPEC and PSPECG is the field dimension [.T.] +! +! Method. +! ------- + +! Externals. SET_RESOL - set resolution +! ---------- EGATH_SPEC_CONTROL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPECG(:,:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KFGATHG +INTEGER(KIND=JPIM) , INTENT(IN) :: KTO(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMSMAX +LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP + + +! ------------------------------------------------------------------ + +END SUBROUTINE EGATH_SPEC + +END INTERFACE diff --git a/src/etrans/include/etrans/egpnorm_trans.h b/src/etrans/include/etrans/egpnorm_trans.h new file mode 100644 index 000000000..9f7523e91 --- /dev/null +++ b/src/etrans/include/etrans/egpnorm_trans.h @@ -0,0 +1,70 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EGPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) + + +!**** *EGPNORM_TRANS* - calculate grid-point norms + +! Purpose. +! -------- +! calculate grid-point norms using a 2 stage (NPRTRV,NPRTRW) communication rather +! than an approach using a more expensive global gather collective communication + +!** Interface. +! ---------- +! CALL EGPNORM_TRANS(...) + +! Explicit arguments : +! -------------------- +! PGP(:,:,:) - gridpoint fields (input) +! PGP is dimensioned (NPROMA,KFIELDS,NGPBLKS) where +! NPROMA is the blocking factor, KFIELDS the total number +! of fields and NGPBLKS the number of NPROMA blocks. +! KFIELDS - number of fields (input) +! (these do not have to be just levels) +! KPROMA - required blocking factor (input) +! PAVE - average (output) +! PMIN - minimum (input/output) +! PMAX - maximum (input/output) +! LDAVE_ONLY - T : PMIN and PMAX already contain local MIN and MAX +! KRESOL - resolution tag (optional) +! default assumes first defined resolution +! + +! Author. +! ------- +! A.Bogatchev after gpnorm_trans + +! Modifications. +! -------------- +! Original : 12th Jun 2009 + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB),INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),INTENT(OUT) :: PAVE(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMIN(:) +REAL(KIND=JPRB),INTENT(INOUT) :: PMAX(:) +INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS +INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA +LOGICAL,INTENT(IN) :: LDAVE_ONLY +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +END SUBROUTINE EGPNORM_TRANS +END INTERFACE diff --git a/src/etrans/include/etrans/einv_trans.h b/src/etrans/include/etrans/einv_trans.h new file mode 100644 index 000000000..5b8be538f --- /dev/null +++ b/src/etrans/include/etrans/einv_trans.h @@ -0,0 +1,162 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EINV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANS* - Inverse spectral transform. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform + +!** Interface. +! ---------- +! CALL EINV_TRANS(...) + +! Explicit arguments : All arguments are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. + +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTINV_CTL - control of Legendre transform +! EFTINV_CTL - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 26-02-03 Mats Hamrud & Gabor Radnoti : modified condition for scalar fields +! and derivatives (IF_SCALARS_G) + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PMEANV(:) + + +END SUBROUTINE EINV_TRANS + +END INTERFACE diff --git a/src/etrans/include/etrans/einv_transad.h b/src/etrans/include/etrans/einv_transad.h new file mode 100644 index 000000000..44bbfb11c --- /dev/null +++ b/src/etrans/include/etrans/einv_transad.h @@ -0,0 +1,161 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE EINV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& + & FSPGL_PROC,& + & LDSCDERS,LDVORGP,LDDIVGP,LDUVDER,KPROMA,KVSETUV,KVSETSC,KRESOL,& + & KVSETSC3A,KVSETSC3B,KVSETSC2,& + & PGP,PGPUV,PGP3A,PGP3B,PGP2,PMEANU,PMEANV) + +!**** *EINV_TRANSAD* - Inverse spectral transform - adjoint. + +! Purpose. +! -------- +! Interface routine for the inverse spectral transform - adjoint + +!** Interface. +! ---------- +! CALL EINV_TRANSAD(...) + +! Explicit arguments : All arguments except from PGP are optional. +! -------------------- +! PSPVOR(:,:) - spectral vorticity (input) +! PSPDIV(:,:) - spectral divergence (input) +! PSPSCALAR(:,:) - spectral scalarvalued fields (input) +! PSPSC3A(:,:,:) - alternative to use of PSPSCALAR, see PGP3A below (input) +! PSPSC3B(:,:,:) - alternative to use of PSPSCALAR, see PGP3B below (input) +! PSPSC2(:,:) - alternative to use of PSPSCALAR, see PGP2 below (input) +! FSPGL_PROC - external procedure to be executed in fourier space +! before transposition +! LDSCDERS - indicating if derivatives of scalar variables are req. +! LDVORGP - indicating if grid-point vorticity is req. +! LDDIVGP - indicating if grid-point divergence is req. +! LDUVDER - indicating if E-W derivatives of u and v are req. +! KPROMA - required blocking factor for gridpoint output +! KVSETUV(:) - indicating which 'b-set' in spectral space owns a +! vor/div field. Equivalant to NBSETLEV in the IFS. +! The length of KVSETUV should be the GLOBAL number +! of u/v fields which is the dimension of u and v releated +! fields in grid-point space. +! KVESETSC(:) - indicating which 'b-set' in spectral space owns a +! scalar field. As for KVSETUV this argument is required +! if the total number of processors is greater than +! the number of processors used for distribution in +! spectral wave space. +! KVSETSC3A(:) - as KVESETSC for PSPSC3A (distribution on first dimension) +! KVSETSC3B(:) - as KVESETSC for PSPSC3B (distribution on first dimension) +! KVSETSC2(:) - as KVESETSC for PSPSC2 (distribution on first dimension) +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PGP(:,:,:) - gridpoint fields (output) +! PGP need to dimensioned (NPROMA,IF_GP,NGPBLKS) where +! NPROMA is the blocking factor, IF_GP the total number +! of output fields and NGPBLKS the number of NPROMA blocks. +! The ordering of the output fields is as follows (all +! parts are optional depending on the input switches): +! +! vorticity : IF_UV_G fields (if psvor present and LDVORGP) +! divergence : IF_UV_G fields (if psvor present and LDDIVGP) +! u : IF_UV_G fields (if psvor present) +! v : IF_UV_G fields (if psvor present) +! scalar fields : IF_SCALARS_G fields (if pspscalar present) +! N-S derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! E-W derivative of u : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of v : IF_UV_G fields (if psvor present and and LDUVDER) +! E-W derivative of scalar fields : IF_SCALARS_G fields (if pspscalar +! present and LDSCDERS) +! +! Here IF_UV_G is the GLOBAL number of u/v fields as given by the length +! of KVSETUV (or by PSPVOR if no split in spectral 'b-set' direction +! IF_SCALARS_G is the GLOBAL number of scalar fields as giben by the +! length of KVESETSC (or by number of fields in PSPSCALAR if no spectral +! 'b-set' split + +! As an alternative to using PGP you can also use a combination of the +! following arrays. The reason for introducing these alternative ways +! of calling INV_TRANS is to avoid uneccessary copies where your data +! structures don't fit in to the 'PSPVOR,PSPDIV, PSPSCALAR, PGP' layout. +! The use of any of these precludes the use of PGP and vice versa. +! +! PGPUV(:,:,:,:) - the 'u-v' related grid-point variables in the order +! described for PGP. The second dimension of PGPUV should +! be the same as the "global" first dimension of +! PSPVOR,PSPDIV (in the IFS this is the number of levels) +! PGPUV need to be dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (u,v,vor,div ...) +! PGP3A(:,:,:,:) - grid-point array directly connected with PSPSC3A +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3A if no derivatives, 3 times that with der.) +! PGP3B(:,:,:,:) - grid-point array directly connected with PSPSC3B +! dimensioned(NPROMA,ILEVS,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC3B if no derivatives, 3 times that with der.) +! PGP2(:,:,:) - grid-point array directly connected with PSPSC2 +! dimensioned(NPROMA,IFLDS,NGPBLKS) +! IFLDS is the number of 'variables' (the same as in +! PSPSC2 if no derivatives, 3 times that with der.) + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ELTDIR_CTLAD - control of Legendre transform +! EFTDIR_CTLAD - control of Fourier transform + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPVOR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPDIV(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSCALAR(:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3A(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC3B(:,:,:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PSPSC2(:,:) +LOGICAL ,OPTIONAL, INTENT(IN) :: LDSCDERS +LOGICAL ,OPTIONAL, INTENT(IN) :: LDVORGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDDIVGP +LOGICAL ,OPTIONAL, INTENT(IN) :: LDUVDER +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KPROMA +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3A(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL +EXTERNAL FSPGL_PROC +OPTIONAL FSPGL_PROC +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGPUV(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3A(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP3B(:,:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(IN) :: PGP2(:,:,:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANU(:) +REAL(KIND=JPRB),OPTIONAL ,INTENT(OUT) :: PMEANV(:) + + +END SUBROUTINE EINV_TRANSAD + +END INTERFACE diff --git a/src/etrans/include/etrans/esetup_trans.h b/src/etrans/include/etrans/esetup_trans.h new file mode 100644 index 000000000..48b992dce --- /dev/null +++ b/src/etrans/include/etrans/esetup_trans.h @@ -0,0 +1,99 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE ESETUP_TRANS(KMSMAX,KSMAX,KDGL,KDGUX,KLOEN,LDSPLIT,& + & KTMAX,KRESOL,PEXWN,PEYWN,PWEIGHT,LDGRIDONLY,KNOEXTZL,KNOEXTZG,& + & LDUSEFFTW,LD_ALL_FFTW) +!**** *ESETUP_TRANS* - Setup transform package for specific resolution + +! Purpose. +! -------- +! To setup for making spectral transforms. Each call to this routine +! creates a new resolution up to a maximum of NMAX_RESOL set up in +! SETUP_TRANS0. You need to call SETUP_TRANS0 before this routine can +! be called. + +!** Interface. +! ---------- +! CALL ESETUP_TRANS(...) + +! Explicit arguments : KLOEN,LDSPLIT are optional arguments +! -------------------- +! KSMAX - spectral truncation required +! KDGL - number of Gaussian latitudes +! KLOEN(:) - number of points on each Gaussian latitude [2*KDGL] +! LDSPLIT - true if split latitudes in grid-point space [false] +! KTMAX - truncation order for tendencies? +! KRESOL - the resolution identifier +! KSMAX,KDGL,KTMAX and KLOEN are GLOBAL variables desribing the resolution +! in spectral and grid-point space +! LDGRIDONLY - true if only grid space is required + + +! LDSPLIT describe the distribution among processors of +! grid-point data and has no relevance if you are using a single processor + +! LDUSEFFTW - Use FFTW for FFTs +! LD_ALL_FFTW : T to transform all fields in one call, F to transforms fields one after another + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESETUP_DIMS - setup distribution independent dimensions +! SUEMP_TRANS_PRELEG - first part of setup of distr. environment +! SULEG - Compute Legandre polonomial and Gaussian +! Latitudes and Weights +! ESETUP_GEOM - Compute arrays related to grid-point geometry +! SUEMP_TRANS - Second part of setup of distributed environment +! SUEFFT - setup for FFT + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! 02-04-11 A. Bogatchev: Passing of TCDIS +! 02-11-14 C. Fischer: soften test on KDGL +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 +! D. Degrauwe (Feb 2012): Alternative extension zone (E') + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Dummy arguments +INTEGER(KIND=JPIM),INTENT(IN) :: KMSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(:) +LOGICAL ,OPTIONAL,INTENT(IN) :: LDSPLIT +LOGICAL ,OPTIONAL,INTENT(IN) :: LDGRIDONLY +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KTMAX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KRESOL +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEXWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PEYWN +REAL(KIND=JPRB) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KNOEXTZG +LOGICAL ,OPTIONAL,INTENT(IN) :: LDUSEFFTW +LOGICAL ,OPTIONAL,INTENT(IN) :: LD_ALL_FFTW + +END SUBROUTINE ESETUP_TRANS +END INTERFACE diff --git a/src/etrans/include/etrans/especnorm.h b/src/etrans/include/etrans/especnorm.h new file mode 100644 index 000000000..bbc82a264 --- /dev/null +++ b/src/etrans/include/etrans/especnorm.h @@ -0,0 +1,67 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE ESPECNORM(PSPEC,KVSET,KMASTER,KRESOL,PMET,PNORM) + +!**** *ESPECNORM* - Compute global spectral norms + +! Purpose. +! -------- +! Interface routine for computing spectral norms + +!** Interface. +! ---------- +! CALL ESPECNORM(...) + +! Explicit arguments : All arguments optional +! -------------------- +! PSPEC(:,:) - Spectral array +! KVSET(:) - "B-Set" for each field +! KMASTER - processor to recieve norms +! KRESOL - resolution tag which is required ,default is the +! first defined resulution (input) +! PMET(:) - metric +! PNORM(:) - Norms (output for processor KMASTER) +! +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- ESPNORM_CTL - control routine + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 + +! ------------------------------------------------------------------ +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +! Declaration of arguments + + +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PSPEC(:,:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSET(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KMASTER +REAL(KIND=JPRB) ,OPTIONAL, INTENT(IN) :: PMET(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PNORM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL + +! ------------------------------------------------------------------ + +END SUBROUTINE ESPECNORM +END INTERFACE diff --git a/src/etrans/include/etrans/etibihie.h b/src/etrans/include/etrans/etibihie.h new file mode 100644 index 000000000..b7305f774 --- /dev/null +++ b/src/etrans/include/etrans/etibihie.h @@ -0,0 +1,33 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE ETIBIHIE(KDLON,KDGL,KNUBI,KDLUX,KDGUX,& + & KSTART,KDLSM,PGPBI,LDBIX,LDBIY,KDADD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KSTART +INTEGER(KIND=JPIM),INTENT(IN) :: KDLSM +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDADD +REAL(KIND=JPRB),INTENT(INOUT) :: PGPBI(KSTART:KDLSM+KDADD,KNUBI,1:KDGL+KDADD) +LOGICAL,INTENT(IN) :: LDBIX +LOGICAL,INTENT(IN) :: LDBIY + +END SUBROUTINE ETIBIHIE +END INTERFACE diff --git a/src/etrans/include/etrans/etrans_end.h b/src/etrans/include/etrans/etrans_end.h new file mode 100644 index 000000000..7f6c95eb8 --- /dev/null +++ b/src/etrans/include/etrans/etrans_end.h @@ -0,0 +1,52 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE ETRANS_END(CDMODE) + +!**** *ETRANS_END* - Terminate transform package + +! Purpose. +! -------- +! Terminate transform package. Release all allocated arrays. + +!** Interface. +! ---------- +! CALL ETRANS_END + +! Explicit arguments : None +! -------------------- + +! Method. +! ------- + +! Externals. None +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing cy37 after G.Radnoti + +! ------------------------------------------------------------------ + +IMPLICIT NONE + +CHARACTER*5, OPTIONAL, INTENT(IN) :: CDMODE + +END SUBROUTINE ETRANS_END +END INTERFACE diff --git a/src/etrans/include/etrans/etrans_inq.h b/src/etrans/include/etrans/etrans_inq.h new file mode 100644 index 000000000..c7711863d --- /dev/null +++ b/src/etrans/include/etrans/etrans_inq.h @@ -0,0 +1,183 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE ETRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& + & KGPTOT,KGPTOTG,KGPTOTMX,KGPTOTL,& + & KMYMS,KESM0,KUMPP,KPOSSP,KPTRMS,KALLMS,KDIM0G,& + & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& + & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,KSTA,KONL,& + & KULTPP,KPTRLS,& + & KPRTRW,KMYSETW,KMYSETV,KMY_REGION_NS,KMY_REGION_EW,& + & LDSPLITLAT,LDLINEAR_GRID,& + & KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,KDEF_RESOL,LDLAM,& + & PMU,PGW,PRPNM,KLEI3,KSPOLEGL,KPMS,KCPL2M,KCPL4M,KPROCM) + +!**** *ETRANS_INQ* - Extract information from the transform package + +! Purpose. +! -------- +! Interface routine for extracting information from the T.P. + +!** Interface. +! ---------- +! CALL ETRANS_INQ(...) +! Explicit arguments : All arguments are optional. +! -------------------- +! KRESOL - resolution tag for which info is required ,default is the +! first defined resulution (input) + +! MULTI-TRANSFORMS MANAGEMENT +! KDEF_RESOL - number or resolutions defined +! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global + +! SPECTRAL SPACE +! KSPEC - number of complex spectral coefficients on this PE +! KSPEC2 - 2*KSPEC +! KSPEC2G - global KSPEC2 +! KSPEC2MX - maximun KSPEC2 among all PEs +! KNUMP - Number of spectral waves handled by this PE +! KGPTOT - Total number of grid columns on this PE +! KGPTOTG - Total number of grid columns on the Globe +! KGPTOTMX - Maximum number of grid columns on any of the PEs +! KGPTOTL - Number of grid columns one each PE (dimension N_REGIONS_NS:N_REGIONS_EW) +! KMYMS - This PEs spectral zonal wavenumbers +! KESM0 - Address in a spectral array of (m, n=m) +! KUMPP - No. of wave numbers each wave set is responsible for +! KPOSSP - Defines partitioning of global spectral fields among PEs +! KPTRMS - Pointer to the first wave number of a given a-set +! KALLMS - Wave numbers for all wave-set concatenated together +! to give all wave numbers in wave-set order +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KSMAX - spectral truncation - n direction +! KMSMAX - spectral truncation - m direction +! KNVALUE - n value for each KSPEC2 spectral coeffient +! KMVALUE - m value for each KSPEC2 spectral coeffient +! LDLINEAR_GRID : .TRUE. if the grid is linear + +! GRIDPOINT SPACE +! KFRSTLAT - First latitude of each a-set in grid-point space +! KLSTTLAT - Last latitude of each a-set in grid-point space +! KFRSTLOFF - Offset for first lat of own a-set in grid-point space +! KPTRLAT - Pointer to the start of each latitude +! KPTRFRSTLAT - Pointer to the first latitude of each a-set in +! NSTA and NONL arrays +! KPTRLSTLAT - Pointer to the last latitude of each a-set in +! NSTA and NONL arrays +! KPTRFLOFF - Offset for pointer to the first latitude of own a-set +! NSTA and NONL arrays, i.e. nptrfrstlat(myseta)-1 +! KSTA - Position of first grid column for the latitudes on a +! processor. The information is available for all processors. +! The b-sets are distinguished by the last dimension of +! nsta().The latitude band for each a-set is addressed by +! nptrfrstlat(jaset),nptrlstlat(jaset), and +! nptrfloff=nptrfrstlat(myseta) on this processors a-set. +! Each split latitude has two entries in nsta(,:) which +! necessitates the rather complex addressing of nsta(,:) +! and the overdimensioning of nsta by N_REGIONS_NS. +! KONL - Number of grid columns for the latitudes on a processor. +! Similar to nsta() in data structure. +! LDSPLITLAT - TRUE if latitude is split in grid point space over +! two a-sets + +! FOURIER SPACE +! KULTPP - number of latitudes for which each a-set is calculating +! the FFT's. +! KPTRLS - pointer to first global latitude of each a-set for which +! it performs the Fourier calculations + +! LEGENDRE +! PMU - sin(Gaussian latitudes) +! PGW - Gaussian weights +! PRPNM - Legendre polynomials +! KLEI3 - First dimension of Legendre polynomials +! KSPOLEGL - Second dimension of Legendre polynomials +! KPMS - Adress for legendre polynomial for given M (NSMAX) +! PLEPINM - Eigen-values of the inverse Laplace operator + +! Method. +! ------- + +! Externals. ESET_RESOL - set resolution +! ---------- + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 00-03-03 +! M.Hamrud 01-Oct-2003 CY28 Cleaning +! A.Nmiri 15-Nov-2007 Phasing with TFL 32R3 +! A.Bogatchev 16-Sep-2010 Phasing with TFL 36R4 +! R. El Khatib 08-Aug-2012 KSMAX,KMSMAX,KNVALUE,KMVALUE,PLEPINM,LDLAM,KDEF_RESOL,LDLINEAR_GRID + +! ------------------------------------------------------------------ + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +INTEGER(KIND=JPIM),OPTIONAL,INTENT(IN) :: KRESOL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2G +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPEC2MX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KNUMP +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOT +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTG +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTMX +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KGPTOTL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KMYMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KESM0(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KUMPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPOSSP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KALLMS(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KDIM0G(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KFRSTLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFRSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLSTLAT(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRFLOFF +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSTA(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KONL(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KULTPP(:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPTRLS(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KPRTRW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETW +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMYSETV +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_NS +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMY_REGION_EW +LOGICAL ,OPTIONAL,INTENT(INOUT) :: LDSPLITLAT(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PMU(:) +REAL(KIND=JPRB) ,OPTIONAL :: PGW(:) ! Argument NOT used +REAL(KIND=JPRB) ,OPTIONAL,INTENT(INOUT) :: PRPNM(:,:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KLEI3 +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KSPOLEGL +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPMS(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL2M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KCPL4M(0:) +INTEGER(KIND=JPIM),OPTIONAL,INTENT(INOUT) :: KPROCM(0:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMSMAX +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNVALUE(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KMVALUE(:) +REAL(KIND=JPRB) ,OPTIONAL,INTENT(OUT) :: PLEPINM(:) +INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KDEF_RESOL +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM +LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLINEAR_GRID + +END SUBROUTINE ETRANS_INQ +END INTERFACE diff --git a/src/etrans/include/etrans/etrans_release.h b/src/etrans/include/etrans/etrans_release.h new file mode 100644 index 000000000..7f92e1e01 --- /dev/null +++ b/src/etrans/include/etrans/etrans_release.h @@ -0,0 +1,17 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE ETRANS_RELEASE(KRESOL) +USE PARKIND1 ,ONLY : JPIM +INTEGER(KIND=JPIM),INTENT(IN) :: KRESOL +END SUBROUTINE ETRANS_RELEASE +END INTERFACE diff --git a/src/etrans/include/etrans/fpbipere.h b/src/etrans/include/etrans/fpbipere.h new file mode 100644 index 000000000..c7356501d --- /dev/null +++ b/src/etrans/include/etrans/fpbipere.h @@ -0,0 +1,30 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE FPBIPERE(KDLUX,KDGUX,KDLON,KDGL,KNUBI,KD1,PGPBI,KDADD,LDZON,& +& LDBOYD,KDBOYD,PLBOYD,PBIPOUT) +USE PARKIND1 ,ONLY : JPIM ,JPRB +INTEGER(KIND=JPIM),INTENT(IN) :: KNUBI +INTEGER(KIND=JPIM),INTENT(IN) :: KD1 +INTEGER(KIND=JPIM),INTENT(IN) :: KDLUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDGUX +INTEGER(KIND=JPIM),INTENT(IN) :: KDLON +INTEGER(KIND=JPIM),INTENT(IN) :: KDGL +INTEGER(KIND=JPIM),INTENT(IN) :: KDADD +REAL(KIND=JPRB) ,INTENT(INOUT):: PGPBI(KD1,KNUBI) +LOGICAL, OPTIONAL ,INTENT(IN) :: LDZON +LOGICAL, OPTIONAL ,INTENT(IN) :: LDBOYD +INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: KDBOYD(6) +REAL(KIND=JPRB) , INTENT(IN), OPTIONAL :: PLBOYD +REAL(KIND=JPRB) ,INTENT(OUT), OPTIONAL :: PBIPOUT(:,:) +END SUBROUTINE FPBIPERE +END INTERFACE diff --git a/src/etrans/include/etrans/horiz_field.h b/src/etrans/include/etrans/horiz_field.h new file mode 100644 index 000000000..293825235 --- /dev/null +++ b/src/etrans/include/etrans/horiz_field.h @@ -0,0 +1,24 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + + +INTERFACE +SUBROUTINE HORIZ_FIELD(KX,KY,PHFIELD) + +USE PARKIND1 ,ONLY : JPIM ,JPRB + +IMPLICIT NONE + +INTEGER(KIND=JPIM), INTENT(IN) :: KX +INTEGER(KIND=JPIM), INTENT(IN) :: KY +REAL(KIND=JPRB), INTENT(OUT) :: PHFIELD(KX,KY) +REAL(KIND=JPRB), PARAMETER :: PPI=3.141592 +END SUBROUTINE HORIZ_FIELD +END INTERFACE diff --git a/src/etrans/sedrenames.txt b/src/etrans/sedrenames.txt new file mode 100644 index 000000000..016e2c606 --- /dev/null +++ b/src/etrans/sedrenames.txt @@ -0,0 +1,149 @@ +s/ASRE1_MOD/ASRE1_MOD_VARIANTDESIGNATOR/g +s/ASRE1AD_MOD/ASRE1AD_MOD_VARIANTDESIGNATOR/g +s/ASRE1B_MOD/ASRE1B_MOD_VARIANTDESIGNATOR/g +s/ASRE1BAD_MOD/ASRE1BAD_MOD_VARIANTDESIGNATOR/g +s/BUTTERFLY_ALG_MOD/BUTTERFLY_ALG_MOD_VARIANTDESIGNATOR/g +s/CDMAP_MOD/CDMAP_MOD_VARIANTDESIGNATOR/g +s/DEALLOC_RESOL_MOD/DEALLOC_RESOL_MOD_VARIANTDESIGNATOR/g +s/DIR_TRANS_CTL_MOD/DIR_TRANS_CTL_MOD_VARIANTDESIGNATOR/g +s/DIR_TRANS_CTLAD_MOD/DIR_TRANS_CTLAD_MOD_VARIANTDESIGNATOR/g +s/dir_trans( *($|\(| |\*))/dir_trans_VARIANTDESIGNATOR\1/g +s/DIR_TRANS( *($|\(| |\*))/DIR_TRANS_VARIANTDESIGNATOR\1/g +s/dir_transad( *($|\(| |\*))/dir_transad_VARIANTDESIGNATOR\1/g +s/DIR_TRANSAD( *($|\(| |\*))/DIR_TRANSAD_VARIANTDESIGNATOR\1/g +s/DIST_GRID_32_CTL_MOD/DIST_GRID_32_CTL_MOD_VARIANTDESIGNATOR/g +s/dist_grid_32( *($|\(| |\*))/dist_grid_32_VARIANTDESIGNATOR\1/g +s/DIST_GRID_32( *($|\(| |\*))/DIST_GRID_32_VARIANTDESIGNATOR\1/g +s/DIST_GRID_CTL_MOD/DIST_GRID_CTL_MOD_VARIANTDESIGNATOR/g +s/dist_grid( *($|\(| |\*))/dist_grid_VARIANTDESIGNATOR\1/g +s/DIST_GRID( *($|\(| |\*))/DIST_GRID_VARIANTDESIGNATOR\1/g +s/DIST_SPEC_CONTROL_MOD/DIST_SPEC_CONTROL_MOD_VARIANTDESIGNATOR/g +s/dist_spec( *($|\(| |\*))/dist_spec_VARIANTDESIGNATOR\1/g +s/DIST_SPEC( *($|\(| |\*))/DIST_SPEC_VARIANTDESIGNATOR\1/g +s/ectrans_mod/ectrans_mod_VARIANTDESIGNATOR/g +s/FFTB_PLAN/FFTB_PLAN_VARIANTDESIGNATOR/g +s/FFTB_TYPE/FFTB_TYPE_VARIANTDESIGNATOR/g +s/FOURIER_IN_MOD/FOURIER_IN_MOD_VARIANTDESIGNATOR/g +s/FOURIER_INAD_MOD/FOURIER_INAD_MOD_VARIANTDESIGNATOR/g +s/FOURIER_OUT_MOD/FOURIER_OUT_MOD_VARIANTDESIGNATOR/g +s/FOURIER_OUTAD_MOD/FOURIER_OUTAD_MOD_VARIANTDESIGNATOR/g +s/FSC_MOD/FSC_MOD_VARIANTDESIGNATOR/g +s/FSCAD_MOD/FSCAD_MOD_VARIANTDESIGNATOR/g +s/FSPGL_INT_MOD/FSPGL_INT_MOD_VARIANTDESIGNATOR/g +s/FTDIR_CTL_MOD/FTDIR_CTL_MOD_VARIANTDESIGNATOR/g +s/FTDIR_CTLAD_MOD/FTDIR_CTLAD_MOD_VARIANTDESIGNATOR/g +s/FTDIR_MOD/FTDIR_MOD_VARIANTDESIGNATOR/g +s/FTDIRAD_MOD/FTDIRAD_MOD_VARIANTDESIGNATOR/g +s/FTINV_CTL_MOD/FTINV_CTL_MOD_VARIANTDESIGNATOR/g +s/FTINV_CTLAD_MOD/FTINV_CTLAD_MOD_VARIANTDESIGNATOR/g +s/FTINV_MOD/FTINV_MOD_VARIANTDESIGNATOR/g +s/FTINVAD_MOD/FTINVAD_MOD_VARIANTDESIGNATOR/g +s/GATH_GRID_32_CTL_MOD/GATH_GRID_32_CTL_MOD_VARIANTDESIGNATOR/g +s/gath_grid_32( *($|\(| |\*))/gath_grid_32_VARIANTDESIGNATOR\1/g +s/GATH_GRID_32( *($|\(| |\*))/GATH_GRID_32_VARIANTDESIGNATOR\1/g +s/GATH_GRID_CTL_MOD/GATH_GRID_CTL_MOD_VARIANTDESIGNATOR/g +s/gath_grid( *($|\(| |\*))/gath_grid_VARIANTDESIGNATOR\1/g +s/GATH_GRID( *($|\(| |\*))/GATH_GRID_VARIANTDESIGNATOR\1/g +s/GATH_SPEC_CONTROL_MOD/GATH_SPEC_CONTROL_MOD_VARIANTDESIGNATOR/g +s/gath_spec( *($|\(| |\*))/gath_spec_VARIANTDESIGNATOR\1/g +s/GATH_SPEC( *($|\(| |\*))/GATH_SPEC_VARIANTDESIGNATOR\1/g +s/GPNORM_TRANS_GPU( *($|\(| |\*))/GPNORM_TRANS_GPU_VARIANTDESIGNATOR\1/g +s/GPNORM_TRANS_CTL_MOD/GPNORM_TRANS_CTL_MOD_VARIANTDESIGNATOR/g +s/gpnorm_trans( *($|\(| |\*))/gpnorm_trans_VARIANTDESIGNATOR\1/g +s/GPNORM_TRANS( *($|\(| |\*))/GPNORM_TRANS_VARIANTDESIGNATOR\1/g +s/INIGPTR_MOD/INIGPTR_MOD_VARIANTDESIGNATOR/g +s/INV_TRANS_CTL_MOD/INV_TRANS_CTL_MOD_VARIANTDESIGNATOR/g +s/INV_TRANS_CTLAD_MOD/INV_TRANS_CTLAD_MOD_VARIANTDESIGNATOR/g +s/inv_trans( *($|\(| |\*))/inv_trans_VARIANTDESIGNATOR\1/g +s/INV_TRANS( *($|\(| |\*))/INV_TRANS_VARIANTDESIGNATOR\1/g +s/inv_transad( *($|\(| |\*))/inv_transad_VARIANTDESIGNATOR\1/g +s/INV_TRANSAD/INV_TRANSAD_VARIANTDESIGNATOR/g +s/jprbt/TYPEDESIGNATOR_LOWER/g +s/JPRBT/TYPEDESIGNATOR_UPPER/g +s/jprb/TYPEDESIGNATOR_LOWER/g +s/JPRB/TYPEDESIGNATOR_UPPER/g +s/JPRH/JPRD/g +s/LDFOU2_MOD/LDFOU2_MOD_VARIANTDESIGNATOR/g +s/LDFOU2AD_MOD/LDFOU2AD_MOD_VARIANTDESIGNATOR/g +s/LEDIR_MOD/LEDIR_MOD_VARIANTDESIGNATOR/g +s/LEDIRAD_MOD/LEDIRAD_MOD_VARIANTDESIGNATOR/g +s/LEINV_MOD/LEINV_MOD_VARIANTDESIGNATOR/g +s/LEINVAD_MOD/LEINVAD_MOD_VARIANTDESIGNATOR/g +s/LTDIR_CTL_MOD/LTDIR_CTL_MOD_VARIANTDESIGNATOR/g +s/LTDIR_CTLAD_MOD/LTDIR_CTLAD_MOD_VARIANTDESIGNATOR/g +s/LTDIR_MOD/LTDIR_MOD_VARIANTDESIGNATOR/g +s/LTDIRAD_MOD/LTDIRAD_MOD_VARIANTDESIGNATOR/g +s/LTINV_CTL_MOD/LTINV_CTL_MOD_VARIANTDESIGNATOR/g +s/LTINV_CTLAD_MOD/LTINV_CTLAD_MOD_VARIANTDESIGNATOR/g +s/LTINV_MOD/LTINV_MOD_VARIANTDESIGNATOR/g +s/LTINVAD_MOD/LTINVAD_MOD_VARIANTDESIGNATOR/g +s/parkind1/ec_parkind/g +s/PARKIND1/EC_PARKIND/g +s/PARKIND2/EC_PARKIND/g +s/parkind_ectrans/ec_parkind/g +s/PARKIND_ECTRANS/ec_parkind/g +s/PREPSNM_MOD/PREPSNM_MOD_VARIANTDESIGNATOR/g +s/PRFI1_MOD/PRFI1_MOD_VARIANTDESIGNATOR/g +s/PRFI1AD_MOD/PRFI1AD_MOD_VARIANTDESIGNATOR/g +s/PRFI1B_MOD/PRFI1B_MOD_VARIANTDESIGNATOR/g +s/PRFI1BAD_MOD/PRFI1BAD_MOD_VARIANTDESIGNATOR/g +s/PRFI2_MOD/PRFI2_MOD_VARIANTDESIGNATOR/g +s/PRFI2AD_MOD/PRFI2AD_MOD_VARIANTDESIGNATOR/g +s/PRFI2B_MOD/PRFI2B_MOD_VARIANTDESIGNATOR/g +s/PRFI2BAD_MOD/PRFI2BAD_MOD_VARIANTDESIGNATOR/g +s/READ_LEGPOL_MOD/READ_LEGPOL_MOD_VARIANTDESIGNATOR/g +s/seefmm_mix/seefmm_mix_VARIANTDESIGNATOR/g +s/SEEFMM_MIX/SEEFMM_MIX_VARIANTDESIGNATOR/g +s/SET_RESOL_MOD/SET_RESOL_MOD_VARIANTDESIGNATOR/g +s/SETUP_TRANS( *($|\(| |\*))/SETUP_TRANS_VARIANTDESIGNATOR\1/g +s/setup_trans( *($|\(| |\*|\.h))/setup_trans_VARIANTDESIGNATOR\1/g +s/specnorm/specnorm_VARIANTDESIGNATOR/g +s/SPECNORM/SPECNORM_VARIANTDESIGNATOR/g +s/SPNORM_CTL_MOD/SPNORM_CTL_MOD_VARIANTDESIGNATOR/g +s/SPNORMC_MOD/SPNORMC_MOD_VARIANTDESIGNATOR/g +s/SPNORMD_MOD/SPNORMD_MOD_VARIANTDESIGNATOR/g +s/SPNSDE_MOD/SPNSDE_MOD_VARIANTDESIGNATOR/g +s/SPNSDEAD_MOD/SPNSDEAD_MOD_VARIANTDESIGNATOR/g +s/SUFFT_MOD/SUFFT_MOD_VARIANTDESIGNATOR/g +s/SUMP_TRANS_MOD/SUMP_TRANS_MOD_VARIANTDESIGNATOR/g +s/\ SULEG_MOD/\ SULEG_MOD_VARIANTDESIGNATOR/g +s/SUTRLE_MOD/SUTRLE_MOD_VARIANTDESIGNATOR/g +s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g +s/TPM_FFT/TPM_FFT_VARIANTDESIGNATOR/g +s/TPM_FIELDS_FLAT/TPM_FIELDS_FLAT_VARIANTDESIGNATOR/g +s/TPM_FLT/TPM_FLT_VARIANTDESIGNATOR/g +s/TPM_TRANS/TPM_TRANS_VARIANTDESIGNATOR/g +s/trans_end( *($|\(| |\*|\.h))/trans_end_VARIANTDESIGNATOR\1/g +s/TRANS_END/TRANS_END_VARIANTDESIGNATOR/g +s/trans_inq( *($|\(| |\*))/trans_inq_VARIANTDESIGNATOR\1/g +s/TRANS_INQ/TRANS_INQ_VARIANTDESIGNATOR/g +s/TRANS_PNM/TRANS_PNM_VARIANTDESIGNATOR/g +s/trans_release( *($|\(| |\*|\.h))/trans_release_VARIANTDESIGNATOR\1/g +s/TRANS_RELEASE/TRANS_RELEASE_VARIANTDESIGNATOR/g +s/TRGTOL_MOD/TRGTOL_MOD_VARIANTDESIGNATOR/g +s/TRLTOG_MOD/TRLTOG_MOD_VARIANTDESIGNATOR/g +s/TRLTOM_MOD/TRLTOM_MOD_VARIANTDESIGNATOR/g +s/TRMTOL_MOD/TRMTOL_MOD_VARIANTDESIGNATOR/g +s/TRMTOL_PACK_UNPACK/TRMTOL_PACK_UNPACK_VARIANTDESIGNATOR/g +s/TRLTOM_PACK_UNPACK/TRLTOM_PACK_UNPACK_VARIANTDESIGNATOR/g +s/UPDSP_MOD/UPDSP_MOD_VARIANTDESIGNATOR/g +s/UPDSPAD_MOD/UPDSPAD_MOD_VARIANTDESIGNATOR/g +s/UPDSPB_MOD/UPDSPB_MOD_VARIANTDESIGNATOR/g +s/UPDSPBAD_MOD/UPDSPBAD_MOD_VARIANTDESIGNATOR/g +s/UVTVD_MOD/UVTVD_MOD_VARIANTDESIGNATOR/g +s/UVTVDAD_MOD/UVTVDAD_MOD_VARIANTDESIGNATOR/g +s/VD2UV_CTL_MOD/VD2UV_CTL_MOD_VARIANTDESIGNATOR/g +s/VD2UV_MOD/VD2UV_MOD_VARIANTDESIGNATOR/g +s/VDTUV_MOD/VDTUV_MOD_VARIANTDESIGNATOR/g +s/VDTUVAD_MOD/VDTUVAD_MOD_VARIANTDESIGNATOR/g +s/VORDIV_TO_UV/VORDIV_TO_UV_VARIANTDESIGNATOR/g +s/WRITE_LEGPOL_MOD/WRITE_LEGPOL_MOD_VARIANTDESIGNATOR/g +s/EXTPER_MOD/EXTPER_MOD_VARIANTDESIGNATOR/g +s/ESPLINE_MOD/ESPLINE_MOD_VARIANTDESIGNATOR/g +s/ESMOOTHE_MOD/ESMOOTHE_MOD_VARIANTDESIGNATOR/g +s/EWINDOWE_MOD/EWINDOWE_MOD_VARIANTDESIGNATOR/g +s/EUVTVD_COMM_MOD/EUVTVD_COMM_MOD_VARIANTDESIGNATOR/g +s/EVDTUVAD_COMM_MOD/EVDTUVAD_COMM_MOD_VARIANTDESIGNATOR/g +s/SUEMPLATB_MOD/SUEMPLATB_MOD_VARIANTDESIGNATOR/g +s/SUEMPLAT_MOD/SUEMPLAT_MOD_VARIANTDESIGNATOR/g +s/SUESTAONL_MOD/SUESTAONL_MOD_VARIANTDESIGNATOR/g diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index d7f6013cf..ba30218a0 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -35,7 +35,8 @@ foreach( program ectrans-benchmark ) fiat parkind_${prec} trans_gpu_${prec} - OpenACC::OpenACC_Fortran + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> DEFINITIONS VERSION="gpu" $<$:USE_PINNED> @@ -51,6 +52,22 @@ foreach( program ectrans-benchmark ) endif( HAVE_GPU ) endforeach( program ) +if( HAVE_ETRANS ) + foreach( prec sp dp ) + if( HAVE_${prec} ) + ecbuild_add_executable( TARGET ectrans-lam-benchmark-cpu-${prec} + SOURCES ectrans-lam-benchmark.F90 + LIBS + fiat + parkind_${prec} + trans_${prec} + etrans_${prec} + OpenMP::OpenMP_Fortran + ) + endif() + endforeach() +endif() + # ectrans information tool get_property( langs GLOBAL PROPERTY ENABLED_LANGUAGES ) diff --git a/src/programs/ectrans-benchmark-ifs.F90 b/src/programs/ectrans-benchmark-ifs.F90 index fd9fcc8de..5187553ff 100644 --- a/src/programs/ectrans-benchmark-ifs.F90 +++ b/src/programs/ectrans-benchmark-ifs.F90 @@ -1094,6 +1094,17 @@ subroutine parse_grid(cgrid,ndgl,nloen) !=================================================================================================== +subroutine str2int(str, int, stat) + + character(len=*), intent(in) :: str + integer, intent(out) :: int + integer, intent(out) :: stat + read(str, *, iostat=stat) int + +end subroutine str2int + +!=================================================================================================== + function get_int_value(cname, iarg) result(value) integer :: value @@ -1130,6 +1141,78 @@ function get_str_value(cname, iarg) result(value) !=================================================================================================== +subroutine print_help(unit) + + integer, optional :: unit + integer :: nout = 6 + if (present(unit)) then + nout = unit + endif + + write(nout, "(a)") "" + + if (jprb == jprd) then + write(nout, "(a)") "NAME ectrans-benchmark-dp" + else + write(nout, "(a)") "NAME ectrans-benchmark-sp" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "DESCRIPTION" + write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& + & between spectral " + if (jprb == jprd) then + write(nout, "(a)") " space and grid-point space (double-precision version)" + else + write(nout, "(a)") " space and grid-point space (single-precision version)" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "USAGE" + if (jprb == jprd) then + write(nout, "(a)") " ectrans-benchmark-dp [options]" + else + write(nout, "(a)") " ectrans-benchmark-sp [options]" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "OPTIONS" + write(nout, "(a)") " -h, --help Print this message" + write(nout, "(a)") " -v Run with verbose output" + write(nout, "(a)") " -t, --truncation T Run with this triangular spectral truncation& + & (default = 79)" + write(nout, "(a)") " -g, --grid GRID Run with this grid. Possible values: O, F" + write(nout, "(a)") " If not specified, O is used with N=truncation+1& + & (cubic relation)" + write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& + & iterations (default = 10)" + write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" + write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" + write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" + write(nout, "(a)") " --scders Compute scalar derivatives (default off)" + write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& + & when also --vordiv is given" + write(nout, "(a)") " --flt Run with fast Legendre transforms (default off)" + write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" + write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& + & fields" + write(nout, "(a)") " The computation of spectral norms will skew overall& + & timings" + write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& + & subroutine on memory usage, thread-binding etc." + write(nout, "(a)") " --nprtrv Size of V set in spectral decomposition" + write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" + write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& + & tolerance for correctness checking" + write(nout, "(a)") "" + write(nout, "(a)") "DEBUGGING" + write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" + write(nout, "(a)") "" + +end subroutine print_help + +!=================================================================================================== + subroutine parsing_failed(message) character(len=*), intent(in) :: message @@ -1240,17 +1323,6 @@ function cubic_octahedral_gaussian_grid(nsmax) result(cgrid) !=================================================================================================== -subroutine str2int(str, int, stat) - - character(len=*), intent(in) :: str - integer, intent(out) :: int - integer, intent(out) :: stat - read(str, *, iostat=stat) int - -end subroutine str2int - -!=================================================================================================== - subroutine sort(a, n) real(kind=jprd), intent(inout) :: a(n) @@ -1275,78 +1347,6 @@ end subroutine sort !=================================================================================================== -subroutine print_help(unit) - - integer, optional :: unit - integer :: nout = 6 - if (present(unit)) then - nout = unit - endif - - write(nout, "(a)") "" - - if (jprb == jprd) then - write(nout, "(a)") "NAME ectrans-benchmark-dp" - else - write(nout, "(a)") "NAME ectrans-benchmark-sp" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "DESCRIPTION" - write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& - & between spectral " - if (jprb == jprd) then - write(nout, "(a)") " space and grid-point space (double-precision version)" - else - write(nout, "(a)") " space and grid-point space (single-precision version)" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "USAGE" - if (jprb == jprd) then - write(nout, "(a)") " ectrans-benchmark-dp [options]" - else - write(nout, "(a)") " ectrans-benchmark-sp [options]" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "OPTIONS" - write(nout, "(a)") " -h, --help Print this message" - write(nout, "(a)") " -v Run with verbose output" - write(nout, "(a)") " -t, --truncation T Run with this triangular spectral truncation& - & (default = 79)" - write(nout, "(a)") " -g, --grid GRID Run with this grid. Possible values: O, F" - write(nout, "(a)") " If not specified, O is used with N=truncation+1& - & (cubic relation)" - write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& - & iterations (default = 10)" - write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" - write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" - write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" - write(nout, "(a)") " --scders Compute scalar derivatives (default off)" - write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& - & when also --vordiv is given" - write(nout, "(a)") " --flt Run with fast Legendre transforms (default off)" - write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" - write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& - & fields" - write(nout, "(a)") " The computation of spectral norms will skew overall& - & timings" - write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& - & subroutine on memory usage, thread-binding etc." - write(nout, "(a)") " --nprtrv Size of V set in spectral decomposition" - write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" - write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& - & tolerance for correctness checking" - write(nout, "(a)") "" - write(nout, "(a)") "DEBUGGING" - write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" - write(nout, "(a)") "" - -end subroutine print_help - -!=================================================================================================== - subroutine initialize_spectral_arrays(nsmax, zsp, sp3d) integer, intent(in) :: nsmax ! Spectral truncation diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index f9a12a8b3..a4dc1bc28 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -210,6 +210,8 @@ program ectrans_benchmark integer(kind=jpim) :: ierr +real(kind=jprb), allocatable :: global_field(:,:) + !=================================================================================================== #include "setup_trans0.h" @@ -217,6 +219,7 @@ program ectrans_benchmark #include "inv_trans.h" #include "dir_trans.h" #include "trans_inq.h" +#include "gath_grid.h" #include "specnorm.h" #include "abor1.intfb.h" #include "gstats_setup.intfb.h" @@ -576,7 +579,7 @@ program ectrans_benchmark if (verbosity >= 0 .and. myproc == 1) then write(nout,'(" ")') write(nout,'(a,i0,a,f9.2,a)') "ectrans_benchmark initialisation, on ",nproc,& - & " tasks, took",ztinit," sec" + & " tasks, took ",ztinit," sec" write(nout,'(" ")') endif @@ -652,12 +655,17 @@ program ectrans_benchmark ! While in grid point space, dump the values to disk, for debugging only !================================================================================================= - if (ldump_values) then - ! dump a field to a binary file - call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgp2(:,1,:), 'S', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgpuv(:,nflevg,1,:), 'U', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgpuv(:,nflevg,2,:), 'V', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgp3a(:,nflevg,1,:), 'T', noutdump) + if (ldump_values .and. mod(jstep,10) == 1) then + if (myproc == 1) then + allocate(global_field(ngptotg,1)) + endif + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp2(:,1:1,:), 's', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,1,:), 'u', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,2,:), 'v', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp3a(:,nflevg:nflevg,1,:), 't', noutdump) + if (myproc == 1) then + deallocate(global_field) + endif endif !================================================================================================= @@ -1007,6 +1015,17 @@ subroutine parse_grid(cgrid,ndgl,nloen) !=================================================================================================== +subroutine str2int(str, int, stat) + + character(len=*), intent(in) :: str + integer, intent(out) :: int + integer, intent(out) :: stat + read(str, *, iostat=stat) int + +end subroutine str2int + +!=================================================================================================== + function get_int_value(cname, iarg) result(value) integer :: value @@ -1043,6 +1062,80 @@ function get_str_value(cname, iarg) result(value) !=================================================================================================== +subroutine print_help(unit) + + integer, optional :: unit + integer :: nout = 6 + if (present(unit)) then + nout = unit + endif + + write(nout, "(a)") "" + + if (jprb == jprd) then + write(nout, "(a)") "NAME ectrans-benchmark-" // VERSION // "-dp" + else + write(nout, "(a)") "NAME ectrans-benchmark-" // VERSION // "-sp" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "DESCRIPTION" + write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& + & between spectral " + if (jprb == jprd) then + write(nout, "(a)") " space and grid-point space (double-precision version)" + else + write(nout, "(a)") " space and grid-point space (single-precision version)" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "USAGE" + if (jprb == jprd) then + write(nout, "(a)") " ectrans-benchmark-" // VERSION // "-dp [options]" + else + write(nout, "(a)") " ectrans-benchmark-" // VERSION // "-sp [options]" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "OPTIONS" + write(nout, "(a)") " -h, --help Print this message" + write(nout, "(a)") " -v Run with verbose output" + write(nout, "(a)") " -t, --truncation T Run with this triangular spectral truncation& + & (default = 79)" + write(nout, "(a)") " -g, --grid GRID Run with this grid. Possible values: O, F" + write(nout, "(a)") " If not specified, O is used with N=truncation+1& + & (cubic relation)" + write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& + & iterations (default = 10)" + write(nout, "(a)") " --niter-warmup Number of warm up iterations,& + & for which timing statistics should be ignored (default = 3)" + write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" + write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" + write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" + write(nout, "(a)") " --scders Compute scalar derivatives (default off)" + write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& + & when also --vordiv is given" + write(nout, "(a)") " --flt Run with fast Legendre transforms (default off)" + write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" + write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& + & fields" + write(nout, "(a)") " The computation of spectral norms will skew overall& + & timings" + write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& + & subroutine on memory usage, thread-binding etc." + write(nout, "(a)") " --nprtrv Size of V set in spectral decomposition" + write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" + write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& + & tolerance for correctness checking" + write(nout, "(a)") "" + write(nout, "(a)") "DEBUGGING" + write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" + write(nout, "(a)") "" + +end subroutine print_help + +!=================================================================================================== + subroutine parsing_failed(message) character(len=*), intent(in) :: message @@ -1062,6 +1155,10 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, & & lmeminfo, nprtrv, nprtrw, ncheck) +#ifdef _OPENACC + use openacc, only: acc_init, acc_get_device_type +#endif + integer, intent(inout) :: nsmax ! Spectral truncation character(len=16), intent(inout) :: cgrid ! Spectral truncation integer, intent(inout) :: iters ! Number of iterations for transform test @@ -1087,8 +1184,8 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n character(len=128) :: carg ! Storage variable for command line arguments integer :: iarg = 1 ! Argument index -#ifdef ACCGPU - !$acc init +#ifdef _OPENACC + call acc_init(acc_get_device_type()) #endif do while (iarg <= command_argument_count()) @@ -1159,16 +1256,6 @@ function cubic_octahedral_gaussian_grid(nsmax) result(cgrid) end function -!=================================================================================================== - -subroutine str2int(str, int, stat) - - character(len=*), intent(in) :: str - integer, intent(out) :: int - integer, intent(out) :: stat - read(str, *, iostat=stat) int - -end subroutine str2int !=================================================================================================== @@ -1208,80 +1295,6 @@ end function get_median !=================================================================================================== -subroutine print_help(unit) - - integer, optional :: unit - integer :: nout = 6 - if (present(unit)) then - nout = unit - endif - - write(nout, "(a)") "" - - if (jprb == jprd) then - write(nout, "(a)") "NAME ectrans-benchmark-" // VERSION // "-dp" - else - write(nout, "(a)") "NAME ectrans-benchmark-" // VERSION // "-sp" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "DESCRIPTION" - write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& - & between spectral " - if (jprb == jprd) then - write(nout, "(a)") " space and grid-point space (double-precision version)" - else - write(nout, "(a)") " space and grid-point space (single-precision version)" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "USAGE" - if (jprb == jprd) then - write(nout, "(a)") " ectrans-benchmark-" // VERSION // "-dp [options]" - else - write(nout, "(a)") " ectrans-benchmark-" // VERSION // "-sp [options]" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "OPTIONS" - write(nout, "(a)") " -h, --help Print this message" - write(nout, "(a)") " -v Run with verbose output" - write(nout, "(a)") " -t, --truncation T Run with this triangular spectral truncation& - & (default = 79)" - write(nout, "(a)") " -g, --grid GRID Run with this grid. Possible values: O, F" - write(nout, "(a)") " If not specified, O is used with N=truncation+1& - & (cubic relation)" - write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& - & iterations (default = 10)" - write(nout, "(a)") " --niter-warmup Number of warm up iterations,& - & for which timing statistics should be ignored (default = 3)" - write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" - write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" - write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" - write(nout, "(a)") " --scders Compute scalar derivatives (default off)" - write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& - & when also --vordiv is given" - write(nout, "(a)") " --flt Run with fast Legendre transforms (default off)" - write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" - write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& - & fields" - write(nout, "(a)") " The computation of spectral norms will skew overall& - & timings" - write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& - & subroutine on memory usage, thread-binding etc." - write(nout, "(a)") " --nprtrv Size of V set in spectral decomposition" - write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" - write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& - & tolerance for correctness checking" - write(nout, "(a)") "" - write(nout, "(a)") "DEBUGGING" - write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" - write(nout, "(a)") "" - -end subroutine print_help - -!=================================================================================================== - subroutine initialize_spectral_arrays(nsmax, zsp, sp3d) integer, intent(in) :: nsmax ! Spectral truncation @@ -1349,27 +1362,32 @@ end subroutine initialize_2d_spectral_field !=================================================================================================== -subroutine dump_gridpoint_field(jstep, myproc, nproma, ngpblks, fld, fldchar, noutdump) +subroutine dump_gridpoint_field(jstep, myproc, nproma, gfld, fld, fldchar, noutdump) ! Dump a 2d field to a binary file. integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file integer(kind=jpim), intent(in) :: nproma ! Size of nproma - integer(kind=jpim), intent(in) :: ngpblks ! Number of nproma blocks - real(kind=jprb) , intent(in) :: fld(nproma,ngpblks) ! 2D field + real(kind=jprb) , intent(inout) :: gfld(:,:) ! 2d global field + real(kind=jprb) , intent(in) :: fld(:,:,:) ! 3d local field character , intent(in) :: fldchar ! Single character field identifier integer(kind=jpim), intent(in) :: noutdump ! Tnit number for output file - character(len=14) :: filename = "x.xxx.xxxx.dat" - - write(filename(1:1),'(a1)') fldchar - write(filename(3:5),'(i3.3)') jstep - write(filename(7:10),'(i4.4)') myproc + character(len=10) :: filename = "x.xxxx.dat" - open(noutdump, file=filename, form="unformatted") - write(noutdump) reshape(fld, (/ nproma*ngpblks /)) - close(noutdump) + if (myproc == 1) then + write(filename(1:1),'(a1)') fldchar + write(filename(3:6),'(i4.4)') jstep + open(noutdump,file=filename,form='unformatted') + endif + do ilev=1,size(fld,2) + call gath_grid(gfld(:,:),nproma,1,(/1/),1,fld(:,ilev:ilev,:)) + if (myproc == 1) write(unit=noutdump) gfld(:,1) + enddo + if (myproc == 1) then + close(noutdump) + endif end subroutine dump_gridpoint_field diff --git a/src/programs/ectrans-lam-benchmark.F90 b/src/programs/ectrans-lam-benchmark.F90 new file mode 100644 index 000000000..bb8201a33 --- /dev/null +++ b/src/programs/ectrans-lam-benchmark.F90 @@ -0,0 +1,1488 @@ +! (C) Copyright 2001- ECMWF. +! (C) Copyright 2001- Meteo-France. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +program ectrans_lam_benchmark + +! +! Spectral transform test for Limited-Area geometry +! +! This test performs spectral to real and real to spectral transforms repeated in +! timed loop. +! +! 1) One "surface" field is always transformed: +! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk) +! +! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0" +! +! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk) +! +! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and +! can be enabled with "--vordiv" +! +! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk) +! +! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2) +! This must be enabled with "--scders" +! +! 5) Optionally uv East-West derivate can be computed from vorticity/divergence. +! This must be enabled with "--vordiv --uvders" +! +! +! Authors : George Mozdzynski +! Willem Deconinck +! Ioan Hadade +! Sam Hatfield +! Daan Degrauwe + +use parkind1, only: jpim, jprb, jprd +use oml_mod ,only : oml_max_threads +use omp_lib, only: omp_get_wtime +use mpl_module +use yomgstats, only: jpmaxstat +use yomhook, only : dr_hook_init + +implicit none + +integer(kind=jpim) :: istack, getstackusage +real(kind=jprb), dimension(1) :: zmaxerr(5), zerr(5) +real(kind=jprb) :: zmaxerrg + +! Output unit numbers +integer(kind=jpim), parameter :: nerr = 0 ! Unit number for STDERR +integer(kind=jpim), parameter :: nout = 6 ! Unit number for STDOUT +integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output + +! Default parameters +integer(kind=jpim) :: nlon = 128 ! Zonal dimension +integer(kind=jpim) :: nlat = 128 ! Meridional dimension +integer(kind=jpim) :: nsmax = 0 ! Spectral meridional truncation +integer(kind=jpim) :: nmsmax = 0 ! Spectral zonal truncation +integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test +integer(kind=jpim) :: nfld = 1 ! Number of scalar fields +integer(kind=jpim) :: nlev = 1 ! Number of vertical levels + +integer(kind=jpim) :: nloen(1) ! only one value needed for LAM +integer(kind=jpim) :: nflevg +integer(kind=jpim) :: nspec2 +integer(kind=jpim) :: ngptot +integer(kind=jpim) :: ngptotg +integer(kind=jpim) :: ifld +integer(kind=jpim) :: jroc +integer(kind=jpim) :: jb +integer(kind=jpim) :: nspec2g +integer(kind=jpim) :: i +integer(kind=jpim) :: ja +integer(kind=jpim) :: ib +integer(kind=jpim) :: jprtrv + +integer(kind=jpim), allocatable :: nprcids(:) +integer(kind=jpim) :: myproc, jj +integer :: jstep + +real(kind=jprd) :: ztinit, ztloop, ztstepmax, ztstepmin, ztstepavg, ztstepmed +real(kind=jprd) :: ztstepmax1, ztstepmin1, ztstepavg1, ztstepmed1 +real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 +real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) + +real(kind=jprb), allocatable :: znormsp(:), znormsp0(:), znormdiv(:), znormdiv0(:) +real(kind=jprb), allocatable :: znormvor(:), znormvor0(:), znormt(:), znormt0(:) +real(kind=jprd) :: zaveave(0:jpmaxstat) + +! Grid-point space data structures +real(kind=jprb), allocatable, target :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), allocatable, target :: zgmvs (:,:,:) ! Single level fields at t and t-dt +real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt + +! Spectral space data structures +real(kind=jprb), allocatable, target :: sp3d(:,:,:) +real(kind=jprb), pointer :: zspvor(:,:) => null() +real(kind=jprb), pointer :: zspdiv(:,:) => null() +real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() +real(kind=jprb), allocatable :: zspsc2(:,:) +real(kind=jprb), allocatable :: zmeanu(:), zmeanv(:) + +logical :: lstack = .false. ! Output stack info +logical :: luserpnm = .false. +logical :: lkeeprpnm = .false. +logical :: ltrace_stats = .false. +logical :: lstats_omp = .false. +logical :: lstats_comms = .false. +logical :: lstats_mpl = .false. +logical :: lstats = .false. ! gstats statistics +logical :: lbarrier_stats = .false. +logical :: lbarrier_stats2 = .false. +logical :: ldetailed_stats = .false. +logical :: lstats_alloc = .false. +logical :: lsyncstats = .false. +logical :: lstatscpu = .false. +logical :: lstats_mem = .false. +logical :: lxml_stats = .false. +logical :: lfftw = .false. ! Use FFTW for Fourier transforms +logical :: lvordiv = .false. +logical :: lscders = .false. +logical :: luvders = .false. +logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end + +integer(kind=jpim) :: nstats_mem = 0 +integer(kind=jpim) :: ntrace_stats = 0 +integer(kind=jpim) :: nprnt_stats = 1 + +! The multiplier of the machine epsilon used as a tolerance for correctness checking +! ncheck = 0 (the default) means that correctness checking is disabled +integer(kind=jpim) :: ncheck = 0 + +logical :: lmpoff = .false. ! Message passing switch + +! Verbosity level (0 or 1) +integer :: verbosity = 0 + +integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions +integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib + +integer(kind=jpim) :: nproc ! Number of procs +integer(kind=jpim) :: nthread +integer(kind=jpim) :: nprgpns = 0 ! Grid-point decomp +integer(kind=jpim) :: nprgpew = 0 ! Grid-point decomp +integer(kind=jpim) :: nprtrv = 0 ! Spectral decomp +integer(kind=jpim) :: nprtrw = 0 ! Spectral decomp +integer(kind=jpim) :: nspecresmin = 80 ! Minimum spectral resolution, for controlling nprtrw +integer(kind=jpim) :: mysetv +integer(kind=jpim) :: mysetw +integer(kind=jpim) :: mp_type = 2 ! Message passing type +integer(kind=jpim) :: mbx_size = 150000000 ! Mailbox size + +integer(kind=jpim), allocatable :: numll(:), ivset(:) +integer(kind=jpim) :: ivsetsc(1) + +integer(kind=jpim) :: nflevl + +! sumpini +integer(kind=jpim) :: isqr +logical :: lsync_trans = .false. ! Activate barrier sync + + +integer(kind=jpim) :: nproma = 0 +integer(kind=jpim) :: ngpblks +! locals +integer(kind=jpim) :: iprtrv +integer(kind=jpim) :: iprtrw +integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev + +integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" +integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" + +integer(kind=jpim) :: jbegin_uv = 0 +integer(kind=jpim) :: jend_uv = 0 +integer(kind=jpim) :: jbegin_sc = 0 +integer(kind=jpim) :: jend_sc = 0 +integer(kind=jpim) :: jbegin_scder_NS = 0 +integer(kind=jpim) :: jend_scder_NS = 0 +integer(kind=jpim) :: jbegin_scder_EW = 0 +integer(kind=jpim) :: jend_scder_EW = 0 +integer(kind=jpim) :: jbegin_uder_EW = 0 +integer(kind=jpim) :: jend_uder_EW = 0 +integer(kind=jpim) :: jbegin_vder_EW = 0 +integer(kind=jpim) :: jend_vder_EW = 0 + +logical :: ldump_values = .false. + +integer, external :: ec_mpirank +logical :: luse_mpi = .true. + +real(kind=jprb) :: zexwn, zeywn + +!=================================================================================================== + +#include "setup_trans0.h" +#include "esetup_trans.h" +#include "einv_trans.h" +#include "edir_trans.h" +#include "etrans_inq.h" +#include "especnorm.h" +#include "abor1.intfb.h" +#include "gstats_setup.intfb.h" +#include "ec_meminfo.intfb.h" + +!=================================================================================================== + +luse_mpi = detect_mpirun() + +! Setup +call get_command_line_arguments(nlon, nlat, nsmax, nmsmax, iters, nfld, nlev, lvordiv, lscders, luvders, & + & nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprgpns, nprgpew, nprtrv, nprtrw, ncheck) +! derived defaults +if ( nsmax == 0 ) nsmax = nlat/2-1 +if ( nmsmax == 0 ) nmsmax = nlon/2-1 +nflevg = nlev + +!=================================================================================================== + +if (luse_mpi) then + call mpl_init(ldinfo=(verbosity>=1)) + nproc = mpl_nproc() + myproc = mpl_myrank() +else + nproc = 1 + myproc = 1 + mpl_comm = -1 +endif +nthread = oml_max_threads() + +call dr_hook_init() + +!=================================================================================================== + +if( lstats ) call gstats(0,0) +ztinit = omp_get_wtime() + +! only output to stdout on pe 1 +!if (nproc > 1) then + !if (myproc /= 1) then + !open(unit=nout, file='output_'//char(myproc/10+48)//char(myproc+48)//'.dat') + !endif +!endif + +if (ldetailed_stats) then + lstats_omp = .true. + lstats_comms = .true. + lstats_mpl = .true. + lstatscpu = .true. + nprnt_stats = nproc + lstats_mem = .true. + lstats_alloc = .true. +endif + +!=================================================================================================== + +allocate(nprcids(nproc)) +do jj = 1, nproc + nprcids(jj) = jj +enddo + +if (nproc <= 1) then + lmpoff = .true. +endif + +! Compute nprgpns and nprgpew +! This version selects most square-like distribution +if (nproc == 0) nproc = 1 +if ( nprgpew == 0 .and. nprgpns == 0 ) then + isqr = int(sqrt(real(nproc,jprb))) + do ja = isqr, nproc + ib = nproc/ja + if (ja*ib == nproc) then + nprgpns = max(ja,ib) + nprgpew = min(ja,ib) + exit + endif + enddo +elseif (nprgpns == 0 ) then + nprgpns=nproc/nprgpew +elseif (nprgpew == 0 ) then + nprgpew=nproc/nprgpns +endif +if (nprgpns*nprgpew /= nproc) call abor1('transform_test:nprgpns*nprgpew /= nproc') + +! From sumpini, although this should be specified in namelist +if (nspecresmin == 0) nspecresmin = nproc + +! Compute nprtrv and nprtrw if not provided on the command line +if (nprtrv ==0 .and. nprtrw == 0 ) then + nprtrv=nprgpew + nprtrw=nprgpns +elseif (nprtrv == 0 ) then + nprtrv=nproc/nprtrw +elseif (nprtrw == 0 ) then + nprtrw=nproc/nprtrv +endif +if (nprtrv*nprtrw /= nproc) call abor1('transform_test:nprtrv*nprtrw /= nproc') + +mysetv=mod(myproc-1,nprtrv)+1 + +! Determine number of local levels for zonal and meridional fourier calculations +! based on the values of nflevg and nprtrv +allocate(numll(nprtrv)) +numll=nflevg/nprtrv +numll(1:modulo(nflevg,nprtrv))=numll(1:modulo(nflevg,nprtrv))+1 +ivsetsc(1)=min(nflevg+1, nprtrv) +nflevl = numll(mysetv) + +!=================================================================================================== +! Setup gstats +!=================================================================================================== + +if (lstats) then + call gstats_setup(nproc, myproc, nprcids, & + & lstats, lstatscpu, lsyncstats, ldetailed_stats, lbarrier_stats, lbarrier_stats2, & + & lstats_omp, lstats_comms, lstats_mem, nstats_mem, lstats_alloc, & + & ltrace_stats, ntrace_stats, nprnt_stats, lxml_stats) + call gstats_psut + + ! Assign labels to GSTATS regions + call gstats_labels +endif + +!=================================================================================================== +! Call ecTrans setup routines +!=================================================================================================== + +if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans =======' + +if( lstats ) call gstats(1, 0) +call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & + & kmax_resol=nmax_resol, kpromatr=0, kprgpns=nprgpns, kprgpew=nprgpew, & + & kprtrw=nprtrw, ldsync_trans=lsync_trans, & + & ldalloperm=.true., ldmpoff=.not.luse_mpi) + if( lstats ) call gstats(1, 1) + + if( lstats ) call gstats(2, 0) +zexwn=1._jprb ! 2*pi/(nx*dx): spectral resolution +zeywn=1._jprb ! 2*pi/(ny*dy) +nloen=nlon +call esetup_trans(ksmax=nsmax, kmsmax=nmsmax, kdgl=nlat, kdgux=nlat, kloen=nloen, ldsplit=.true., & + & ldusefftw=lfftw,pexwn=zexwn,peywn=zeywn) + + if( lstats ) call gstats(2, 1) + +call etrans_inq(kspec2=nspec2, kspec2g=nspec2g, kgptot=ngptot, kgptotg=ngptotg) + +if (nproma == 0) then ! no blocking (default when not specified) + nproma = ngptot +endif + +! Calculate number of NPROMA blocks +ngpblks = (ngptot - 1)/nproma+1 + +!=================================================================================================== +! Print information before starting +!=================================================================================================== + +! Print configuration details +if (verbosity >= 0) then + write(nout,'(" ")') + write(nout,'(a)')'======= Start of runtime parameters =======' + write(nout,'(" ")') + write(nout,'("nlon ",i0)') nlon + write(nout,'("nlat ",i0)') nlat + write(nout,'("nsmax ",i0)') nsmax + write(nout,'("nmsmax ",i0)') nmsmax + write(nout,'("nproc ",i0)') nproc + write(nout,'("nthread ",i0)') nthread + write(nout,'("nprgpns ",i0)') nprgpns + write(nout,'("nprgpew ",i0)') nprgpew + write(nout,'("nprtrw ",i0)') nprtrw + write(nout,'("nprtrv ",i0)') nprtrv + write(nout,'("ngptot ",i0)') ngptot + write(nout,'("ngptotg ",i0)') ngptotg + write(nout,'("nfld ",i0)') nfld + write(nout,'("nlev ",i0)') nlev + write(nout,'("nflevl ",i0)') nflevl + write(nout,'("nproma ",i0)') nproma + write(nout,'("ngpblks ",i0)') ngpblks + write(nout,'("nspec2 ",i0)') nspec2 + write(nout,'("nspec2g ",i0)') nspec2g + write(nout,'("lvordiv ",l)') lvordiv + write(nout,'("lscders ",l)') lscders + write(nout,'("luvders ",l)') luvders + write(nout,'(" ")') + write(nout,'(a)') '======= End of runtime parameters =======' + write(nout,'(" ")') +end if + +!=================================================================================================== +! Allocate and Initialize spectral arrays +!=================================================================================================== + +! Allocate spectral arrays +! Try to mimick IFS layout as much as possible +nullify(zspvor) +nullify(zspdiv) +nullify(zspsc3a) +allocate(sp3d(nflevl,nspec2,2+nfld)) +allocate(zspsc2(1,nspec2)) +allocate(zmeanu(nflevl),zmeanv(nflevl)) +zmeanu(:)=0._jprb +zmeanv(:)=0._jprb + +call initialize_spectral_arrays(nsmax, nmsmax, zspsc2, sp3d) + +! Point convenience variables to storage variable sp3d +zspvor => sp3d(:,:,1) +zspdiv => sp3d(:,:,2) +zspsc3a => sp3d(:,:,3:3+(nfld-1)) + +!=================================================================================================== +! Allocate gridpoint arrays +!=================================================================================================== + +allocate(ivset(nflevg)) + +! Compute spectral distribution +ilev = 0 +do jb = 1, nprtrv + do jlev=1, numll(jb) + ilev = ilev + 1 + ivset(ilev) = jb + enddo +enddo + +! Allocate grid-point arrays +if (lvordiv) then + jbegin_uv = 1 + jend_uv = 2 +endif +if (luvders) then + jbegin_uder_EW = jend_uv + 1 + jend_uder_EW = jbegin_uder_EW + 1 + jbegin_vder_EW = jend_uder_EW + 1 + jend_vder_EW = jbegin_vder_EW + 1 +else + jbegin_uder_EW = jend_uv + jend_uder_EW = jend_uv + jbegin_vder_EW = jend_uv + jend_vder_EW = jend_uv +endif + +jbegin_sc = jend_vder_EW + 1 +jend_sc = jend_vder_EW + nfld + +if (lscders) then + ndimgmvs = 3 + jbegin_scder_NS = jend_sc + 1 + jend_scder_NS = jend_sc + nfld + jbegin_scder_EW = jend_scder_NS + 1 + jend_scder_EW = jend_scder_NS + nfld +else + ndimgmvs = 1 + jbegin_scder_NS = jend_sc + jend_scder_NS = jend_sc + jbegin_scder_EW = jend_sc + jend_scder_EW = jend_sc +endif + +ndimgmv = jend_scder_EW + +!allocate(zgmv(nproma,nflevg,ndimgmv,ngpblks)) +!allocate(zgmvs(nproma,ndimgmvs,ngpblks)) +!zgpuv => zgmv(:,:,1:jend_vder_EW,:) +!zgp3a => zgmv(:,:,jbegin_sc:jend_scder_EW,:) +!zgp2 => zgmvs(:,:,:) + +! allocate separately since non-contiguous host-device transfers are not supported. +allocate(zgpuv(nproma,nflevg,jend_vder_EW,ngpblks)) +allocate(zgp3a(nproma,nflevg,jend_scder_EW-jbegin_sc+1,ngpblks)) +allocate(zgp2(nproma,ndimgmvs,ngpblks)) + +zgp2=0. +zgp3a=0. +zgpuv=0. + +!=================================================================================================== +! Allocate norm arrays +!=================================================================================================== + +if (lprint_norms .or. ncheck > 0) then + allocate(znormsp(1)) + allocate(znormsp0(1)) + allocate(znormvor(nflevg)) + allocate(znormvor0(nflevg)) + allocate(znormdiv(nflevg)) + allocate(znormdiv0(nflevg)) + allocate(znormt(nflevg)) + allocate(znormt0(nflevg)) + + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor0, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv0, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt0, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp0, kvset=ivsetsc) + + if (verbosity >= 1 .and. myproc == 1) then + do ifld = 1, nflevg + write(nout,'("norm zspvor( ",i4,",:) = ",f20.15)') ifld, znormvor0(ifld) + enddo + do ifld = 1, nflevg + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv0(ifld) + enddo + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt0(ifld) + enddo + do ifld = 1, 1 + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp0(ifld) + enddo + endif +endif + +!=================================================================================================== +! Setup timers +!=================================================================================================== + +ztinit = (omp_get_wtime() - ztinit) + +if (verbosity >= 0) then + write(nout,'(" ")') + write(nout,'(a,i6,a,f9.2,a)') "transform_test initialisation, on",nproc,& + & " tasks, took",ztinit," sec" + write(nout,'(" ")') +endif + +if (iters <= 0) call abor1('transform_test:iters <= 0') + +allocate(ztstep(iters)) +allocate(ztstep1(iters)) +allocate(ztstep2(iters)) + +ztstepavg = 0._jprd +ztstepmax = 0._jprd +ztstepmin = 9999999999999999._jprd +ztstepavg1 = 0._jprd +ztstepmax1 = 0._jprd +ztstepmin1 = 9999999999999999._jprd +ztstepavg2 = 0._jprd +ztstepmax2 = 0._jprd +ztstepmin2 = 9999999999999999._jprd + +!================================================================================================= +! Dump the values to disk, for debugging only +!================================================================================================= + +if (ldump_values) then + ! dump a field to a binary file + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc2(1,:),ivsetsc(1:1), 'S', noutdump) + if (lvordiv) then + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspdiv(1,:),ivset(1:1), 'D', noutdump) + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspvor(1,:),ivset(1:1), 'V', noutdump) + endif + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc3a(1,:,1),ivset(1:1), 'T', noutdump) +endif + +write(nout,'(a)') '======= Start of spectral transforms =======' +write(nout,'(" ")') + +ztloop = omp_get_wtime() + +!=================================================================================================== +! Do spectral transform loop +!=================================================================================================== + +do jstep = 1, iters + if( lstats ) call gstats(3,0) + ztstep(jstep) = omp_get_wtime() + + !================================================================================================= + ! Do inverse transform + !================================================================================================= + + ztstep1(jstep) = omp_get_wtime() + if( lstats ) call gstats(4,0) + if (lvordiv) then + + call einv_trans(kresol=1, kproma=nproma, & + & pspsc2=zspsc2, & ! spectral surface pressure + & pspvor=zspvor, & ! spectral vorticity + & pspdiv=zspdiv, & ! spectral divergence + & pspsc3a=zspsc3a, & ! spectral scalars + & ldscders=lscders, & + & ldvorgp=.false., & ! no gridpoint vorticity + & lddivgp=.false., & ! no gridpoint divergence + & lduvder=luvders, & + & kvsetuv=ivset, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pgp2=zgp2, & + & pgpuv=zgpuv, & + & pgp3a=zgp3a, & + & pmeanu=zmeanu, & + & pmeanv=zmeanv) + + else + + call einv_trans(kresol=1, kproma=nproma, & + & pspsc2=zspsc2, & ! spectral surface pressure + & pspsc3a=zspsc3a, & ! spectral scalars + & ldscders=lscders, & ! scalar derivatives + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pgp2=zgp2, & + & pgp3a=zgp3a) + + endif + + if( lstats ) call gstats(4,1) + + ztstep1(jstep) = (omp_get_wtime() - ztstep1(jstep)) + + !================================================================================================= + ! While in grid point space, dump the values to disk, for debugging only + !================================================================================================= + + if (ldump_values) then + ! dump a field to a binary file + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgp2(:,1,:), 'S', noutdump) + if (lvordiv) then + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgpuv(:,nflevg,1,:), 'U', noutdump) + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgpuv(:,nflevg,2,:), 'V', noutdump) + endif + call dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, zgp3a(:,nflevg,1,:), 'T', noutdump) + endif + + !================================================================================================= + ! Do direct transform + !================================================================================================= + + ztstep2(jstep) = omp_get_wtime() + + if( lstats ) call gstats(5,0) + + + if (lvordiv) then + call edir_trans(kresol=1, kproma=nproma, & + & pgp2=zgp2(:,1:1,:), & + & pgpuv=zgpuv(:,:,1:2,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), & + & pspvor=zspvor, & + & pspdiv=zspdiv, & + & pspsc2=zspsc2, & + & pspsc3a=zspsc3a, & + & kvsetuv=ivset, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset, & + & pmeanu=zmeanu, & + & pmeanv=zmeanv) + else + + call edir_trans(kresol=1, kproma=nproma, & + & pgp2=zgp2(:,1:1,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), & + & pspsc2=zspsc2, & + & pspsc3a=zspsc3a, & + & kvsetsc2=ivsetsc, & + & kvsetsc3a=ivset) + endif + if( lstats ) call gstats(5,1) + ztstep2(jstep) = (omp_get_wtime() - ztstep2(jstep)) + + !================================================================================================= + ! Dump the values to disk, for debugging only + !================================================================================================= + + if (ldump_values) then + ! dump a field to a binary file + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc2(1,:),ivsetsc(1:1), 'S', noutdump) + if (lvordiv) then + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspdiv(1,:),ivset(1), 'D', noutdump) + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspvor(1,:),ivset(1:1), 'V', noutdump) + endif + call dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, zspsc3a(1,:,1),ivset(1:1), 'T', noutdump) + endif + + !================================================================================================= + ! Calculate timings + !================================================================================================= + + ztstep(jstep) = (omp_get_wtime() - ztstep(jstep)) + + ztstepavg = ztstepavg + ztstep(jstep) + ztstepmin = min(ztstep(jstep), ztstepmin) + ztstepmax = max(ztstep(jstep), ztstepmax) + + ztstepavg1 = ztstepavg1 + ztstep1(jstep) + ztstepmin1 = min(ztstep1(jstep), ztstepmin1) + ztstepmax1 = max(ztstep1(jstep), ztstepmax1) + + ztstepavg2 = ztstepavg2 + ztstep2(jstep) + ztstepmin2 = min(ztstep2(jstep), ztstepmin2) + ztstepmax2 = max(ztstep2(jstep), ztstepmax2) + + !================================================================================================= + ! Print norms + !================================================================================================= + + if (lprint_norms) then + if( lstats ) call gstats(6,0) + call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc(1:1)) + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) + call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) + + if ( myproc == 1 ) then + + ! Surface pressure + zmaxerr(:) = -999.0 + do ifld = 1, 1 + zerr(1) = abs(znormsp(ifld)/znormsp0(ifld) - 1.0_jprb) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + enddo + ! Divergence + do ifld = 1, nflevg + zerr(2) = abs(znormdiv(ifld)/znormdiv0(ifld) - 1.0_jprb) + zmaxerr(2) = max(zmaxerr(2), zerr(2)) + enddo + ! Vorticity + do ifld = 1, nflevg + zerr(3) = abs(znormvor(ifld)/znormvor0(ifld) - 1.0_jprb) + zmaxerr(3) = max(zmaxerr(3),zerr(3)) + enddo + ! Temperature + do ifld = 1, nflevg + zerr(4) = abs(znormt(ifld)/znormt0(ifld) - 1.0_jprb) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + enddo + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) + if( lstats )call gstats(6,1) + else + write(nout,'("Time step ",i6," took", f8.4)') jstep, ztstep(jstep) + endif + + endif + + if( lstats ) call gstats(3,1) + +enddo + +!=================================================================================================== + +ztloop = (omp_get_wtime() - ztloop) + +write(nout,'(" ")') +write(nout,'(a)') '======= End of spectral transforms =======' +write(nout,'(" ")') + +if (lprint_norms .or. ncheck > 0) then + call especnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call especnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) + call especnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) + call especnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) + + if ( myproc == 1 ) then + + zmaxerr(:) = -999.0 + do ifld = 1, nflevg + zerr(3) = abs(real(znormvor(ifld),kind=jprd)/real(znormvor0(ifld),kind=jprd) - 1.0_jprd) + zmaxerr(3) = max(zmaxerr(3), zerr(3)) + if (verbosity >= 1) then + write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor0(ifld), zerr(3) + endif + enddo + do ifld = 1, nflevg + zerr(2) = abs(real(znormdiv(ifld),kind=jprd)/real(znormdiv0(ifld),kind=jprd) - 1.0d0) + zmaxerr(2) = max(zmaxerr(2),zerr(2)) + if (verbosity >= 1) then + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv0(ifld), zerr(2) + endif + enddo + do ifld = 1, nflevg + zerr(4) = abs(real(znormt(ifld),kind=jprd)/real(znormt0(ifld),kind=jprd) - 1.0d0) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (verbosity >= 1) then + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt0(ifld), zerr(4) + endif + enddo + do ifld = 1, 1 + zerr(1) = abs(real(znormsp(ifld),kind=jprd)/real(znormsp0(ifld),kind=jprd) - 1.0d0) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + if (verbosity >= 1) then + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp0(ifld), zerr(1) + endif + enddo + + ! maximum error across all fields + zmaxerrg = max(max(zmaxerr(1),zmaxerr(2)), max(zmaxerr(2), zmaxerr(3))) + + if (verbosity >= 1) write(nout,*) + write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) + write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) + write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) + write(nout,*) + write(nout,'("max error combined = = ",e10.3)') zmaxerrg + write(nout,*) + + if (ncheck > 0) then + ! If the maximum spectral norm error across all fields is greater than 100 times the machine + ! epsilon, fail the test + if (zmaxerrg > real(ncheck, jprb) * epsilon(1.0_jprb)) then + write(nout, '(a)') '*******************************' + write(nout, '(a)') 'Correctness test failed' + write(nout, '(a,1e7.2)') 'Maximum spectral norm error = ', zmaxerrg + write(nout, '(a,1e7.2)') 'Error tolerance = ', real(ncheck, jprb) * epsilon(1.0_jprb) + write(nout, '(a)') '*******************************' + error stop + endif + endif + endif +endif + +if (luse_mpi) then + call mpl_allreduce(ztloop, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstep, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin, 'min', ldreprod=.false.) + + call mpl_allreduce(ztstep1, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg1, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax1, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin1, 'min', ldreprod=.false.) + + call mpl_allreduce(ztstep2, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepavg2, 'sum', ldreprod=.false.) + call mpl_allreduce(ztstepmax2, 'max', ldreprod=.false.) + call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) +endif + +ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters,jprd) +ztloop = ztloop/real(nproc,jprd) +ztstep(:) = ztstep(:)/real(nproc,jprd) + +call sort(ztstep,iters) +ztstepmed = ztstep(iters/2) + +ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters,jprd) +ztstep1(:) = ztstep1(:)/real(nproc,jprd) + +call sort(ztstep1, iters) +ztstepmed1 = ztstep1(iters/2) + +ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters,jprd) +ztstep2(:) = ztstep2(:)/real(nproc,jprd) + +call sort(ztstep2,iters) +ztstepmed2 = ztstep2(iters/2) + + +write(nout,'(a)') '======= Start of time step stats =======' +write(nout,'(" ")') +write(nout,'("Inverse transforms")') +write(nout,'("------------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg1 +write(nout,'("min (s): ",f8.4)') ztstepmin1 +write(nout,'("max (s): ",f8.4)') ztstepmax1 +write(nout,'("med (s): ",f8.4)') ztstepmed1 +write(nout,'(" ")') +write(nout,'("Direct transforms")') +write(nout,'("-----------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg2 +write(nout,'("min (s): ",f8.4)') ztstepmin2 +write(nout,'("max (s): ",f8.4)') ztstepmax2 +write(nout,'("med (s): ",f8.4)') ztstepmed2 +write(nout,'(" ")') +write(nout,'("Inverse-direct transforms")') +write(nout,'("-------------------------")') +write(nout,'("avg (s): ",f8.4)') ztstepavg +write(nout,'("min (s): ",f8.4)') ztstepmin +write(nout,'("max (s): ",f8.4)') ztstepmax +write(nout,'("med (s): ",f8.4)') ztstepmed +write(nout,'("loop (s): ",f8.4)') ztloop +write(nout,'(" ")') +write(nout,'(a)') '======= End of time step stats =======' +write(nout,'(" ")') + +if (lstack) then + ! Gather stack usage statistics + istack = getstackusage() + if (myproc == 1) then + print 9000, istack + 9000 format("Stack utilisation information",/,& + &"=============================",//,& + &"Task size(bytes)",/,& + &"==== ===========",//,& + &" 1",11x,i10) + + do i = 2, nproc + call mpl_recv(istack, ksource=nprcids(i), ktag=i, cdstring='transform_test:') + print '(i4,11x,i10)', i, istack + enddo + else + call mpl_send(istack, kdest=nprcids(1), ktag=myproc, cdstring='transform_test:') + endif +endif + +!=================================================================================================== +! Cleanup +!=================================================================================================== + +! TODO: many more arrays to deallocate + +!=================================================================================================== + +if (lstats) then + call gstats(0,1) + call gstats_print(nout, zaveave, jpmaxstat) +endif + +if (lmeminfo) then + write(nout,*) + call ec_meminfo(nout, "", mpl_comm, kbarr=1, kiotask=-1, & + & kcall=1) +endif + +!=================================================================================================== +! Finalize MPI +!=================================================================================================== + +if (luse_mpi) then + call mpl_end(ldmeminfo=.false.) +endif + +!=================================================================================================== +! Close file +!=================================================================================================== + +if (nproc > 1) then + if (myproc /= 1) then + close(unit=nout) + endif +endif + +!=================================================================================================== + +contains + +!=================================================================================================== + +function get_int_value(cname, iarg) result(value) + + integer :: value + character(len=*), intent(in) :: cname + integer, intent(inout) :: iarg + character(len=128) :: carg + integer :: stat + + carg = get_str_value(cname, iarg) + call str2int(carg, value, stat) + + if (stat /= 0) then + call parsing_failed("Invalid argument for " // trim(cname) // ": " // trim(carg)) + end if + +end function + +!=================================================================================================== + +function get_str_value(cname, iarg) result(value) + + character(len=128) :: value + character(len=*), intent(in) :: cname + integer, intent(inout) :: iarg + + iarg = iarg + 1 + call get_command_argument(iarg, value) + + if (value == "") then + call parsing_failed("Invalid argument for " // trim(cname) // ": no value provided") + end if + +end function + +!=================================================================================================== + +subroutine parsing_failed(message) + + character(len=*), intent(in) :: message + if (luse_mpi) call mpl_init(ldinfo=.false.) + if (ec_mpirank() == 0) then + write(nerr,"(a)") trim(message) + call print_help(unit=nerr) + endif + if (luse_mpi) call mpl_end(ldmeminfo=.false.) + stop + +end subroutine + +!=================================================================================================== + +subroutine get_command_line_arguments(nlon, nlat, nsmax, nmsmax, & + & iters, nfld, nlev, lvordiv, lscders, luvders, & + & nproma, verbosity, ldump_values, lprint_norms, & + & lmeminfo, nprgpns, nprgpew, nprtrv, nprtrw, ncheck) + + integer, intent(inout) :: nlon ! Zonal dimension + integer, intent(inout) :: nlat ! Meridional dimension + integer, intent(inout) :: nsmax ! Meridional truncation + integer, intent(inout) :: nmsmax ! Zonal trunciation + integer, intent(inout) :: iters ! Number of iterations for transform test + integer, intent(inout) :: nfld ! Number of scalar fields + integer, intent(inout) :: nlev ! Number of vertical levels + logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence + logical, intent(inout) :: lscders ! Compute scalar derivatives + logical, intent(inout) :: luvders ! Compute uv East-West derivatives + integer, intent(inout) :: nproma ! NPROMA + integer, intent(inout) :: verbosity ! Level of verbosity + logical, intent(inout) :: ldump_values ! Dump values of grid point fields for debugging + logical, intent(inout) :: lprint_norms ! Calculate and print spectral norms of fields + logical, intent(inout) :: lmeminfo ! Show information from FIAT ec_meminfo routine at the + ! end + integer, intent(inout) :: nprgpns ! Size of NS set (gridpoint decomposition) + integer, intent(inout) :: nprgpew ! Size of EW set (gridpoint decomposition) + integer, intent(inout) :: nprtrv ! Size of V set (spectral decomposition) + integer, intent(inout) :: nprtrw ! Size of W set (spectral decomposition) + integer, intent(inout) :: ncheck ! The multiplier of the machine epsilon used as a + ! tolerance for correctness checking + + character(len=128) :: carg ! Storage variable for command line arguments + integer :: iarg = 1 ! Argument index + integer :: stat ! For storing success status of string->integer conversion + integer :: myproc + + do while (iarg <= command_argument_count()) + call get_command_argument(iarg, carg) + + select case(carg) + ! Parse help argument + case('-h', '--help') + if (luse_mpi) call mpl_init(ldinfo=.false.) + if (ec_mpirank()==0) call print_help() + if (luse_mpi) call mpl_end(ldmeminfo=.false.) + stop + ! Parse verbosity argument + case('-v') + verbosity = 1 + ! Parse number of iterations argument + case('-n', '--niter') + iters = get_int_value('-n', iarg) + if (iters < 1) then + call parsing_failed("Invalid argument for -n: must be > 0") + end if + ! Parse spectral truncation argument + case('--nlon'); nlon = get_int_value('--nlon', iarg) + case('--nlat'); nlat = get_int_value('--nlat', iarg) + case('--nsmax'); nsmax = get_int_value('--nsmax', iarg) + case('--nmsmax'); nmsmax = get_int_value('--nmsmax', iarg) + case('-f', '--nfld'); nfld = get_int_value('-f', iarg) + case('-l', '--nlev'); nlev = get_int_value('-l', iarg) + case('--vordiv'); lvordiv = .True. + case('--scders'); lscders = .True. + case('--uvders'); luvders = .True. + case('--nproma'); nproma = get_int_value('--nproma', iarg) + case('--dump-values'); ldump_values = .true. + case('--norms'); lprint_norms = .true. + case('--meminfo'); lmeminfo = .true. + case('--nprgpns'); nprgpns = get_int_value('--nprgpns', iarg) + case('--nprgpew'); nprgpew = get_int_value('--nprgpew', iarg) + case('--nprtrv'); nprtrv = get_int_value('--nprtrv', iarg) + case('--nprtrw'); nprtrw = get_int_value('--nprtrw', iarg) + case('-c', '--check'); ncheck = get_int_value('-c', iarg) + case default + call parsing_failed("Unrecognised argument: " // trim(carg)) + + end select + iarg = iarg + 1 + end do + + if (.not. lvordiv) then + luvders = .false. + endif + +end subroutine get_command_line_arguments + +!=================================================================================================== + +subroutine str2int(str, int, stat) + + character(len=*), intent(in) :: str + integer, intent(out) :: int + integer, intent(out) :: stat + read(str, *, iostat=stat) int + +end subroutine str2int + +!=================================================================================================== + +subroutine sort(a, n) + + real(kind=jprd), intent(inout) :: a(n) + integer(kind=jpim), intent(in) :: n + + real(kind=jprd) :: x + + integer :: i, j + + do i = 2, n + x = a(i) + j = i - 1 + do while (j >= 1) + if (a(j) <= x) exit + a(j + 1) = a(j) + j = j - 1 + end do + a(j + 1) = x + end do + +end subroutine sort + +!=================================================================================================== + +subroutine print_help(unit) + + integer, optional :: unit + integer :: nout = 6 + if (present(unit)) then + nout = unit + endif + + write(nout, "(a)") "" + + if (jprb == jprd) then + write(nout, "(a)") "NAME ectrans-lam-benchmark-dp" + else + write(nout, "(a)") "NAME ectrans-lam-benchmark-sp" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "DESCRIPTION" + write(nout, "(a)") " This program tests ecTrans-lam by transforming fields back and forth& + & between spectral " + if (jprb == jprd) then + write(nout, "(a)") " space and grid-point space (double-precision version)" + else + write(nout, "(a)") " space and grid-point space (single-precision version)" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "USAGE" + if (jprb == jprd) then + write(nout, "(a)") " ectrans-lam-benchmark-dp [options]" + else + write(nout, "(a)") " ectrans-lam-benchmark-sp [options]" + end if + write(nout, "(a)") "" + + write(nout, "(a)") "OPTIONS" + write(nout, "(a)") " -h, --help Print this message" + write(nout, "(a)") " -v Run with verbose output" + write(nout, "(a)") " --nlon NLON Number of gridpoints in zonal direction (default = 128)" + write(nout, "(a)") " --nlat NLAT Number of gridpoints in meridional direction (default = 128)" + write(nout, "(a)") " --nsmax NSMAX Spectral truncation in meridional direction (default = NLAT/2-1)" + write(nout, "(a)") " --nmsmax NMSMAX Spectral truncation in zonal direction (default = NLON/2-1)" + write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& + & iterations (default = 10)" + write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" + write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" + write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" + write(nout, "(a)") " --scders Compute scalar derivatives (default off)" + write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& + & when also --vordiv is given" + write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" + write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& + & fields" + write(nout, "(a)") " The computation of spectral norms will skew overall& + & timings" + write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& + & subroutine on memory usage, thread-binding etc." + write(nout, "(a)") " --nprgpew Size of East-West set in gridpoint decomposition" + write(nout, "(a)") " --nprgpns Size of North-South set in gridpoint decomposition" + write(nout, "(a)") " --nprtrv Size of Vertical set in spectral decomposition" + write(nout, "(a)") " --nprtrw Size of Wave set in spectral decomposition" + write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& + & tolerance for correctness checking" + write(nout, "(a)") "" + write(nout, "(a)") "DEBUGGING" + write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" + write(nout, "(a)") "" + +end subroutine print_help + +!=================================================================================================== + +subroutine initialize_spectral_arrays(nsmax, nmsmax, zsp, sp3d) + + integer, intent(in) :: nsmax ! Spectral truncation in meridional direction + integer, intent(in) :: nmsmax ! Spectral truncation in zonal direction + real(kind=jprb), intent(inout) :: zsp(:,:) ! Surface pressure + real(kind=jprb), intent(inout) :: sp3d(:,:,:) ! 3D fields + + integer(kind=jpim) :: nflevl + integer(kind=jpim) :: nfield + + integer :: i, j + + nflevl = size(sp3d, 1) + nfield = size(sp3d, 3) + + ! First initialize surface pressure + call initialize_2d_spectral_field(nsmax, nmsmax, zsp(1,:)) + + ! Then initialize all of the 3D fields + do i = 1, nflevl + do j = 1, nfield + call initialize_2d_spectral_field(nsmax, nmsmax, sp3d(i,:,j)) + end do + end do + +end subroutine initialize_spectral_arrays + +!=================================================================================================== + +subroutine initialize_2d_spectral_field(nsmax, nmsmax, field) + + integer, intent(in) :: nsmax ! Spectral truncation in meridional direction + integer, intent(in) :: nmsmax ! Spectral truncation in zonal direction + real(kind=jprb), intent(inout) :: field(:) ! Field to initialize + + integer :: ispec, kspec2 + integer, allocatable :: my_km(:), my_kn(:) + + ! Choose a harmonic to initialize arrays + integer :: m_num = 1 ! Zonal wavenumber + integer :: n_num = 0 ! Meridional wavenumber + + ! Type of initialization: (single) 'harmonic' or (random) 'spectrum' + character(len=32) :: init_type='harmonic' + + ! First initialise all spectral coefficients to zero + field(:) = 0.0 + + ! make sure wavenumbers are within truncation + if ( m_num>nmsmax .or. n_num > nsmax .or. & + & ( nsmax>0 .and. nmsmax>0 .and. ( (m_num/real(nmsmax))**2+(n_num/real(nsmax))**2 ) > 1.) ) then + write (nerr,*) + write (nerr,*) 'WARNING: INITIAL WAVENUMBERS OUTSIDE OF TRUNCATION! ' + write (nerr,*) ' m_num = ',m_num,'; nmsmax = ',nmsmax,'; n_num = ',n_num,'; nsmax = ',nsmax,& + & '; ellips check: ',(m_num/real(nmsmax))**2+(n_num/real(nsmax))**2 + write (nerr,*) ' using (kx=',NMSMAX/2,', ky=', NSMAX/2,') instead' + write (nerr,*) + m_num=nmsmax/2 + n_num=nsmax/2 + endif + + ! Get wavenumbers this rank is responsible for + call etrans_inq(kspec2=kspec2) + allocate(my_kn(kspec2),my_km(kspec2)) + call etrans_inq(knvalue=my_kn,kmvalue=my_km) + + ! If rank is responsible for the chosen zonal wavenumber... + if ( init_type == 'harmonic' ) then + do ispec=1,nspec2,4 + if ( my_kn(ispec)== n_num .and. my_km(ispec) == m_num ) then + field(ispec)=1.0 ! cos*cos + !field(ispec+1)=1.0 ! cos*sin + !field(ispec+2)=1.0 ! sin*cos + !field(ispec+3)=1.0 ! sin*sin + end if + enddo + endif + + ! random power spectrum + if ( init_type == 'spectrum' ) then + call random_number(field) + field=2*field-1. ! center around zero + ! set some components to zero because they are unphysical + do ispec=1,nspec2,4 + if ( my_kn(ispec)== 0 .and. my_km(ispec) == 0 ) field(ispec:ispec+3)=0. ! remove mean value for vorticity and divergence + if ( my_kn(ispec)== 0 ) field(ispec+1:ispec+3:2)=0. ! remove sine component on zero-wavenumber + if ( my_kn(ispec)== nmsmax ) field(ispec+1:ispec+3:2)=0. ! remove sine component on last-wavenumber + if ( my_km(ispec)== 0 ) field(ispec+2:ispec+3)=0. ! remove sine component on zero-wavenumber + if ( my_km(ispec)== nsmax ) field(ispec+2:ispec+3)=0. ! remove sine component on last-wavenumber + enddo + + ! scale according to wavenumber**2 + do ispec=1,nspec2 + field(ispec)=field(ispec)/(0.01+(my_kn(ispec)/real(nsmax))**2+(my_km(ispec)/real(nmsmax))**2) + enddo + endif + +end subroutine initialize_2d_spectral_field + +!=================================================================================================== + +subroutine dump_gridpoint_field(jstep, myproc, nlat, nproma, ngpblks, fld, fldchar, noutdump) + + ! Dump a 2d gridpoint field to screen or a binary file. + + integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file + integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file + integer(kind=jpim), intent(in) :: nlat ! Number of latitudes + integer(kind=jpim), intent(in) :: nproma ! Size of nproma + integer(kind=jpim), intent(in) :: ngpblks ! Number of nproma blocks + real(kind=jprb) , intent(in) :: fld(nproma,1,ngpblks) ! 2D field + character , intent(in) :: fldchar ! Single character field identifier + integer(kind=jpim), intent(in) :: noutdump ! Unit number for output file + + integer(kind=jpim) :: kgptotg ! global number of gridpoints + real(kind=jprb), allocatable :: fldg(:,:) ! global field + integer(kind=jpim) :: kfgathg=1 ! number of fields to gather + integer(kind=jpim) :: kto(1)=(/1/) ! processor where to gather + character(len=14) :: filename = "x.xxx.xxx.grid" + character(len=13) :: frmt='(4X,xxxxF8.2)' + +#include "etrans_inq.h" +#include "egath_grid.h" + + call etrans_inq(kgptotg=kgptotg) + + if ( myproc == 1 ) allocate(fldg(kgptotg,1)) + + call egath_grid(pgpg=fldg,kproma=nproma,kfgathg=kfgathg,kto=kto,pgp=fld) + + if ( myproc == 1 ) then + + ! write to file + write(filename(1:1),'(a1)') fldchar + write(filename(3:5),'(i3.3)') jstep +#ifdef ACCGPU + write(filename(7:9),'(a3)') 'gpu' +#else + write(filename(7:9),'(a3)') 'cpu' +#endif + open(noutdump, file=filename, form="unformatted", access="stream") + write(noutdump) kgptotg/nlat,nlat ! dimensions + write(noutdump) fldg ! data + close(noutdump) + + ! write to screen + write(frmt(5:8),'(i4.4)') kgptotg/nlat + write (*,*) fldchar,' at iteration ',jstep,':' + write (*,frmt) fldg + call flush(6) + + deallocate(fldg) + + endif + + +end subroutine dump_gridpoint_field + +!=================================================================================================== + +subroutine dump_spectral_field(jstep, myproc, nspec2, nsmax, nmsmax, fld, kvset, fldchar, noutdump) + + ! Dump a 2d spectral field to screen or a binary file. + + integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file + integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file + integer(kind=jpim), intent(in) :: nspec2 ! Size of nspec2 (number of waves on this proc in M-space) + integer(kind=jpim), intent(in) :: nsmax + integer(kind=jpim), intent(in) :: nmsmax + real(kind=jprb) , intent(in) :: fld(1,nspec2) ! 2D field + integer(kind=jpim), intent(in) :: kvset(1) ! B-set on which the field resides + character , intent(in) :: fldchar ! Single character field identifier + integer(kind=jpim), intent(in) :: noutdump ! Unit number for output file + + integer(kind=jpim) :: nspec2g ! global number of gridpoints + real(kind=jprb), allocatable :: fldg(:,:) ! global field (nspec2g) + integer(kind=jpim) :: kfgathg=1 ! number of fields to gather + integer(kind=jpim) :: kto(1)=(/1/) ! processor where to gather + character(len=14) :: filename = "x.xxx.xxx.spec" + character(len=13) :: frmt='(4X,xxxxF8.2)' ! for printing to screen + integer(kind=jpim) :: knse(0:nmsmax),kmse(0:nsmax) ! elliptic truncation + real(kind=jprb) :: fld2g(0:2*nmsmax+1,0:2*nsmax+1) ! 2D representation of spectral field + integer(kind=jpim) :: jj, jms, jns + +#include "etrans_inq.h" +#include "egath_spec.h" + + if ( myproc == 1 ) then + call etrans_inq(kspec2g=nspec2g) + allocate(fldg(1,nspec2g)) + call ellips(nsmax,nmsmax,knse,kmse) + endif + + call egath_spec(PSPECG=fldg,kfgathg=kfgathg,kto=kto,kvset=kvset,PSPEC=fld) + + if ( myproc == 1 ) then + + fld2g=0. + jj=1 + do jms=0,nmsmax + do jns=0,knse(jms) + fld2g(2*jms+0,2*jns+0)=fldg(1,jj) + fld2g(2*jms+0,2*jns+1)=fldg(1,jj+1) + fld2g(2*jms+1,2*jns+0)=fldg(1,jj+2) + fld2g(2*jms+1,2*jns+1)=fldg(1,jj+3) + jj=jj+4 + enddo + enddo + + ! write to binary file + write(filename(1:1),'(a1)') fldchar + write(filename(3:5),'(i3.3)') jstep +#ifdef ACCGPU + write(filename(7:9),'(a3)') 'gpu' +#else + write(filename(7:9),'(a3)') 'cpu' +#endif + open(noutdump, file=filename, form="unformatted", access="stream") + write(noutdump) 2*nmsmax+2,2*nsmax+2 ! dimensions + write(noutdump) fld2g ! data + close(noutdump) + + ! write to screen + write(frmt(5:8),'(i4.4)') 2*(nmsmax+1) + write (*,*) fldchar,' at iteration ',jstep,':' + write (*,frmt) fld2g + call flush(6) + + deallocate(fldg) + + endif + + +end subroutine dump_spectral_field + +!=================================================================================================== + +function detect_mpirun() result(lmpi_required) + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 5 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv_dr_hook_assert_mpi_initialized + integer :: ivar + + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced + + lmpi_required = .false. + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo +end function + +!=================================================================================================== + +! Assign GSTATS labels to the main regions of ecTrans +subroutine gstats_labels + + call gstats_label(0, ' ', 'PROGRAM - Total') + call gstats_label(1, ' ', 'SETUP_TRANS0 - Setup ecTrans') + call gstats_label(2, ' ', 'SETUP_TRANS - Setup ecTrans handle') + call gstats_label(3, ' ', 'TIME STEP - Time step') + call gstats_label(4, ' ', 'INV_TRANS - Inverse transform') + call gstats_label(5, ' ', 'DIR_TRANS - Direct transform') + call gstats_label(6, ' ', 'NORMS - Norm comp. (optional)') + call gstats_label(102, ' ', 'LTINV_CTL - Inv. Legendre transform') + call gstats_label(103, ' ', 'LTDIR_CTL - Dir. Legendre transform') + call gstats_label(106, ' ', 'FTDIR_CTL - Dir. Fourier transform') + call gstats_label(107, ' ', 'FTINV_CTL - Inv. Fourier transform') + call gstats_label(140, ' ', 'SULEG - Comp. of Leg. poly.') + call gstats_label(152, ' ', 'LTINV_CTL - M to L transposition') + call gstats_label(153, ' ', 'LTDIR_CTL - L to M transposition') + call gstats_label(157, ' ', 'FTINV_CTL - L to G transposition') + call gstats_label(158, ' ', 'FTDIR_CTL - G to L transposition') + call gstats_label(400, ' ', 'GSTATS - GSTATS itself') + +end subroutine gstats_labels + +end program ectrans_lam_benchmark + +!=================================================================================================== \ No newline at end of file diff --git a/src/trans/common/internal/tpm_dim.F90 b/src/trans/common/internal/tpm_dim.F90 index 236d08d0d..59287add4 100755 --- a/src/trans/common/internal/tpm_dim.F90 +++ b/src/trans/common/internal/tpm_dim.F90 @@ -49,10 +49,4 @@ MODULE TPM_DIM TYPE(DIM_TYPE),ALLOCATABLE,TARGET :: DIM_RESOL(:) TYPE(DIM_TYPE),POINTER :: R -! flat copies of above -INTEGER(KIND=JPIM) :: R_NSMAX ! Truncation order -INTEGER(KIND=JPIM) :: R_NTMAX ! Truncation order for tendencies -INTEGER(KIND=JPIM) :: R_NDGNH ! Number of rows in northern hemisphere -INTEGER(KIND=JPIM) :: R_NDGL ! Number of rows of latitudes - END MODULE TPM_DIM diff --git a/src/trans/common/internal/tpm_distr.F90 b/src/trans/common/internal/tpm_distr.F90 index 6a151192f..eddb16843 100755 --- a/src/trans/common/internal/tpm_distr.F90 +++ b/src/trans/common/internal/tpm_distr.F90 @@ -12,7 +12,7 @@ MODULE TPM_DISTR ! Module for distributed memory environment. -USE EC_PARKIND ,ONLY : JPIM ,JPRD +USE EC_PARKIND ,ONLY : JPIM ,JPRD, JPIB IMPLICIT NONE @@ -97,7 +97,7 @@ MODULE TPM_DISTR INTEGER(KIND=JPIM) :: NDGL_FS ! Number of rows of latitudes for which this process is ! performing Fourier Space calculations -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in +INTEGER(KIND=JPIB) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in ! Fourier/gridpoint buffer INTEGER(KIND=JPIM) :: NLENGTF ! Second dimension of Fourier/gridpoint buffer ! (sum of (NLOEN+3) over local latitudes) @@ -171,36 +171,13 @@ MODULE TPM_DISTR REAL(KIND=JPRD) ,ALLOCATABLE :: RWEIGHT(:) ! Weight per grid-point (if weighted distribution) INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCA_GP(:) ! Number of grid-points per a-set -INTEGER(KIND=JPIM), ALLOCATABLE :: OFFSETS_GEMM1(:), OFFSETS_GEMM2(:) +INTEGER(KIND=JPIB), ALLOCATABLE :: OFFSETS_GEMM1(:), OFFSETS_GEMM2(:), OFFSETS_GEMM_MATRIX(:) +INTEGER(KIND=JPIM), ALLOCATABLE :: LEGENDRE_MATRIX_STRIDES(:) END TYPE DISTR_TYPE TYPE(DISTR_TYPE),ALLOCATABLE,TARGET :: DISTR_RESOL(:) TYPE(DISTR_TYPE),POINTER :: D -!flat versions of the above -INTEGER(KIND=JPIM) :: D_NUMP ! No. of spectral waves handled by this processor -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MYMS(:) ! Wave numbers handled by this PE -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT0B(:) ! Start adresses for segments within buffer - ! (according to processors to whom data - ! is going to be sent) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT1B(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCL(:) ! Process responsible for each lat. (F.S) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB1(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NASM0(:) ! Address in a spectral array of (m, n=m) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGTF(:) ! Offset for specific latitude in -INTEGER(KIND=JPIM) :: D_NDGL_FS ! Number of rows of latitudes for which this process is - ! performing Fourier Space calculations -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MSTABF(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB0(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCM(:) ! Process that does the calc. for certain -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPTRLS(:) ! Pointer to first lat. (F.S) - - -! The offsets in the input and output arrays to the gemms. -! (1) are the offsets in the "inputs" of dirtrans ("outputs" invtrans) -! (2) are the offsets in the "outputs" of invtrans ("inputs" dirtrans) -INTEGER(KIND=JPIM), POINTER :: D_OFFSETS_GEMM1(:), D_OFFSETS_GEMM2(:) - END MODULE TPM_DISTR diff --git a/src/trans/common/internal/tpm_gen.F90 b/src/trans/common/internal/tpm_gen.F90 index f153f4b5d..cf4657d18 100644 --- a/src/trans/common/internal/tpm_gen.F90 +++ b/src/trans/common/internal/tpm_gen.F90 @@ -42,7 +42,7 @@ MODULE TPM_GEN ! NSTACK_MEMORY_TR : optional memory strategy in gridpoint transpositions ! = 0 : prefer heap (slower but less memory consuming) ! > 0 : prefer stack (faster but more memory consuming) -INTEGER(KIND=JPIM) :: NSTACK_MEMORY_TR +INTEGER(KIND=JPIM) :: NSTACK_MEMORY_TR = 0 LOGICAL, ALLOCATABLE :: LENABLED(:) ! true: the resolution is enabled (it has been ! initialised and has not been released afterward) diff --git a/src/trans/common/internal/tpm_geometry.F90 b/src/trans/common/internal/tpm_geometry.F90 index 1ff1a1be9..48454a371 100644 --- a/src/trans/common/internal/tpm_geometry.F90 +++ b/src/trans/common/internal/tpm_geometry.F90 @@ -34,11 +34,4 @@ MODULE TPM_GEOMETRY TYPE(GEOM_TYPE),ALLOCATABLE,TARGET :: GEOM_RESOL(:) TYPE(GEOM_TYPE),POINTER :: G -!flat copies of the above -INTEGER(KIND=JPIM),ALLOCATABLE :: G_NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES -INTEGER(KIND=JPIM),ALLOCATABLE :: G_NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER -INTEGER(KIND=JPIM) :: G_NMEN_MAX -INTEGER(KIND=JPIM),ALLOCATABLE :: G_NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL -INTEGER(KIND=JPIM) :: G_NLOEN_MAX - END MODULE TPM_GEOMETRY diff --git a/src/trans/cpu/CMakeLists.txt b/src/trans/cpu/CMakeLists.txt index 05787dcbb..cd36db8e2 100644 --- a/src/trans/cpu/CMakeLists.txt +++ b/src/trans/cpu/CMakeLists.txt @@ -44,9 +44,9 @@ function(generate_backend_sources) ecbuild_list_add_pattern( LIST files GLOB - algor/* - internal/* - external/* + algor/*.F90 + internal/*.F90 + external/*.F90 QUIET ) diff --git a/src/trans/cpu/external/setup_trans.F90 b/src/trans/cpu/external/setup_trans.F90 index cbe7df786..0deea417e 100644 --- a/src/trans/cpu/external/setup_trans.F90 +++ b/src/trans/cpu/external/setup_trans.F90 @@ -10,7 +10,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& &KTMAX,KRESOL,PWEIGHT,LDGRIDONLY,LDUSERPNM,LDKEEPRPNM,LDUSEFLT,& -&LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,& +&LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,LD_ALL_FFTW,& &LDLL,LDSHIFTLL,CDIO_LEGPOL,CDLEGPOLFNAME,KLEGPOLPTR,KLEGPOLPTR_LEN) !**** *SETUP_TRANS* - Setup transform package for specific resolution @@ -54,6 +54,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! FLT, otherwise always kept) ! LDPNMONLY - Compute the Legendre polynomials only, not the FFTs. ! LDUSEFFTW - Use FFTW for FFTs (option deprecated - FFTW is now mandatory) +! LD_ALL_FFTW : T to transform all fields in one call, F to transforms fields one after another ! LDLL - Setup second set of input/output latitudes ! the number of input/output latitudes to transform is equal KDGL ! or KDGL+2 in the case that includes poles + equator @@ -96,6 +97,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! G. Mozdzynski : Jun 2015 Support alternative FFTs to FFTW ! M.Hamrud/W.Deconinck : July 2015 IO options for Legenndre polynomials ! R. El Khatib 07-Mar-2016 Better flexibility for Legendre polynomials computation in stretched mode +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility ! ------------------------------------------------------------------ USE PARKIND1, ONLY: JPIM, JPRD, JPRB @@ -140,6 +142,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& REAL(KIND=JPRD) ,OPTIONAL,INTENT(IN) :: PSTRET LOGICAL ,OPTIONAL,INTENT(IN):: LDGRIDONLY LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFLT +LOGICAL ,OPTIONAL,INTENT(IN):: LD_ALL_FFTW LOGICAL ,OPTIONAL,INTENT(IN):: LDUSERPNM LOGICAL ,OPTIONAL,INTENT(IN):: LDKEEPRPNM LOGICAL ,OPTIONAL,INTENT(IN):: LDSPSETUPONLY @@ -228,6 +231,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& S%LUSE_BELUSOV=.TRUE. ! use Belusov algorithm to compute RPNM array instead of per m S%LKEEPRPNM=.FALSE. ! Keep Legendre polonomials (RPNM) S%LUSEFLT=.FALSE. ! Use fast legendre transforms +TW%LALL_FFTW=.FALSE. ! transform fields one at a time LLSPSETUPONLY = .FALSE. ! Only create distributed spectral setup S%LDLL = .FALSE. ! use mapping to/from second set of latitudes S%LSHIFTLL = .FALSE. ! shift output lat-lon by 0.5dx, 0.5dy @@ -340,6 +344,10 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! Setup distribution independent dimensions CALL SETUP_DIMS +IF(PRESENT(LD_ALL_FFTW)) THEN + TW%LALL_FFTW=LD_ALL_FFTW +ENDIF + S%LSOUTHPNM=.FALSE. IF(PRESENT(PSTRET)) THEN IF (ABS(PSTRET-1.0_JPRD)>100._JPRD*EPSILON(1._JPRD)) THEN diff --git a/src/trans/cpu/external/trans_inq.F90 b/src/trans/cpu/external/trans_inq.F90 index a67a69de2..ea0a91886 100644 --- a/src/trans/cpu/external/trans_inq.F90 +++ b/src/trans/cpu/external/trans_inq.F90 @@ -419,7 +419,7 @@ SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& IF(UBOUND(PGW,1) < R%NDGL) THEN CALL ABORT_TRANS('TRANS_INQ: PGW TOO SMALL') ELSE - PGW(1:R%NDGL) = F%RW + PGW(1:R%NDGL) = REAL(F%RW,JPRB) ENDIF ENDIF diff --git a/src/trans/cpu/internal/ftdir_mod.F90 b/src/trans/cpu/internal/ftdir_mod.F90 index 48ecc9a35..8ee2905cc 100644 --- a/src/trans/cpu/internal/ftdir_mod.F90 +++ b/src/trans/cpu/internal/ftdir_mod.F90 @@ -43,14 +43,14 @@ SUBROUTINE FTDIR(PREEL,KFIELDS,KGL) ! D. Degrauwe (Feb 2012): Alternative extension zone (E') ! G. Mozdzynski (Oct 2014): support for FFTW transforms ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW - +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility ! ------------------------------------------------------------------ USE PARKIND1 ,ONLY : JPIM, JPIB, JPRB USE TPM_DISTR ,ONLY : D, MYSETW USE TPM_GEOMETRY ,ONLY : G -USE TPM_FFTW ,ONLY : EXEC_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW USE TPM_DIM ,ONLY : R ! @@ -61,7 +61,6 @@ SUBROUTINE FTDIR(PREEL,KFIELDS,KGL) INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,JJ,IST1 INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE -LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time ! ------------------------------------------------------------------ @@ -75,7 +74,7 @@ SUBROUTINE FTDIR(PREEL,KFIELDS,KGL) IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL ICLEN=(IRLEN/2+1)*2 - CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,TW%LALL_FFTW,PREEL) ENDIF IST1=1 diff --git a/src/trans/cpu/internal/ftdirad_mod.F90 b/src/trans/cpu/internal/ftdirad_mod.F90 index 4d7471f0a..0bfd68da9 100644 --- a/src/trans/cpu/internal/ftdirad_mod.F90 +++ b/src/trans/cpu/internal/ftdirad_mod.F90 @@ -42,14 +42,14 @@ SUBROUTINE FTDIRAD(PREEL,KFIELDS,KGL) ! D. Degrauwe (Feb 2012): Alternative extension zone (E') ! G. Mozdzynski (Oct 2014): support for FFTW transforms ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW - +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility ! ------------------------------------------------------------------ USE PARKIND1 ,ONLY : JPIM, JPRB USE TPM_DISTR ,ONLY : D, MYSETW USE TPM_GEOMETRY ,ONLY : G -USE TPM_FFTW ,ONLY : EXEC_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW USE TPM_DIM ,ONLY : R IMPLICIT NONE @@ -60,7 +60,6 @@ SUBROUTINE FTDIRAD(PREEL,KFIELDS,KGL) INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,JJ,JF,ILOEN INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE REAL(KIND=JPRB) :: ZMUL -LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time ! ------------------------------------------------------------------ ITYPE = 1 @@ -78,7 +77,7 @@ SUBROUTINE FTDIRAD(PREEL,KFIELDS,KGL) ENDDO ENDDO -CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) +CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,TW%LALL_FFTW,PREEL) ! Change of metric (not in forward routine) diff --git a/src/trans/cpu/internal/ftinv_mod.F90 b/src/trans/cpu/internal/ftinv_mod.F90 index 00c341077..705089cfe 100644 --- a/src/trans/cpu/internal/ftinv_mod.F90 +++ b/src/trans/cpu/internal/ftinv_mod.F90 @@ -42,13 +42,14 @@ SUBROUTINE FTINV(PREEL,KFIELDS,KGL) ! D. Degrauwe (Feb 2012): Alternative extension zone (E') ! G. Mozdzynski (Oct 2014): support for FFTW transforms ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility ! ------------------------------------------------------------------ USE PARKIND1 ,ONLY : JPIM, JPRB USE TPM_DISTR ,ONLY : D, MYSETW USE TPM_GEOMETRY ,ONLY : G -USE TPM_FFTW ,ONLY : EXEC_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW USE TPM_DIM ,ONLY : R IMPLICIT NONE @@ -58,7 +59,6 @@ SUBROUTINE FTINV(PREEL,KFIELDS,KGL) INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,JJ,JF,IST1 INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN, ITYPE -LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time ! ------------------------------------------------------------------ @@ -80,7 +80,7 @@ SUBROUTINE FTINV(PREEL,KFIELDS,KGL) IRLEN=G%NLOEN(IGLG)+R%NNOEXTZL ICLEN=(IRLEN/2+1)*2 - CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) + CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,TW%LALL_FFTW,PREEL) ENDIF ! ------------------------------------------------------------------ diff --git a/src/trans/cpu/internal/ftinvad_mod.F90 b/src/trans/cpu/internal/ftinvad_mod.F90 index 7e5c2ddef..396ac4f78 100644 --- a/src/trans/cpu/internal/ftinvad_mod.F90 +++ b/src/trans/cpu/internal/ftinvad_mod.F90 @@ -42,7 +42,7 @@ SUBROUTINE FTINVAD(PREEL,KFIELDS,KGL) ! D. Degrauwe (Feb 2012): Alternative extension zone (E') ! G. Mozdzynski (Oct 2014): support for FFTW transforms ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW - +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility ! ------------------------------------------------------------------ USE PARKIND1 ,ONLY : JPIM, JPIB, JPRB @@ -50,7 +50,7 @@ SUBROUTINE FTINVAD(PREEL,KFIELDS,KGL) USE TPM_DISTR ,ONLY : D, MYSETW USE TPM_DIM ,ONLY : R USE TPM_GEOMETRY ,ONLY : G -USE TPM_FFTW ,ONLY : EXEC_FFTW +USE TPM_FFTW ,ONLY : TW, EXEC_FFTW USE TPM_DIM ,ONLY : R ! @@ -61,7 +61,6 @@ SUBROUTINE FTINVAD(PREEL,KFIELDS,KGL) INTEGER(KIND=JPIM) :: IGLG,IST,ILEN,JJ,JF,ILOEN INTEGER(KIND=JPIM) :: IOFF,IRLEN,ICLEN,ITYPE -LOGICAL :: LL_ALL=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time ! ------------------------------------------------------------------ @@ -81,7 +80,7 @@ SUBROUTINE FTINVAD(PREEL,KFIELDS,KGL) ENDDO ENDDO -CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,LL_ALL,PREEL) +CALL EXEC_FFTW(ITYPE,IRLEN,ICLEN,IOFF,KFIELDS,TW%LALL_FFTW,PREEL) DO JJ=1,ILEN DO JF=1,KFIELDS diff --git a/src/trans/cpu/internal/tpm_fftw.F90 b/src/trans/cpu/internal/tpm_fftw.F90 index f424d0e3f..4672bc9d3 100644 --- a/src/trans/cpu/internal/tpm_fftw.F90 +++ b/src/trans/cpu/internal/tpm_fftw.F90 @@ -17,6 +17,7 @@ MODULE TPM_FFTW ! -------------- ! Original October 2014 ! R. El Khatib 01-Sep-2015 More subroutines for better modularity +! R. El Khatib 08-Jun-2023 LALL_FFTW for better flexibility USE, INTRINSIC :: ISO_C_BINDING @@ -44,7 +45,8 @@ MODULE TPM_FFTW INTEGER(KIND=JPIM),ALLOCATABLE :: N_PLANS(:) TYPE(FFTW_PLAN),POINTER :: FFTW_PLANS(:) => NULL() INTEGER(KIND=JPIM) :: N_MAX=0 ! maximum number of latitudes - INTEGER(KIND=JPIM) :: N_MAX_PLANS=4 ! maximum number of plans for each active latitudes + INTEGER(KIND=JPIM) :: N_MAX_PLANS=4 ! maximum number of plans for each active latitude + LOGICAL :: LALL_FFTW=.FALSE. ! T=do kfields ffts in one batch, F=do kfields ffts one at a time END TYPE FFTW_TYPE @@ -422,8 +424,8 @@ SUBROUTINE EXEC_EFFTW(KTYPE,KRLEN,KCLEN,KOFF,KFIELDS,LD_ALL,PREEL) CALL SFFTW_EXECUTE_DFT_C2R(IPLAN_C2R,ZFFT,ZFFT) END IF IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_C2R',1,ZHOOK_HANDLE2) - DO JJ=1,KRLEN - DO JF=1,KFIELDS + DO JF=1,KFIELDS + DO JJ=1,KRLEN PREEL(KOFF+JJ-1,JF)=ZFFT(JJ,JF) ENDDO ENDDO @@ -440,8 +442,8 @@ SUBROUTINE EXEC_EFFTW(KTYPE,KRLEN,KCLEN,KOFF,KFIELDS,LD_ALL,PREEL) CALL SFFTW_EXECUTE_DFT_R2C(IPLAN_C2R,ZFFT,ZFFT) END IF IF (LHOOK) CALL DR_HOOK('FFTW_EXECUTE_DFT_R2C',1,ZHOOK_HANDLE2) - DO JJ=1,KCLEN - DO JF=1,KFIELDS + DO JF=1,KFIELDS + DO JJ=1,KCLEN PREEL(KOFF+JJ-1,JF)=ZFFT(JJ,JF)/REAL(KRLEN,JPRB) ENDDO ENDDO diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 0769aa7dc..97579f52b 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -58,6 +58,7 @@ ecbuild_add_library( PUBLIC_LIBS fiat ectrans_common PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> $<${HAVE_CUTLASS}:nvidia::cutlass::cutlass> PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU ${GPU_OFFLOAD}GPU $<${HAVE_CUTLASS}:USE_CUTLASS> @@ -66,6 +67,12 @@ ecbuild_add_library( $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> ) +# The ecTrans libraries may be linked against an executable with the -cuda flag +# In that case we must link with -cuda here to ensure compatibility +target_link_options(ectrans_gpu_common PUBLIC + $<$:-cuda> +) + ectrans_target_fortran_module_directory( TARGET ectrans_gpu_common MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/ectrans @@ -88,8 +95,8 @@ function(generate_backend_sources) ecbuild_list_add_pattern( LIST files GLOB - internal/* - external/* + internal/*.F90 + external/*.F90 QUIET ) list( APPEND files @@ -157,6 +164,12 @@ foreach( prec dp sp ) ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} ) + # The ecTrans libraries may be linked against an executable with the -cuda flag + # In that case we must link with -cuda here to ensure compatibility + target_link_options(ectrans_gpu_${prec} PUBLIC + $<$:-cuda> + ) + ectrans_target_fortran_module_directory( TARGET ectrans_gpu_${prec} MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans diff --git a/src/trans/gpu/algor/buffered_allocator_mod.F90 b/src/trans/gpu/algor/buffered_allocator_mod.F90 index 6f8ce3331..346b85977 100644 --- a/src/trans/gpu/algor/buffered_allocator_mod.F90 +++ b/src/trans/gpu/algor/buffered_allocator_mod.F90 @@ -12,7 +12,9 @@ MODULE BUFFERED_ALLOCATOR_MOD USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_LOC, C_F_POINTER USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE +#ifdef ACCGPU USE OPENACC, ONLY: ACC_ASYNC_SYNC +#endif IMPLICIT NONE @@ -69,10 +71,11 @@ FUNCTION MAKE_BUFFERED_ALLOCATOR() MAKE_BUFFERED_ALLOCATOR%NEXT_BUF = 0 END FUNCTION MAKE_BUFFERED_ALLOCATOR - FUNCTION RESERVE(ALLOCATOR, SZ) + FUNCTION RESERVE(ALLOCATOR, SZ, WHO) IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR INTEGER(KIND=C_SIZE_T), INTENT(IN) :: SZ + CHARACTER(*), INTENT(IN), OPTIONAL :: WHO TYPE(ALLOCATION_RESERVATION_HANDLE) :: RESERVE @@ -88,7 +91,7 @@ SUBROUTINE INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR !!TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: GROWING_ALLOCATION - TYPE(GROWING_ALLOCATION_TYPE), target, INTENT(INout) :: GROWING_ALLOCATION + TYPE(GROWING_ALLOCATION_TYPE), TARGET, INTENT(INOUT) :: GROWING_ALLOCATION INTEGER :: I DO I = 0, NBUF-1 @@ -118,7 +121,7 @@ FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) END FUNCTION GET_ALLOCATION SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) - USE ISO_C_BINDING, ONLY: C_FLOAT + USE ISO_C_BINDING, ONLY: C_FLOAT, C_F_POINTER, C_SIZEOF IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:) @@ -126,10 +129,13 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM LOGICAL :: SET_VALUE_EFF INTEGER(KIND=4) :: SET_STREAM_EFF - INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES + INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES, END_IN_BYTES, J IF (START_IN_BYTES + LENGTH_IN_BYTES - 1 > SIZE(SRC, KIND=C_SIZE_T)) THEN CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment") ENDIF + IF (START_IN_BYTES < 1) THEN + CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment") + ENDIF IF (PRESENT(SET_VALUE)) THEN SET_VALUE_EFF = SET_VALUE ELSE @@ -138,20 +144,34 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE IF (PRESENT(SET_STREAM)) THEN SET_STREAM_EFF = SET_STREAM ELSE +#ifdef ACCGPU SET_STREAM_EFF = ACC_ASYNC_SYNC +#endif +#ifdef OMPGPU +#endif ENDIF IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN ! This option is turned off by default, but for experimentation we can turn it on. This is ! setting all bits to 1 (meaning NaN in floating point) - !$ACC KERNELS PRESENT(SRC) ASYNC(SET_STREAM_EFF) - SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1) = -1 - !$ACC END KERNELS!! LOOP +#ifdef ACCGPU + !$ACC PARALLEL PRESENT(SRC) ASYNC(SET_STREAM_EFF) +#endif +#ifdef OMPGPU +#endif + DO J=1_C_SIZE_T,LENGTH_IN_BYTES + SRC(J) = -1 + ENDDO +#ifdef ACCGPU + !$ACC END PARALLEL +#endif +#ifdef OMPGPU +#endif ENDIF CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & - & [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))]) + & [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_FLOAT SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) - USE ISO_C_BINDING, ONLY: C_DOUBLE + USE ISO_C_BINDING, ONLY: C_DOUBLE, C_F_POINTER, C_SIZEOF IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:) @@ -159,10 +179,13 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM LOGICAL :: SET_VALUE_EFF INTEGER(KIND=4) :: SET_STREAM_EFF - INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES + INTEGER(KIND=C_SIZE_T) :: START_IN_BYTES, LENGTH_IN_BYTES, END_IN_BYTES, J IF (START_IN_BYTES + LENGTH_IN_BYTES - 1 > SIZE(SRC, KIND=C_SIZE_T)) THEN CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment") ENDIF + IF (START_IN_BYTES < 1) THEN + CALL ABORT_TRANS("Logical Error in ASSIGN_PTR - OOB assignment") + ENDIF IF (PRESENT(SET_VALUE)) THEN SET_VALUE_EFF = SET_VALUE ELSE @@ -171,16 +194,31 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU IF (PRESENT(SET_STREAM)) THEN SET_STREAM_EFF = SET_STREAM ELSE +#ifdef ACCGPU SET_STREAM_EFF = ACC_ASYNC_SYNC +#endif +#ifdef OMPGPU +#endif ENDIF IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN ! This option is turned off by default, but for experimentation we can turn it on. This is ! setting all bits to 1 (meaning NaN in floating point) - !$ACC KERNELS PRESENT(SRC) ASYNC(SET_STREAM_EFF) - SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1) = -1 - !$ACC END KERNELS!! LOOP + END_IN_BYTES=START_IN_BYTES+LENGTH_IN_BYTES-1 +#ifdef ACCGPU + !$ACC PARALLEL PRESENT(SRC) ASYNC(SET_STREAM_EFF) +#endif +#ifdef OMPGPU +#endif + DO J=1_C_SIZE_T,LENGTH_IN_BYTES + SRC(J) = -1 + ENDDO +#ifdef ACCGPU + !$ACC END PARALLEL +#endif +#ifdef OMPGPU +#endif ENDIF CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & - & [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))]) + & [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_DOUBLE END MODULE diff --git a/src/trans/gpu/algor/ext_acc.F90 b/src/trans/gpu/algor/ext_acc.F90 index 98cee38ce..ffc760e61 100644 --- a/src/trans/gpu/algor/ext_acc.F90 +++ b/src/trans/gpu/algor/ext_acc.F90 @@ -17,14 +17,23 @@ module openacc_ext_type end type end module module openacc_ext - use iso_c_binding, only: c_ptr, c_size_t, c_loc - use openacc, only: acc_create, acc_copyin, acc_handle_kind + use iso_c_binding, only: c_ptr, c_size_t, c_loc, c_sizeof +#ifdef ACCGPU + use openacc, only: acc_handle_kind +#endif +#ifdef OMPGPU +#endif use openacc_ext_type, only: ext_acc_arr_desc implicit none private public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, & - & ext_acc_delete, ext_acc_arr_desc, acc_handle_kind +#ifdef ACCGPU + & ext_acc_delete, ext_acc_arr_desc, acc_handle_kind +#endif +#ifdef OMPGPU + & ext_acc_delete, ext_acc_arr_desc +#endif type common_pointer_descr type(c_ptr) :: ptr @@ -247,97 +256,132 @@ function get_common_pointers(in_ptrs, out_ptrs) result(num_ranges) enddo end function subroutine ext_acc_create(ptrs, stream) - use openacc, only: acc_create, acc_async_sync +#ifdef ACCGPU + use openacc, only: acc_async_sync +#endif use iso_fortran_env, only: int32 implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) +#ifdef ACCGPU integer(acc_handle_kind), optional :: stream +#endif +#ifdef OMPGPU + integer(4), optional :: stream +#endif type(common_pointer_descr), allocatable :: common_ptrs(:) integer :: i, num_ranges integer(kind=int32), pointer :: pp(:) +#ifdef ACCGPU integer(acc_handle_kind) :: stream_act - if (present(stream)) then stream_act = stream else stream_act = acc_async_sync endif +#endif allocate(common_ptrs(size(ptrs))) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))]) - !!call acc_create_async(pp, common_ptrs(i)%sz, async=stream_act) - call acc_create(pp, int(common_ptrs(i)%sz)) + call c_f_pointer(common_ptrs(i)%ptr, pp, [common_ptrs(i)%sz/c_sizeof(pp(1))]) +#ifdef ACCGPU + !$acc enter data create(pp) async(stream_act) +#endif +#ifdef OMPGPU +#endif enddo end subroutine subroutine ext_acc_copyin(ptrs, stream) +#ifdef ACCGPU use openacc, only: acc_async_sync +#endif implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) +#ifdef ACCGPU integer(acc_handle_kind), optional :: stream +#endif +#ifdef OMPGPU + integer(4), optional :: stream +#endif type(common_pointer_descr), allocatable :: common_ptrs(:) integer :: i, num_ranges integer(4), pointer :: pp(:) - +#ifdef ACCGPU integer(acc_handle_kind) :: stream_act - if (present(stream)) then stream_act = stream else stream_act = acc_async_sync endif +#endif allocate(common_ptrs(size(ptrs))) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))]) - !!call acc_copyin_async(pp, common_ptrs(i)%sz, async=stream_act) - call acc_copyin(pp, int(common_ptrs(i)%sz)) + call c_f_pointer(common_ptrs(i)%ptr, pp, [common_ptrs(i)%sz/c_sizeof(pp(1))]) +#ifdef ACCGPU + !$acc enter data copyin(pp) async(stream_act) +#endif +#ifdef OMPGPU +#endif enddo end subroutine subroutine ext_acc_copyout(ptrs, stream) - use openacc, only: acc_async_sync, acc_copyout +#ifdef ACCGPU + use openacc, only: acc_async_sync +#endif implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) +#ifdef ACCGPU integer(acc_handle_kind), optional :: stream - +#endif +#ifdef OMPGPU + integer(4), optional :: stream +#endif type(common_pointer_descr), allocatable :: common_ptrs(:) integer :: i, num_ranges integer(4), pointer :: pp(:) - +#ifdef ACCGPU integer(acc_handle_kind) :: stream_act - if (present(stream)) then stream_act = stream else stream_act = acc_async_sync endif +#endif allocate(common_ptrs(size(ptrs))) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))]) - !!call acc_copyout_async(pp, common_ptrs(i)%sz, async=stream_act) - call acc_copyout(pp, int(common_ptrs(i)%sz)) + call c_f_pointer(common_ptrs(i)%ptr, pp, [common_ptrs(i)%sz/c_sizeof(pp(1))]) +#ifdef ACCGPU + !$acc exit data copyout(pp) async(stream_act) +#endif +#ifdef OMPGPU +#endif enddo end subroutine subroutine ext_acc_delete(ptrs, stream) - use openacc, only: acc_async_sync, acc_delete +#ifdef ACCGPU + use openacc, only: acc_async_sync +#endif implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) +#ifdef ACCGPU integer(acc_handle_kind), optional :: stream - +#else + integer(4), optional :: stream +#endif type(common_pointer_descr), allocatable :: common_ptrs(:) integer :: i, num_ranges integer(4), pointer :: pp(:) - +#ifdef ACCGPU integer(acc_handle_kind) :: stream_act if (present(stream)) then @@ -345,13 +389,17 @@ subroutine ext_acc_delete(ptrs, stream) else stream_act = acc_async_sync endif +#endif allocate(common_ptrs(size(ptrs))) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))]) - !!call acc_delete_async(pp, common_ptrs(i)%sz, async=stream_act) - call acc_delete(pp, int(common_ptrs(i)%sz)) + call c_f_pointer(common_ptrs(i)%ptr, pp, [common_ptrs(i)%sz/c_sizeof(pp(1))]) +#ifdef ACCGPU + !$acc exit data delete(pp) async(stream_act) +#endif +#ifdef OMPGPU +#endif enddo end subroutine end module diff --git a/src/trans/gpu/algor/growing_allocator_mod.F90 b/src/trans/gpu/algor/growing_allocator_mod.F90 index f8de0fc90..283db018d 100644 --- a/src/trans/gpu/algor/growing_allocator_mod.F90 +++ b/src/trans/gpu/algor/growing_allocator_mod.F90 @@ -5,6 +5,7 @@ MODULE GROWING_ALLOCATOR_MOD PRIVATE PUBLIC :: GROWING_ALLOCATION_TYPE PUBLIC :: REALLOCATE_GROWING_ALLOCATION, REGISTER_FREE_FUNCTION + PUBLIC :: DESTROY_GROWING_ALLOCATOR ABSTRACT INTERFACE SUBROUTINE FREE_FUNC_PROC(PTR, SZ) BIND(C) @@ -32,24 +33,22 @@ SUBROUTINE REALLOCATE_GROWING_ALLOCATION(ALLOC, SZ) USE TPM_GEN, ONLY: NOUT IMPLICIT NONE TYPE(GROWING_ALLOCATION_TYPE), INTENT(INOUT) :: ALLOC - INTEGER(C_SIZE_T) :: SZ - INTEGER :: I + INTEGER(C_SIZE_T), INTENT(IN) :: SZ ! Deallocate existing pointer IF (ASSOCIATED(ALLOC%PTR) .AND. SZ > SIZE(ALLOC%PTR, 1, C_SIZE_T)) THEN WRITE(NOUT,*) "WARNING: REALLOCATING GROWING POINTER CAUSING GRAPH REINSTANTIATION" - DO I = 1, ALLOC%FREE_FUNCS_SZ - CALL ALLOC%FREE_FUNCS(I)%FUNC(ALLOC%PTR, & - SIZE(ALLOC%PTR, 1, C_SIZE_T)) - ENDDO - !$ACC EXIT DATA DELETE(ALLOC%PTR) - DEALLOCATE(ALLOC%PTR) - NULLIFY(ALLOC%PTR) + CALL DESTROY_GROWING_ALLOCATOR(ALLOC) ENDIF IF (.NOT. ASSOCIATED(ALLOC%PTR)) THEN ALLOCATE(ALLOC%PTR(SZ)) +#ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(ALLOC:ALLOC%PTR) +#endif +#ifdef ACCGPU !$ACC ENTER DATA CREATE(ALLOC%PTR) +#endif ALLOC%FREE_FUNCS_SZ = 0 ENDIF END SUBROUTINE @@ -89,4 +88,25 @@ SUBROUTINE REGISTER_FREE_C(ALLOC_C, FREE_FUNC_C) BIND(C, NAME="growing_allocator END SUBROUTINE + SUBROUTINE DESTROY_GROWING_ALLOCATOR(ALLOC) + USE ISO_C_BINDING, ONLY: C_SIZE_T + IMPLICIT NONE + TYPE(GROWING_ALLOCATION_TYPE) :: ALLOC + INTEGER :: I + IF (ASSOCIATED(ALLOC%PTR)) THEN + DO I = 1, ALLOC%FREE_FUNCS_SZ + CALL ALLOC%FREE_FUNCS(I)%FUNC(ALLOC%PTR, & + SIZE(ALLOC%PTR, 1, C_SIZE_T)) + ENDDO +#ifdef OMPGPU + !$OMP TARGET EXIT DATA MAP(DELETE:ALLOC%PTR) +#endif +#ifdef ACCGPU + !$ACC EXIT DATA DELETE(ALLOC%PTR) +#endif + DEALLOCATE(ALLOC%PTR) + NULLIFY(ALLOC%PTR) + ENDIF + END SUBROUTINE + END MODULE diff --git a/src/trans/gpu/algor/hicblas_cutlass.cuda.h b/src/trans/gpu/algor/hicblas_cutlass.cuda.h index 7a842a808..8b0eacf88 100644 --- a/src/trans/gpu/algor/hicblas_cutlass.cuda.h +++ b/src/trans/gpu/algor/hicblas_cutlass.cuda.h @@ -1,3 +1,6 @@ +// (C) Copyright 2000- ECMWF. +// (C) Copyright 2024- NVIDIA. + #ifdef USE_CUTLASS //#include "hicblas.h" #include "cutlass/gemm/device/gemm.h" @@ -72,6 +75,7 @@ class cutlass_sgemm_grouped { static constexpr int sz_align = 8; public: + using real_type = float; void operator()(cudaStream_t stream, int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc) const { @@ -129,6 +133,7 @@ class cutlass_sgemm_grouped { static constexpr int sz_align = 1; public: + using real_type = float; void operator()(cudaStream_t stream, int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc) const { @@ -149,11 +154,11 @@ class cutlass_sgemm_grouped { } // namespace detail template -void cutlass_sgemm_wrapper_grouped_op(int blas_id, int m, int *n, int *k, +void cutlass_sgemm_wrapper_grouped_op(int resol_id, int blas_id, int m, const int *n, const int *k, float alpha, const float *A, int lda, - int *offsetsA, const float *B, int ldb, - int *offsetsB, float beta, float *C, - int ldc, int *offsetsC, int batchCount, + const int64_t *offsetsA, const float *B, const int *ldb, + const int64_t *offsetsB, float beta, float *C, + int ldc, const int64_t *offsetsC, int batchCount, cudaStream_t stream, void *growing_allocator) { using namespace detail; @@ -165,40 +170,40 @@ void cutlass_sgemm_wrapper_grouped_op(int blas_id, int m, int *n, int *k, if (capability_major >= 8 && use_3xtf32) run_group_graph(cutlass_sgemm_grouped(), - m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, + resol_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream, blas_id, growing_allocator); else run_group_graph(cutlass_sgemm_grouped(), - m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, + resol_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream, blas_id, growing_allocator); } -void cutlass_sgemm_wrapper_grouped(int blas_id, char transa, char transb, - int m, int *n, int *k, float alpha, - const float *A, int lda, int *offsetsA, - const float *B, int ldb, int *offsetsB, float beta, - float *C, int ldc, int *offsetsC, +void cutlass_sgemm_wrapper_grouped(int resol_id, int blas_id, char transa, char transb, + int m, const int *n, const int *k, float alpha, + const float *A, int lda, const int64_t *offsetsA, + const float *B, const int *ldb, const int64_t *offsetsB, float beta, + float *C, int ldc, const int64_t *offsetsC, int batchCount, cudaStream_t stream, void *growing_allocator) { if (transa == 'N' && transb == 'N') cutlass_sgemm_wrapper_grouped_op( - blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, + resol_id, blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream, growing_allocator); else if (transa == 'N' && transb == 'T') cutlass_sgemm_wrapper_grouped_op( - blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, + resol_id, blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream, growing_allocator); else if (transa == 'T' && transb == 'N') cutlass_sgemm_wrapper_grouped_op( - blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, + resol_id, blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream, growing_allocator); else if (transa == 'T' && transb == 'T') cutlass_sgemm_wrapper_grouped_op( - blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, + resol_id, blas_id, m, n, k, alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream, growing_allocator); else assert(false); diff --git a/src/trans/gpu/algor/hicblas_gemm.hip.cpp b/src/trans/gpu/algor/hicblas_gemm.hip.cpp index 9d6178bed..f9caa0383 100644 --- a/src/trans/gpu/algor/hicblas_gemm.hip.cpp +++ b/src/trans/gpu/algor/hicblas_gemm.hip.cpp @@ -1,4 +1,5 @@ // (C) Copyright 2000- ECMWF. +// (C) Copyright 2024- NVIDIA. // // This software is licensed under the terms of the Apache Licence Version 2.0 // which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -21,63 +22,89 @@ #include "growing_allocator.h" +bool hip_alreadyAllocated_sgemm = false; +bool hip_alreadyAllocated_sgemm_handle = false; -bool hip_alreadyAllocated_sgemm=false; -bool hip_alreadyAllocated_sgemm_handle=false; - -bool hip_alreadyAllocated_dsgemm=false; -bool hip_alreadyAllocated_dgemm_handle=false; +bool hip_alreadyAllocated_dsgemm = false; +bool hip_alreadyAllocated_dgemm_handle = false; hipblasHandle_t handle_hip_sgemm; hipblasHandle_t handle_hip_dgemm; - namespace { -namespace detail { -struct pair_hash { - std::size_t operator()(const std::pair &p) const { - return p.first * 10000 + p.second; +struct cache_key { + int resol_id; + int m; + int blas_id; + + bool operator==(const cache_key &other) const { + return resol_id == other.resol_id && m == other.m && + blas_id == other.blas_id; + } + cache_key(int resol_id_, int m_, int blas_id_) + : resol_id(resol_id_), m(m_), blas_id(blas_id_) {} +}; +} // namespace +template <> struct std::hash { + std::size_t operator()(const cache_key &k) const { + return k.blas_id * 1000000 + k.resol_id * 10000 + k.m; } }; -} // namespace detail -template auto &get_graph_cache() { - // we store at most one graph per "m" (# fields) and "blas id" - static std::unordered_map, hipGraphExec_t, - detail::pair_hash> +namespace { +template auto &get_graph_cache() { + // we store at most one graph per "m" (# fields) and "blas id" and resolution + static std::unordered_map> graphCache; return graphCache; } -template auto &get_ptr_cache() { +template auto &get_ptr_cache() { + using real_t = typename Gemm::real_type; static std::unordered_map< - std::pair, std::tuple, - detail::pair_hash> + cache_key, std::tuple> ptrCache; return ptrCache; } -template void free_gemm_cache(float *, size_t) { - get_graph_cache().clear(); - get_ptr_cache().clear(); +template void free_gemm_graph_cache(float *, size_t) { + get_graph_cache().clear(); + get_ptr_cache().clear(); +} +template +void erase_resol_from_cache(Cache &cache, int resol_id) { + // Note that in C++20 this could also be std::erase_if + int erased = 0; + for (auto it = cache.begin(); it != cache.end();) { + if (it->first.resol_id == resol_id) { + it = cache.erase(it); + ++erased; + } else + ++it; + } +} +template void erase_from_caches(int resol_id) { + erase_resol_from_cache(get_graph_cache(), resol_id); + erase_resol_from_cache(get_ptr_cache(), resol_id); } // this version is using graphs and caches the graphs template -void run_group_graph(Gemm &&gemm, int m, int *n, int *k, Real alpha, - const Real *A, int lda, int *offsetsA, const Real *B, - int ldb, int *offsetsB, Real beta, Real *C, int ldc, - int *offsetsC, int batchCount, hipStream_t stream, - int blas_id, void *growing_allocator) { +void run_group_graph(Gemm &&gemm, int resol_id, int m, const int *n, + const int *k, Real alpha, const Real *A, int lda, + const int64_t *offsetsA, const Real *B, const int *ldb, + const int64_t *offsetsB, Real beta, Real *C, int ldc, + const int64_t *offsetsC, int batchCount, + hipStream_t stream, int blas_id, void *growing_allocator) { growing_allocator_register_free_c(growing_allocator, - free_gemm_cache); + free_gemm_graph_cache); // we store at most one graph per "m" (# fields) and "blas id" - auto &graphCache = get_graph_cache(); + auto &graphCache = get_graph_cache(); // we also store A, B, and C and recreate the graph if they change - auto &ptrCache = get_ptr_cache(); + auto &ptrCache = get_ptr_cache(); - auto key = std::make_pair(m, blas_id); + auto key = cache_key{resol_id, m, blas_id}; auto ptrs = ptrCache.find(key); if (ptrs != ptrCache.end() && @@ -86,13 +113,14 @@ void run_group_graph(Gemm &&gemm, int m, int *n, int *k, Real alpha, // the plan is cached, but the pointers are not correct. we remove and // delete the graph, but we keep the hipblas handles, if this happens more // often, we should cache this... - std::cout << "WARNING GEMM: POINTER CHANGE - Graph recreation might be slow." << std::endl; + std::cout + << "WARNING GEMM: POINTER CHANGE - Graph recreation might be slow." + << std::endl; std::cout << "We have an entry with key {m=" << m << ", blas_id=" << blas_id - << "}\n"; + << ", resol_id=" << resol_id << "}\n"; std::cout << "Pointers: " << std::get<0>(ptrs->second) << ", " << std::get<1>(ptrs->second) << ", " << std::get<2>(ptrs->second) << " vs. " << A << ", " << B << ", " << C << std::endl; - HIC_CHECK(hipGraphExecDestroy(graphCache[key])); graphCache.erase(key); ptrCache.erase(key); } @@ -111,36 +139,41 @@ void run_group_graph(Gemm &&gemm, int m, int *n, int *k, Real alpha, HIC_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); gemm(stream, m, n[i], k[i], alpha, A + offsetsA[i], lda, B + offsetsB[i], - ldb, beta, C + offsetsC[i], ldc); + ldb[i], beta, C + offsetsC[i], ldc); hipGraph_t my_graph; HIC_CHECK(hipStreamEndCapture(stream, &my_graph)); hipGraphNode_t my_node; - HIC_CHECK(hipGraphAddChildGraphNode(&my_node, new_graph, nullptr, 0, - my_graph)); + HIC_CHECK( + hipGraphAddChildGraphNode(&my_node, new_graph, nullptr, 0, my_graph)); } hipGraphExec_t instance; HIC_CHECK(hipGraphInstantiate(&instance, new_graph, NULL, NULL, 0)); HIC_CHECK(hipStreamDestroy(stream)); HIC_CHECK(hipGraphDestroy(new_graph)); - graphCache.insert({key, instance}); + graphCache.insert({key, std::shared_ptr( + new hipGraphExec_t{instance}, [](auto ptr) { + HIC_CHECK(hipGraphExecDestroy(*ptr)); + delete ptr; + })}); ptrCache.insert({key, std::make_tuple(A, B, C)}); } - HIC_CHECK(hipGraphLaunch(graphCache.at(key), stream)); + HIC_CHECK(hipGraphLaunch(*graphCache.at(key), stream)); } // stupid simple gemm calls template -void run_group(Gemm &&gemm, int m, int *n, int *k, Real alpha, const Real *A, - int lda, int *offsetsA, const Real *B, int ldb, int *offsetsB, - Real beta, Real *C, int ldc, int *offsetsC, int batchCount, - hipStream_t stream, int = -1) { +void run_group(Gemm &&gemm, int resol_id, int m, const int *n, const int *k, + Real alpha, const Real *A, int lda, const int64_t *offsetsA, + const Real *B, const int *ldb, const int64_t *offsetsB, + Real beta, Real *C, int ldc, const int64_t *offsetsC, + int batchCount, hipStream_t stream, int = -1) { for (int i = 0; i < batchCount; ++i) { if (m == 0 || n[i] == 0 || k[i] == 0) continue; - gemm(stream, m, n[i], k[i], alpha, A + offsetsA[i], lda, B + offsetsB[i], ldb, - beta, C + offsetsC[i], ldc); + gemm(stream, m, n[i], k[i], alpha, A + offsetsA[i], lda, B + offsetsB[i], + ldb[i], beta, C + offsetsC[i], ldc); } } @@ -148,7 +181,6 @@ void run_group(Gemm &&gemm, int m, int *n, int *k, Real alpha, const Real *A, #include "hicblas_cutlass.cuda.h" #endif -namespace detail { hipblasHandle_t get_hipblas_handle() { static hipblasHandle_t handle; if (!handle) @@ -157,6 +189,7 @@ hipblasHandle_t get_hipblas_handle() { } template struct hipblas_gemm_grouped { public: + using real_type = Real; hipblas_gemm_grouped(hipblasOperation_t transa, hipblasOperation_t transb) : transa_(transa), transb_(transb) { // we need to get the hipblas handle here, otherwise this could be created @@ -171,156 +204,169 @@ template struct hipblas_gemm_grouped { if constexpr (std::is_same::value) HICBLAS_CHECK(hipblasSgemm(handle, transa_, transb_, m, n, k, &alpha, A, - lda, B, ldb, &beta, C, ldc)); + lda, B, ldb, &beta, C, ldc)); if constexpr (std::is_same::value) HICBLAS_CHECK(hipblasDgemm(handle, transa_, transb_, m, n, k, &alpha, A, - lda, B, ldb, &beta, C, ldc)); + lda, B, ldb, &beta, C, ldc)); } private: hipblasOperation_t transa_, transb_; }; -} // namespace detail #ifndef USE_CUTLASS -void hipblas_sgemm_wrapper_grouped(int blas_id, char transa, char transb, - int m, int *n, int *k, float alpha, - const float *A, int lda, int *offsetsA, - const float *B, int ldb, int *offsetsB, float beta, - float *C, int ldc, int *offsetsC, - int batchCount, hipStream_t stream, - void *growing_allocator) { - - hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; - if (transa=='T' || transa=='t') - op_t1=HIPBLAS_OP_T; - if (transb=='T' || transb=='t') - op_t2=HIPBLAS_OP_T; - - using namespace detail; +void hipblas_sgemm_wrapper_grouped( + int resol_id, int blas_id, char transa, char transb, int m, const int *n, + const int *k, float alpha, const float *A, int lda, const int64_t *offsetsA, + const float *B, const int *ldb, const int64_t *offsetsB, float beta, + float *C, int ldc, const int64_t *offsetsC, int batchCount, + hipStream_t stream, void *growing_allocator) { + + hipblasOperation_t op_t1 = HIPBLAS_OP_N, op_t2 = HIPBLAS_OP_N; + if (transa == 'T' || transa == 't') + op_t1 = HIPBLAS_OP_T; + if (transb == 'T' || transb == 't') + op_t2 = HIPBLAS_OP_T; + #ifdef USE_GRAPHS_GEMM - run_group_graph(hipblas_gemm_grouped(op_t1, op_t2), m, n, k, alpha, A, - lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, - batchCount, stream, blas_id, growing_allocator); + run_group_graph(hipblas_gemm_grouped(op_t1, op_t2), resol_id, m, n, k, + alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, + offsetsC, batchCount, stream, blas_id, growing_allocator); #else - run_group(hipblas_gemm_grouped(op_t1, op_t2), m, n, k, alpha, A, - lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, + run_group(hipblas_gemm_grouped(op_t1, op_t2), resol_id, m, n, k, alpha, + A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, stream); #endif } #endif -void hipblas_dgemm_wrapper_grouped(int blas_id, char transa, char transb, - int m, int *n, int *k, - double alpha, - const double *A, int lda, int *offsetsA, - const double *B, int ldb, int *offsetsB, - double beta, - double *C, int ldc, int *offsetsC, +void hipblas_dgemm_wrapper_grouped(int resol_id, int blas_id, char transa, + char transb, int m, const int *n, + const int *k, double alpha, const double *A, + int lda, const int64_t *offsetsA, + const double *B, const int *ldb, + const int64_t *offsetsB, double beta, + double *C, int ldc, const int64_t *offsetsC, int batchCount, hipStream_t stream, void *) { - hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; - if (transa=='T' || transa=='t') - op_t1=HIPBLAS_OP_T; - if (transb=='T' || transb=='t') - op_t2=HIPBLAS_OP_T; + hipblasOperation_t op_t1 = HIPBLAS_OP_N, op_t2 = HIPBLAS_OP_N; + if (transa == 'T' || transa == 't') + op_t1 = HIPBLAS_OP_T; + if (transb == 'T' || transb == 't') + op_t2 = HIPBLAS_OP_T; - using namespace detail; - run_group(hipblas_gemm_grouped(op_t1, op_t2), m, n, k, alpha, - A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, - batchCount, stream, blas_id); + run_group(hipblas_gemm_grouped(op_t1, op_t2), resol_id, m, n, k, + alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, C, ldc, offsetsC, + batchCount, stream, blas_id); } } // namespace extern "C" { -void hipblas_dgemm_wrapper (char transa, char transb, - int m, int n,int k, double alpha, - const double *A, int lda, int tda, - const double *B, int ldb, int tdb, double beta, - double *C, int ldc, int tdc, int batchCount, - size_t stream, - void *growing_allocator) { - - hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; - - if (transa=='T' || transa=='t') - op_t1=HIPBLAS_OP_T; - if (transb=='T' || transb=='t') - op_t2=HIPBLAS_OP_T; - - if (!hip_alreadyAllocated_dgemm_handle){ - hipblasCreate(&handle_hip_dgemm); - hip_alreadyAllocated_dgemm_handle=true; +void hipblas_dgemm_wrapper(char transa, char transb, int m, int n, int k, + double alpha, const double *A, int lda, int tda, + const double *B, int ldb, int tdb, double beta, + double *C, int ldc, int tdc, int batchCount, + size_t stream, void *growing_allocator) { + + hipblasOperation_t op_t1 = HIPBLAS_OP_N, op_t2 = HIPBLAS_OP_N; + + if (transa == 'T' || transa == 't') + op_t1 = HIPBLAS_OP_T; + if (transb == 'T' || transb == 't') + op_t2 = HIPBLAS_OP_T; + + if (!hip_alreadyAllocated_dgemm_handle) { + HICBLAS_CHECK(hipblasCreate(&handle_hip_dgemm)); + hip_alreadyAllocated_dgemm_handle = true; } - hipblasHandle_t handle = detail::get_hipblas_handle(); - HICBLAS_CHECK( - hipblasSetStream(handle, *(hipStream_t*)stream)); - - HICBLAS_CHECK(hipblasDgemmStridedBatched(handle,op_t1,op_t2,m,n,k, - &alpha,(const double *) A,lda,tda, (const double *) B,ldb,tdb, - &beta,(double *) C,ldc,tdc,batchCount)); + hipblasHandle_t handle = get_hipblas_handle(); + HICBLAS_CHECK(hipblasSetStream(handle, *(hipStream_t *)stream)); + HICBLAS_CHECK(hipblasDgemmStridedBatched( + handle, op_t1, op_t2, m, n, k, &alpha, (const double *)A, lda, tda, + (const double *)B, ldb, tdb, &beta, (double *)C, ldc, tdc, batchCount)); } -void hipblas_sgemm_wrapper (char transa, char transb, - int m, int n,int k, float alpha, - const float *A, int lda, int tda, - const float *B, int ldb, int tdb, float beta, - float *C, int ldc, int tdc, - int batchCount, - void *growing_allocator) { +void hipblas_sgemm_wrapper(char transa, char transb, int m, int n, int k, + float alpha, const float *A, int lda, int tda, + const float *B, int ldb, int tdb, float beta, + float *C, int ldc, int tdc, int batchCount, + void *growing_allocator) { - hipblasOperation_t op_t1=HIPBLAS_OP_N, op_t2=HIPBLAS_OP_N; + hipblasOperation_t op_t1 = HIPBLAS_OP_N, op_t2 = HIPBLAS_OP_N; - if (transa=='T' || transa=='t') - op_t1=HIPBLAS_OP_T; - if (transb=='T' || transb=='t') - op_t2=HIPBLAS_OP_T; + if (transa == 'T' || transa == 't') + op_t1 = HIPBLAS_OP_T; + if (transb == 'T' || transb == 't') + op_t2 = HIPBLAS_OP_T; - if (!hip_alreadyAllocated_sgemm_handle){ - hipblasCreate(&handle_hip_sgemm); - hip_alreadyAllocated_sgemm_handle=true; + if (!hip_alreadyAllocated_sgemm_handle) { + HICBLAS_CHECK(hipblasCreate(&handle_hip_sgemm)); + hip_alreadyAllocated_sgemm_handle = true; } - HICBLAS_CHECK(hipblasSgemmStridedBatched(handle_hip_sgemm,op_t1,op_t2,m,n,k, - &alpha,(const float *) A,lda,tda, (const float *) B,ldb,tdb, - &beta,(float*) C,ldc,tdc,batchCount)); - + HICBLAS_CHECK(hipblasSgemmStridedBatched( + handle_hip_sgemm, op_t1, op_t2, m, n, k, &alpha, (const float *)A, lda, + tda, (const float *)B, ldb, tdb, &beta, (float *)C, ldc, tdc, + batchCount)); } -void hipblas_sgemm_wrapper_grouped(int blas_id, char transa, char transb, - int m, int *n, int *k, float alpha, - const float *A, int lda, int *offsetsA, - const float *B, int ldb, int *offsetsB, float beta, - float *C, int ldc, int *offsetsC, - int batchCount, size_t stream, - void *growing_allocator) { +void hipblas_sgemm_wrapper_grouped( + int resol_id, int blas_id, char transa, char transb, int m, const int *n, + const int *k, float alpha, const float *A, int lda, const int64_t *offsetsA, + const float *B, const int *ldb, const int64_t *offsetsB, float beta, + float *C, int ldc, const int64_t *offsetsC, int batchCount, size_t stream, + void *growing_allocator) { #ifdef USE_CUTLASS - cutlass_sgemm_wrapper_grouped(blas_id, transa, transb, m, n, k, alpha, A, lda, offsetsA, - B, ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, - *(hipStream_t*)stream, - growing_allocator); + cutlass_sgemm_wrapper_grouped(resol_id, blas_id, transa, transb, m, n, k, + alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, + C, ldc, offsetsC, batchCount, + *(hipStream_t *)stream, growing_allocator); #else - hipblas_sgemm_wrapper_grouped(blas_id, transa, transb, m, n, k, alpha, A, lda, - offsetsA, B, ldb, offsetsB, beta, C, ldc, - offsetsC, batchCount, - *(hipStream_t*)stream, - growing_allocator); + hipblas_sgemm_wrapper_grouped(resol_id, blas_id, transa, transb, m, n, k, + alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, + C, ldc, offsetsC, batchCount, + *(hipStream_t *)stream, growing_allocator); #endif } -void hipblas_dgemm_wrapper_grouped(int blas_id, char transa, char transb, - int m, int *n, int *k, double alpha, - const double *A, int lda, int *offsetsA, - const double *B, int ldb, int *offsetsB, double beta, - double *C, int ldc, int *offsetsC, - int batchCount, size_t stream, - void *growing_allocator) { - hipblas_dgemm_wrapper_grouped(blas_id, transa, transb, m, n, k, alpha, A, lda, offsetsA, B, - ldb, offsetsB, beta, C, ldc, offsetsC, batchCount, - *(hipStream_t*)stream, - growing_allocator); +void hipblas_dgemm_wrapper_grouped(int resol_id, int blas_id, char transa, + char transb, int m, const int *n, + const int *k, double alpha, const double *A, + int lda, const int64_t *offsetsA, + const double *B, const int *ldb, + const int64_t *offsetsB, double beta, + double *C, int ldc, const int64_t *offsetsC, + int batchCount, size_t stream, + void *growing_allocator) { + hipblas_dgemm_wrapper_grouped(resol_id, blas_id, transa, transb, m, n, k, + alpha, A, lda, offsetsA, B, ldb, offsetsB, beta, + C, ldc, offsetsC, batchCount, + *(hipStream_t *)stream, growing_allocator); +} + +void clean_gemm(int resol_id) { + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); +#ifdef USE_CUTLASS + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); + erase_from_caches>(resol_id); +#endif } } diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index 988e1b3ef..21adae46d 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -14,9 +14,13 @@ MODULE HICBLAS_MOD -USE EC_PARKIND, ONLY: JPIM, JPRM, JPRD +USE EC_PARKIND, ONLY: JPIM, JPRM, JPRD, JPIB USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE +#ifdef ACCGPU USE OPENACC_LIB, ONLY: ACC_GET_HIP_STREAM +#endif +#ifdef OMPGPU +#endif IMPLICIT NONE @@ -63,10 +67,16 @@ SUBROUTINE HIP_SGEMM_BATCHED( & TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE HIP_SGEMM_BATCHED END INTERFACE +INTERFACE + SUBROUTINE CLEAN_GEMM(RESOL_ID) BIND(C, NAME="clean_gemm") + USE ISO_C_BINDING + INTEGER(KIND=C_INT), INTENT(IN), VALUE :: RESOL_ID + END SUBROUTINE +END INTERFACE INTERFACE SUBROUTINE HIP_DGEMM_GROUPED( & - & BLAS_ID, CTA, CTB, & + & RESOL_ID, BLAS_ID, CTA, CTB, & & M, N, K, & & ALPHA, & & A, LDA, OFFSETA, & @@ -75,10 +85,11 @@ SUBROUTINE HIP_DGEMM_GROUPED( & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & &) BIND(C, NAME='hipblas_dgemm_wrapper_grouped') - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR, C_INT64_T CHARACTER(1,C_CHAR), VALUE :: CTA, CTB - INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT - INTEGER(C_INT) :: N(*), K(*), OFFSETA(*), OFFSETB(*), OFFSETC(*) + INTEGER(C_INT), VALUE :: RESOL_ID, BLAS_ID, M, LDA, LDC, BATCHCOUNT + INTEGER(C_INT) :: N(*), K(*), LDB(*) + INTEGER(C_INT64_T) :: OFFSETA(*), OFFSETB(*), OFFSETC(*) REAL(C_DOUBLE), VALUE :: ALPHA,BETA REAL(C_DOUBLE) :: A(*), B(*), C(*) INTEGER(KIND=C_SIZE_T) :: STREAM @@ -86,7 +97,7 @@ SUBROUTINE HIP_DGEMM_GROUPED( & END SUBROUTINE HIP_DGEMM_GROUPED SUBROUTINE HIP_SGEMM_GROUPED( & - & BLAS_ID, CTA, CTB, & + & RESOL_ID, BLAS_ID, CTA, CTB, & & M, N, K, & & ALPHA, & & A, LDA, OFFSETA, & @@ -95,10 +106,11 @@ SUBROUTINE HIP_SGEMM_GROUPED( & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & &) BIND(C, NAME='hipblas_sgemm_wrapper_grouped') - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR, C_INT64_T CHARACTER(1,C_CHAR), VALUE :: CTA, CTB - INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT - INTEGER(C_INT) :: N(*), K(*), OFFSETA(*), OFFSETB(*), OFFSETC(*) + INTEGER(C_INT), VALUE :: RESOL_ID, BLAS_ID, M, LDA, LDC, BATCHCOUNT + INTEGER(C_INT) :: N(*), K(*), LDB(*) + INTEGER(C_INT64_T) :: OFFSETA(*), OFFSETB(*), OFFSETC(*) REAL(C_FLOAT), VALUE :: ALPHA,BETA REAL(C_FLOAT) :: A(*), B(*), C(*) INTEGER(KIND=C_SIZE_T) :: STREAM @@ -135,14 +147,20 @@ SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD( & INTEGER(KIND=JPIM) :: STRIDEC INTEGER(KIND=JPIM) :: BATCHCOUNT INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTEGER(KIND=C_LONG) :: HIP_STREAM +#ifdef ACCGPU HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) +#endif +#ifdef OMPGPU +#endif #if defined(_CRAYFTN) +#ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(AARRAY,BARRAY,CARRAY) +#endif #endif CALL HIP_DGEMM_BATCHED( & & TRANSA, TRANSB, & @@ -154,8 +172,10 @@ SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD( & & CARRAY, LDC, STRIDEC, & & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) #if defined(_CRAYFTN) +#ifdef ACCGPU !$ACC END HOST_DATA #endif +#endif END SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD( & @@ -185,11 +205,15 @@ SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD( & INTEGER(KIND=JPIM) :: STRIDEC INTEGER(KIND=JPIM) :: BATCHCOUNT INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTEGER(KIND=C_LONG) :: HIP_STREAM +#ifdef ACCGPU HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) +#endif +#ifdef OMPGPU +#endif CALL HIP_SGEMM_BATCHED( & & TRANSA, TRANSB, & @@ -203,7 +227,7 @@ SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD( & END SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & - & BLAS_ID, TRANSA, TRANSB, & + & RESOL_ID, BLAS_ID, TRANSA, TRANSB, & & M, N, K, & & ALPHA, & & AARRAY, LDA, OFFSETA, & @@ -212,6 +236,7 @@ SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & & CARRAY, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC) USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC + INTEGER(KIND=C_INT), INTENT(IN) :: RESOL_ID INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB INTEGER(KIND=JPIM) :: M @@ -220,24 +245,28 @@ SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & REAL(KIND=JPRD) :: ALPHA REAL(KIND=JPRD), DIMENSION(:) :: AARRAY INTEGER(KIND=JPIM) :: LDA - INTEGER(KIND=JPIM) :: OFFSETA(:) + INTEGER(KIND=JPIB) :: OFFSETA(:) REAL(KIND=JPRD), DIMENSION(*) :: BARRAY - INTEGER(KIND=JPIM) :: LDB - INTEGER(KIND=JPIM) :: OFFSETB(:) + INTEGER(KIND=JPIM) :: LDB(:) + INTEGER(KIND=JPIB) :: OFFSETB(:) REAL(KIND=JPRD) :: BETA REAL(KIND=JPRD), DIMENSION(:) :: CARRAY INTEGER(KIND=JPIM) :: LDC - INTEGER(KIND=JPIM) :: OFFSETC(:) + INTEGER(KIND=JPIB) :: OFFSETC(:) INTEGER(KIND=JPIM) :: BATCHCOUNT INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTEGER(KIND=C_LONG) :: HIP_STREAM +#ifdef ACCGPU HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) +#endif +#ifdef OMPGPU +#endif CALL HIP_DGEMM_GROUPED( & - & BLAS_ID, TRANSA, TRANSB, & + & RESOL_ID, BLAS_ID, TRANSA, TRANSB, & & M, N, K, & & ALPHA, & & AARRAY, LDA, OFFSETA, & @@ -249,7 +278,7 @@ SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & END SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& - & BLAS_ID, TRANSA, TRANSB, & + & RESOL_ID, BLAS_ID, TRANSA, TRANSB, & & M, N, K, & & ALPHA, & & AARRAY, LDA, OFFSETA, & @@ -258,6 +287,7 @@ SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& & CARRAY, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC) USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC + INTEGER(KIND=C_INT), INTENT(IN) :: RESOL_ID INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB INTEGER(KIND=JPIM) :: M @@ -266,27 +296,33 @@ SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& REAL(KIND=JPRM) :: ALPHA REAL(KIND=JPRM), DIMENSION(:) :: AARRAY INTEGER(KIND=JPIM) :: LDA - INTEGER(KIND=JPIM) :: OFFSETA(:) - REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY - INTEGER(KIND=JPIM) :: LDB - INTEGER(KIND=JPIM) :: OFFSETB(:) + INTEGER(KIND=JPIB) :: OFFSETA(:) + REAL(KIND=JPRM), DIMENSION(*) :: BARRAY + INTEGER(KIND=JPIM) :: LDB(:) + INTEGER(KIND=JPIB) :: OFFSETB(:) REAL(KIND=JPRM) :: BETA REAL(KIND=JPRM), DIMENSION(:) :: CARRAY INTEGER(KIND=JPIM) :: LDC - INTEGER(KIND=JPIM) :: OFFSETC(:) + INTEGER(KIND=JPIB) :: OFFSETC(:) INTEGER(KIND=JPIM) :: BATCHCOUNT INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTEGER(KIND=C_LONG) :: HIP_STREAM +#ifdef ACCGPU HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) +#endif +#ifdef OMPGPU +#endif #if defined(_CRAYFTN) +#ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(AARRAY,BARRAY,CARRAY) +#endif #endif CALL HIP_SGEMM_GROUPED( & - & BLAS_ID, TRANSA, TRANSB, & + & RESOL_ID, BLAS_ID, TRANSA, TRANSB, & & M, N, K, & & ALPHA, & & AARRAY, LDA, OFFSETA, & @@ -295,8 +331,10 @@ SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& & CARRAY, LDC, OFFSETC, & & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) #if defined(_CRAYFTN) +#ifdef ACCGPU !$ACC END HOST_DATA #endif +#endif END SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD diff --git a/src/trans/gpu/algor/hicfft.hip.cpp b/src/trans/gpu/algor/hicfft.hip.cpp index 8278d9a4b..56a8e52e3 100644 --- a/src/trans/gpu/algor/hicfft.hip.cpp +++ b/src/trans/gpu/algor/hicfft.hip.cpp @@ -1,5 +1,8 @@ #include "hicfft.h" +#include +#include + #include "growing_allocator.h" #define fftSafeCall(err) __fftSafeCall(err, __FILE__, __LINE__) @@ -43,57 +46,100 @@ template class hicfft_plan { real *data_real_l = &data_real[offset]; cmplx *data_complex_l = &data_complex[offset / 2]; if constexpr (Direction == HIPFFT_R2C) - fftSafeCall(hipfftExecR2C(handle, data_real_l, data_complex_l)); + fftSafeCall(hipfftExecR2C(*handle_ptr, data_real_l, data_complex_l)); else if constexpr (Direction == HIPFFT_C2R) - fftSafeCall(hipfftExecC2R(handle, data_complex_l, data_real_l)); + fftSafeCall(hipfftExecC2R(*handle_ptr, data_complex_l, data_real_l)); else if constexpr (Direction == HIPFFT_D2Z) - fftSafeCall(hipfftExecD2Z(handle, data_real_l, data_complex_l)); + fftSafeCall(hipfftExecD2Z(*handle_ptr, data_real_l, data_complex_l)); else if constexpr (Direction == HIPFFT_Z2D) - fftSafeCall(hipfftExecZ2D(handle, data_complex_l, data_real_l)); + fftSafeCall(hipfftExecZ2D(*handle_ptr, data_complex_l, data_real_l)); } void set_stream(hipStream_t stream) { - fftSafeCall(hipfftSetStream(handle, stream)); + fftSafeCall(hipfftSetStream(*handle_ptr, stream)); } - hicfft_plan(hipfftHandle handle_, int offset_) - : handle(handle_), offset(offset_) {} + hicfft_plan(hipfftHandle handle_, int64_t offset_) + : handle_ptr(new hipfftHandle{handle_}, + [](auto ptr) { + fftSafeCall(hipfftDestroy(*ptr)); + delete ptr; + }), + offset(offset_) {} private: - hipfftHandle handle; - int offset; + std::shared_ptr handle_ptr; + int64_t offset; +}; + +struct cache_key { + int resol_id; + int kfield; + bool operator==(const cache_key &other) const { + return resol_id == other.resol_id && kfield == other.kfield; + } + cache_key(int resol_id_, int kfield_) + : resol_id(resol_id_), kfield(kfield_) {} +}; +} // namespace + +template <> struct std::hash { + std::size_t operator()(const cache_key &k) const { + return k.resol_id * 10000 + k.kfield; + } }; +namespace { // kfield -> handles template auto &get_fft_plan_cache() { - static std::unordered_map>> + static std::unordered_map>> fftPlansCache; return fftPlansCache; } // kfield -> graphs template auto &get_graph_cache() { - static std::unordered_map graphCache; + static std::unordered_map> + graphCache; return graphCache; } // kfield -> ptrs template auto &get_ptr_cache() { using real = typename Type::real; using cmplx = typename Type::cmplx; - static std::unordered_map> ptrCache; + static std::unordered_map> ptrCache; return ptrCache; } template -void free_fft_cache(float *, size_t) { +void free_fft_graph_cache(float *, size_t) { get_graph_cache().clear(); get_ptr_cache().clear(); } +template +void erase_resol_from_cache(Cache &cache, int resol_id) { + // Note that in C++20 this could also be std::erase_if + int erased = 0; + for (auto it = cache.begin(); it != cache.end();) { + if (it->first.resol_id == resol_id) { + it = cache.erase(it); + ++erased; + } else + ++it; + } +} +template +void erase_from_caches(int resol_id) { + erase_resol_from_cache(get_fft_plan_cache(), resol_id); + erase_resol_from_cache(get_graph_cache(), resol_id); + erase_resol_from_cache(get_ptr_cache(), resol_id); +} template -std::vector> plan_all(int kfield, int *loens, - int nfft, int *offsets) { +std::vector> plan_all(int resol_id, int kfield, int *loens, + int nfft, int64_t *offsets) { static constexpr bool is_forward = Direction == HIPFFT_R2C || Direction == HIPFFT_D2Z; - auto key = kfield; + auto key = cache_key{resol_id, kfield}; auto &fftPlansCache = get_fft_plan_cache(); auto fftPlans = fftPlansCache.find(key); if (fftPlans == fftPlansCache.end()) { @@ -104,7 +150,6 @@ std::vector> plan_all(int kfield, int *loens, int nloen = loens[i]; hipfftHandle plan; - fftSafeCall(hipfftCreate(&plan)); int dist = offsets[i + 1] - offsets[i]; int embed[] = {1}; fftSafeCall(hipfftPlanMany( @@ -119,17 +164,17 @@ std::vector> plan_all(int kfield, int *loens, template void run_group_graph(typename Type::real *data_real, - typename Type::cmplx *data_complex, int kfield, int *loens, - int *offsets, int nfft, void *growing_allocator) { + typename Type::cmplx *data_complex, int resol_id, int kfield, int *loens, + int64_t *offsets, int nfft, void *growing_allocator) { growing_allocator_register_free_c(growing_allocator, - free_fft_cache); + free_fft_graph_cache); // if the pointers are changed, we need to update the graph auto &ptrCache = get_ptr_cache(); // kfield -> ptrs auto &graphCache = get_graph_cache(); // kfield -> graphs - auto key = kfield; + auto key = cache_key{resol_id, kfield}; auto ptrs = ptrCache.find(key); if (ptrs != ptrCache.end() && (ptrs->second.first != data_real || ptrs->second.second != data_complex)) { @@ -138,7 +183,6 @@ void run_group_graph(typename Type::real *data_real, // we should cache this... std::cout << "WARNING FFT: POINTER CHANGE --> THIS MIGHT BE SLOW" << std::endl; - HIC_CHECK(hipGraphExecDestroy(graphCache[key])); graphCache.erase(key); ptrCache.erase(key); } @@ -146,7 +190,8 @@ void run_group_graph(typename Type::real *data_real, auto graph = graphCache.find(key); if (graph == graphCache.end()) { // this graph does not exist yet - auto plans = plan_all(kfield, loens, nfft, offsets); + auto plans = + plan_all(resol_id, kfield, loens, nfft, offsets); // create a temporary stream hipStream_t stream; @@ -172,19 +217,23 @@ void run_group_graph(typename Type::real *data_real, HIC_CHECK(hipStreamDestroy(stream)); HIC_CHECK(hipGraphDestroy(new_graph)); - graphCache.insert({key, instance}); + graphCache.insert({key, std::shared_ptr( + new hipGraphExec_t{instance}, [](auto ptr) { + HIC_CHECK(hipGraphExecDestroy(*ptr)); + delete ptr; + })}); ptrCache.insert({key, std::make_pair(data_real, data_complex)}); } - HIC_CHECK(hipGraphLaunch(graphCache.at(key), 0)); + HIC_CHECK(hipGraphLaunch(*graphCache.at(key), 0)); HIC_CHECK(hipDeviceSynchronize()); } template void run_group(typename Type::real *data_real, - typename Type::cmplx *data_complex, int kfield, int *loens, - int *offsets, int nfft, void *growing_allocator) { - auto plans = plan_all(kfield, loens, nfft, offsets); + typename Type::cmplx *data_complex, int resol_id, int kfield, int *loens, + int64_t *offsets, int nfft, void *growing_allocator) { + auto plans = plan_all(resol_id, kfield, loens, nfft, offsets); for (auto &plan : plans) plan.exec(data_real, data_complex); @@ -199,29 +248,36 @@ extern "C" { #define RUN run_group #endif void execute_dir_fft_float(float *data_real, hipfftComplex *data_complex, - int kfield, int *loens, int *offsets, int nfft, + int resol_id, int kfield, int *loens, int64_t *offsets, int nfft, void *growing_allocator) { - RUN(data_real, data_complex, kfield, loens, offsets, nfft, + RUN(data_real, data_complex, resol_id, kfield, loens, offsets, nfft, growing_allocator); } void execute_inv_fft_float(hipfftComplex *data_complex, float *data_real, - int kfield, int *loens, int *offsets, int nfft, + int resol_id, int kfield, int *loens, int64_t *offsets, int nfft, void *growing_allocator) { - RUN(data_real, data_complex, kfield, loens, offsets, nfft, + RUN(data_real, data_complex, resol_id, kfield, loens, offsets, nfft, growing_allocator); } void execute_dir_fft_double(double *data_real, - hipfftDoubleComplex *data_complex, int kfield, - int *loens, int *offsets, int nfft, + hipfftDoubleComplex *data_complex, int resol_id, int kfield, + int *loens, int64_t *offsets, int nfft, void *growing_allocator) { - RUN(data_real, data_complex, kfield, loens, offsets, nfft, - growing_allocator); + RUN(data_real, data_complex, resol_id, kfield, loens, + offsets, nfft, growing_allocator); } void execute_inv_fft_double(hipfftDoubleComplex *data_complex, - double *data_real, int kfield, int *loens, - int *offsets, int nfft, void *growing_allocator) { - RUN(data_real, data_complex, kfield, loens, offsets, nfft, + double *data_real, int resol_id, int kfield, int *loens, + int64_t *offsets, int nfft, void *growing_allocator) { + RUN(data_real, data_complex, resol_id, kfield, loens, offsets, nfft, growing_allocator); } #undef RUN + +void clean_fft(int resol_id) { + erase_from_caches(resol_id); + erase_from_caches(resol_id); + erase_from_caches(resol_id); + erase_from_caches(resol_id); +} } diff --git a/src/trans/gpu/external/gath_spec.F90 b/src/trans/gpu/external/gath_spec.F90 index 2e7746780..1f515be73 100755 --- a/src/trans/gpu/external/gath_spec.F90 +++ b/src/trans/gpu/external/gath_spec.F90 @@ -82,9 +82,13 @@ SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LD INTEGER(KIND=JPIM) :: IVSET(KFGATHG) INTEGER(KIND=JPIM) :: IFRECV,IFSEND,J INTEGER(KIND=JPIM) :: IFLD,ICOEFF -INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G +INTEGER(KIND=JPIM) :: ISMAX, ISPEC2, ISPEC2_G, ISPEC2MX INTEGER(KIND=JPIM) :: IPOSSP(NPRTRW+1) +INTEGER(KIND=JPIM) :: IUMPP(NPRTRW) +INTEGER(KIND=JPIM) :: IPTRMS(NPRTRW) INTEGER(KIND=JPIM),ALLOCATABLE :: IDIM0G(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IALLMS(:) +INTEGER(KIND=JPIM),ALLOCATABLE :: IKN(:) LOGICAL :: LLDIM1_IS_FLD REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -111,8 +115,11 @@ SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LD ISMAX = R%NSMAX IF(PRESENT(KSMAX)) ISMAX = KSMAX ALLOCATE(IDIM0G(0:ISMAX)) +ALLOCATE(IALLMS(ISMAX+1)) +ALLOCATE(IKN(0:ISMAX)) IF(ISMAX /= R%NSMAX) THEN CALL SUWAVEDI(ISMAX,ISMAX,NPRTRW,MYSETW,KPOSSP=IPOSSP,KSPEC2=ISPEC2,& + & KUMPP=IUMPP,KALLMS=IALLMS,KPTRMS=IPTRMS,KSPEC2MX=ISPEC2MX, & & KDIM0G=IDIM0G) ISPEC2_G = (ISMAX+1)*(ISMAX+2) ELSE @@ -120,7 +127,14 @@ SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LD ISPEC2_G = R%NSPEC2_G IPOSSP(:) = D%NPOSSP(:) IDIM0G(:) = D%NDIM0G(:) + ISPEC2MX = D%NSPEC2MX + IUMPP(:) = D%NUMPP(:) + IALLMS(:) = D%NALLMS(:) + IPTRMS(:) = D%NPTRMS(:) ENDIF +DO J=0,ISMAX + IKN(J)=2*(ISMAX+1-J) +ENDDO IFSEND = 0 IFRECV = 0 @@ -181,7 +195,7 @@ SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LD ENDIF CALL GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,IVSET,PSPEC,LLDIM1_IS_FLD,& - & ISMAX,ISPEC2,ISPEC2_G,IPOSSP,IDIM0G,LDZA0IP) + & ISMAX,ISPEC2,ISPEC2MX,ISPEC2_G,IPOSSP,IDIM0G,IUMPP,IALLMS,IPTRMS,IKN,LDZA0IP) DEALLOCATE(IDIM0G) IF (LHOOK) CALL DR_HOOK('GATH_SPEC',1,ZHOOK_HANDLE) @@ -190,4 +204,3 @@ SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LD ! ------------------------------------------------------------------ END SUBROUTINE GATH_SPEC - diff --git a/src/trans/gpu/external/gpnorm_trans.F90 b/src/trans/gpu/external/gpnorm_trans.F90 index 7a691a538..80ad505b7 100755 --- a/src/trans/gpu/external/gpnorm_trans.F90 +++ b/src/trans/gpu/external/gpnorm_trans.F90 @@ -59,11 +59,9 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) USE TPM_GEN, ONLY: NOUT USE TPM_DIM, ONLY: R USE TPM_TRANS, ONLY: LGPNORM, NGPBLKS, NPROMA -USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF, & - & D_NPTRLS, MYPROC -USE TPM_GEOMETRY, ONLY: G, G_NLOEN +USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, MYPROC +USE TPM_GEOMETRY, ONLY: G USE TPM_FIELDS, ONLY: F -USE TPM_FIELDS_FLAT, ONLY: F_RW USE SET_RESOL_MOD, ONLY: SET_RESOL USE SET2PE_MOD, ONLY: SET2PE USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD @@ -142,6 +140,7 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) CALL ABORT_TRANS('GPNORM_TRANS:THIRD DIMENSION OF PGP TOO SMALL ') ENDIF +ASSOCIATE(F_RW=>F%RW, D_NSTAGTF=>D%NSTAGTF, D_NPTRLS=>D%NPTRLS, G_NLOEN=>G%NLOEN) IF_GP=KFIELDS IF_SCALARS_G=KFIELDS @@ -164,7 +163,13 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ZMAXGL = 0._JPRBT ZMINGPN = 0._JPRBT ZMAXGPN = 0._JPRBT + +#ifdef OMPGPU +!$OMP TARGET DATA MAP(TOFROM:ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) +#endif +#ifdef ACCGPU !$ACC DATA COPY(ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) +#endif ALLOCATE(IVSETS(NPRTRV)) IVSETS(:)=0 @@ -200,26 +205,43 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) CALL GSTATS(1429,0) IF( IF_FS > 0 )THEN - !$ACC DATA & - !$ACC& PRESENT(F,F_RW) & - !$ACC& PRESENT(D,D_NSTAGTF,D_NPTRLS,G_NLOEN) - - !$ACC KERNELS - DO JF=1,IF_FS - V = PREEL_REAL(IF_FS*D_NSTAGTF(1)+(JF-1)*(D%NSTAGTF(2)-D%NSTAGTF(1))) - ZMINGL(JF,IBEG:IEND)=HUGE(1_JPRBT) - ZMAXGL(JF,IBEG:IEND)=-HUGE(1_JPRBT) - ENDDO - !$ACC END KERNELS +#ifdef OMPGPU + !$OMP TARGET DATA MAP(PRESENT,ALLOC:F,F_RW,D,D_NSTAGTF,D_NPTRLS,G_NLOEN) +#endif +#ifdef ACCGPU + !$ACC DATA PRESENT(F,F_RW,D,D_NSTAGTF,D_NPTRLS,G_NLOEN) +#endif + +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU + !$ACC KERNELS +#endif + DO JF=1,IF_FS + V = PREEL_REAL(IF_FS*D_NSTAGTF(1)+(JF-1)*(D%NSTAGTF(2)-D%NSTAGTF(1))) + ZMINGL(JF,IBEG:IEND)=HUGE(1_JPRBT) + ZMAXGL(JF,IBEG:IEND)=-HUGE(1_JPRBT) + ENDDO +#ifdef ACCGPU + !$ACC END KERNELS +#endif -! FIRST DO SUMS IN EACH FULL LATITUDE + ! FIRST DO SUMS IN EACH FULL LATITUDE +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU !$ACC KERNELS +#endif DO JGL=1,D%NDGL_FS IGL = D_NPTRLS(MYSETW) + JGL - 1 DO JF=1,IF_FS ZAVE(JF,JGL)=0.0_JPRB +#ifdef ACCGPU !$ACC loop +#endif DO JL=1,G_NLOEN(IGL) V = PREEL_REAL(IF_FS*D%NSTAGTF(JGL)+(JF-1)*(D%NSTAGTF(JGL+1)-D%NSTAGTF(JGL))+JL) ZAVE(JF,JGL)=ZAVE(JF,JGL)+V @@ -228,16 +250,30 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ENDDO ENDDO ENDDO +#ifdef ACCGPU !$ACC END KERNELS +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU !$ACC KERNELS +#endif DO JF=1,IF_FS ZMINGPN(JF)=MINVAL(ZMINGL(JF,IBEG:IEND)) ZMAXGPN(JF)=MAXVAL(ZMAXGL(JF,IBEG:IEND)) ENDDO +#ifdef ACCGPU !$ACC END KERNELS +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO +#endif +#ifdef ACCGPU !$ACC KERNELS +#endif DO JGL=IBEG,IEND IGL = D_NPTRLS(MYSETW) + JGL - 1 DO JF=1,IF_FS @@ -245,14 +281,28 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) !write(iunit,*) 'aver inside ',JF,IF_FS,IGL,ZAVE(JF,JGL), F_RW(IGL), G_NLOEN(IGL),ZMINGPN(JF),ZMAXGPN(JF) ENDDO ENDDO +#ifdef ACCGPU !$ACC END KERNELS +#endif -!$ACC end data +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif +#ifdef ACCGPU + !$ACC END DATA +#endif ENDIF -!$ACC end data +#ifdef OMPGPU +!$OMP END TARGET DATA +#endif +#ifdef ACCGPU +!$ACC END DATA +#endif CALL GSTATS(1429,1) +END ASSOCIATE + ! from here rest on CPU ! IT IS IMPORTANT THAT SUMS ARE NOW DONE IN LATITUDE ORDER diff --git a/src/trans/gpu/external/gpnorm_trans_gpu.F90 b/src/trans/gpu/external/gpnorm_trans_gpu.F90 index d72b7a1bf..23f6e0f70 100755 --- a/src/trans/gpu/external/gpnorm_trans_gpu.F90 +++ b/src/trans/gpu/external/gpnorm_trans_gpu.F90 @@ -58,9 +58,9 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) USE TPM_GEN, ONLY: NOUT USE TPM_DIM, ONLY: R USE TPM_TRANS, ONLY: LGPNORM, NGPBLKS, NPROMA -USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF, D_NPTRLS -USE TPM_GEOMETRY, ONLY: G, G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS_FLAT, ONLY: F_RW +USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC +USE TPM_GEOMETRY, ONLY: G +USE TPM_FIELDS, ONLY: F USE SET_RESOL_MOD, ONLY: SET_RESOL USE TRGTOL_MOD, ONLY: TRGTOL USE SET2PE_MOD, ONLY: SET2PE @@ -162,12 +162,18 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ZMAXGPN = 0._JPRBT #ifdef ACCGPU !$ACC ENTER DATA COPYIN(ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) +#endif +#ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(TO:ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) #endif IF (.NOT. ALLOCATED(ZGTF)) THEN ALLOCATE(ZGTF(IF_FS*D%NLENGTF)) WRITE(NOUT,*)'ZGTF :',SIZE(ZGTF) #ifdef ACCGPU !$ACC ENTER DATA CREATE(ZGTF) +#endif +#ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(ALLOC:ZGTF) #endif ENDIF ENDIF @@ -197,39 +203,37 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) IBEG=1 IEND=D%NDGL_FS +ASSOCIATE(D_NSTAGTF=>D%NSTAGTF,D_NPTRLS=>D%NPTRLS,G_NLOEN=>G%NLOEN,F_RW=>F%RW) + CALL GSTATS(1429,0) IF( IF_FS > 0 )THEN #ifdef ACCGPU - !$ACC DATA & - !$ACC& COPY(F_RW) & - !$ACC& COPY(D,D_NSTAGTF,D_NPTRLS,G_NLOEN,G_NLOEN_MAX) & - !$ACC& PRESENT(ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) + !$ACC DATA & + !$ACC& COPY(D,D_NSTAGTF,D_NPTRLS,G_NLOEN,F,F_RW) & + !$ACC& PRESENT(ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) + !$ACC KERNELS #endif #ifdef OMPGPU - !$OMP TARGET DATA MAP(TO:F_RW,D,D_NSTAGTF,D_NPTRLS,G_NLOEN,G_NLOEN_MAX) & - !$OMP& MAP(PRESENT,ALLOC:ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET DATA MAP(TO:F,D,D_NSTAGTF,D_NPTRLS,G_NLOEN) & + !$OMP& MAP(PRESENT,ALLOC:ZGTF,ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif + DO JF=1,IF_FS + V = ZGTF(IF_FS*D_NSTAGTF(1)+(JF-1)*(D%NSTAGTF(2)-D%NSTAGTF(1))) + ZMINGL(JF,IBEG:IEND)=HUGE(1_JPRBT) + ZMAXGL(JF,IBEG:IEND)=-HUGE(1_JPRBT) + ENDDO #ifdef ACCGPU - !$ACC KERNELS + !$ACC END KERNELS #endif - DO JF=1,IF_FS - V = ZGTF(IF_FS*D_NSTAGTF(1)+(JF-1)*(D%NSTAGTF(2)-D%NSTAGTF(1))) - ZMINGL(JF,IBEG:IEND)=HUGE(1_JPRBT) - ZMAXGL(JF,IBEG:IEND)=-HUGE(1_JPRBT) - ENDDO + + ! FIRST DO SUMS IN EACH FULL LATITUDE #ifdef ACCGPU - !$ACC END KERNELS + !$ACC KERNELS #endif - -! FIRST DO SUMS IN EACH FULL LATITUDE - #ifdef OMPGPU !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO -#endif -#ifdef ACCGPU - !$ACC KERNELS #endif DO JGL=1,D%NDGL_FS IGL = D_NPTRLS(MYSETW) + JGL - 1 @@ -250,11 +254,11 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) !$ACC END KERNELS #endif -#ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO -#endif #ifdef ACCGPU !$ACC KERNELS +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif DO JF=1,IF_FS ZMINGPN(JF)=MINVAL(ZMINGL(JF,IBEG:IEND)) @@ -264,17 +268,17 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) !$ACC END KERNELS #endif -#ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO -#endif #ifdef ACCGPU !$ACC KERNELS +#endif +#ifdef OMPGPU + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO #endif DO JGL=IBEG,IEND IGL = D_NPTRLS(MYSETW) + JGL - 1 DO JF=1,IF_FS ZAVE(JF,JGL)=ZAVE(JF,JGL)*F_RW(IGL)/G_NLOEN(IGL) - !write(iunit,*) 'aver inside ',JF,IF_FS,IGL,ZAVE(JF,JGL), F_RW(IGL), G_NLOEN(IGL),ZMINGPN(JF),ZMAXGPN(JF) + !write(iunit,*) 'aver inside ',JF,IF_FS,IGL,ZAVE(JF,JGL), F%RW(IGL), G_NLOEN(IGL),ZMINGPN(JF),ZMAXGPN(JF) ENDDO ENDDO #ifdef ACCGPU @@ -316,6 +320,8 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ENDIF CALL GSTATS(1429,1) +END ASSOCIATE + ! from here rest on CPU ! IT IS IMPORTANT THAT SUMS ARE NOW DONE IN LATITUDE ORDER diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index bf1cec9db..08e7946e8 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -103,20 +103,14 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& !ifndef INTERFACE -USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT, C_ASSOCIATED, C_SIZE_T -USE EC_ENV_MOD, ONLY: EC_GETENV +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_ASSOCIATED, C_SIZE_T, C_SIZEOF, C_LOC USE TPM_GEN, ONLY: NOUT, MSETUP0, NCUR_RESOL, NDEF_RESOL, & & NMAX_RESOL, NPRINTLEV, LENABLED, NERR -USE TPM_DIM, ONLY: R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL -USE TPM_DISTR, ONLY: D, DISTR_RESOL, NPROC, NPRTRV, D_NUMP, D_NDGL_FS, D_MYMS, & - & D_NSTAGT0B, D_NSTAGT1B, D_NPROCL, D_NPNTGTB1, D_NASM0, & - & D_NSTAGTF, D_MSTABF, D_NPNTGTB0, D_NPROCM, D_NPTRLS, & - & MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 -USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX, G_NLOEN, & - & G_NLOEN_MAX +USE TPM_DIM, ONLY: R, DIM_RESOL +USE TPM_DISTR, ONLY: D, DISTR_RESOL, NPROC, MYPROC +USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL USE TPM_FIELDS, ONLY: FIELDS_RESOL, F -USE TPM_FIELDS_FLAT, ONLY: F_RW, F_RLAPIN, F_RACTHE, ZEPSNM, & - & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 +USE TPM_FIELDS_GPU, ONLY: FIELDS_GPU_RESOL, FG USE TPM_FLT, ONLY: FLT_RESOL, S USE TPM_CTL, ONLY: CTL_RESOL, C USE SET_RESOL_MOD, ONLY: SET_RESOL @@ -134,10 +128,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& USE OPENACC, ONLY: ACC_DEVICE_KIND, ACC_GET_DEVICE_TYPE, ACC_GET_NUM_DEVICES, & & ACC_SET_DEVICE_NUM, ACC_GET_DEVICE_NUM #endif -#ifdef OMPGPU -! TODO: add OMP equivalents to ACC library routines -!USE OMP_LIB -#endif !endif INTERFACE @@ -170,23 +160,19 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& !ifndef INTERFACE ! Local variables -INTEGER(KIND=JPIM) :: JGL,JRES,IDEF_RESOL -INTEGER(KIND=JPIM) :: JMLOC, KM, ILA, ILS, KMLOC, KDGLU, JK, I, J +INTEGER(KIND=JPIM) :: JGL, JRES, IDEF_RESOL +INTEGER(KIND=JPIM) :: JMLOC, KM, ILA, ILS, KDGLU +INTEGER(KIND=JPIM) :: IMLOC0(1) -INTEGER(KIND=JPIM) :: IPROC, IPROCS, ISTAN, ISTAS, ISL, IGLS, JFLD - -LOGICAL :: LLP1,LLP2, LLSPSETUPONLY -REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 +LOGICAL :: LLP1, LLP2, LLSPSETUPONLY REAL(KIND=JPHOOK) :: ZHOOK_HANDLE -CHARACTER(LEN=8) :: CENV - #ifdef ACCGPU INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE #endif -INTEGER :: INUMDEVS, IUNIT, ISTAT, IDEV, MYGPU +INTEGER :: INUMDEVS, IDEV, MYGPU -#include "user_clock.intfb.h" +REAL(KIND=JPRBT), POINTER :: LOCAL_ARR(:,:) ! ------------------------------------------------------------------ IF (LHOOK) CALL DR_HOOK('SETUP_TRANS',0,ZHOOK_HANDLE) @@ -232,6 +218,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IDEF_RESOL = 1 ALLOCATE(DIM_RESOL(NMAX_RESOL)) ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) + ALLOCATE(FIELDS_GPU_RESOL(NMAX_RESOL)) ALLOCATE(GEOM_RESOL(NMAX_RESOL)) ALLOCATE(DISTR_RESOL(NMAX_RESOL)) ALLOCATE(FLT_RESOL(NMAX_RESOL)) @@ -261,8 +248,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IF(LLP1) WRITE(NOUT,*) '=== DEFINING RESOLUTION ',NCUR_RESOL - - ! Defaults for optional arguments @@ -287,6 +272,24 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& R%NNOEXTZL=0 R%NNOEXTZG=0 + +IF(PRESENT(LDSPSETUPONLY)) THEN + LLSPSETUPONLY=LDSPSETUPONLY +! <<<<<<<<<<< EXTRA TO WORKAROUND NOT YET IMPLEMENTED FEATURE + IF (LLSPSETUPONLY) THEN + WRITE(NOUT,'(A)') "DEVELOPER WARNING: LDSPSETUPONLY IS NOT YET IMPLEMENTED CORRECTLY WITH GPU BACKEND. IGNORING IT FOR NOW" + LLSPSETUPONLY = .FALSE. + R%NDGL = NPROC + ! Make even and positive + IF (MOD(R%NDGL,2) /= 0) THEN + R%NDGL = NPROC+1 + ENDIF + R%NDGL = MAX(2,R%NDGL) + ENDIF +! >>>>>>>>>>>>> +ENDIF + + ! IMPLICIT argument : G%LAM = .FALSE. @@ -299,6 +302,8 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IF(PRESENT(LDLL)) THEN S%LDLL=LDLL IF( LDLL ) THEN + CALL ABORT_TRANS ('SETUP_TRANS: LDLL=.TRUE. is not yet supported with GPU backend') + S%NDLON=R%NDLON ! account for pole + equator R%NDGL=R%NDGL+2 @@ -318,7 +323,8 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! Optional arguments ALLOCATE(G%NLOEN(R%NDGL)) -IF(LLP2)WRITE(NOUT,9) 'NLOEN ',SIZE(G%NLOEN ),SHAPE(G%NLOEN ) +IF (LLP2) WRITE(NOUT,'("ARRAY NLOEN ALLOCATED",8I8)') SIZE(G%NLOEN ),SHAPE(G%NLOEN ) + IF(PRESENT(KLOEN)) THEN IF( MINVAL(KLOEN(:)) <= 0 )THEN CALL ABORT_TRANS ('SETUP_TRANS: KLOEN INVALID (ONE or MORE POINTS <= 0)') @@ -367,16 +373,24 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IF(PRESENT(LDGRIDONLY)) THEN D%LGRIDONLY=LDGRIDONLY -ENDIF - -IF(PRESENT(LDSPSETUPONLY)) THEN - LLSPSETUPONLY=LDSPSETUPONLY +! <<<<<<<<<<< EXTRA TO WORKAROUND NOT YET IMPLEMENTED FEATURE + IF (D%LGRIDONLY) THEN + R%NSMAX=1 + R%NTMAX = R%NSMAX + WRITE(NOUT,'(A,I0)') "DEVELOPER WARNING: LDGRIDONLY IS NOT YET IMPLEMENTED CORRECTLY WITH GPU BACKEND. IGNORE AND USE TRUNCATION: ", R%NSMAX + D%LGRIDONLY = .FALSE. + ENDIF +! >>>>>>>>>>>>> ENDIF IF(PRESENT(LDPNMONLY)) THEN D%LCPNMONLY=LDPNMONLY ENDIF +IF(PRESENT(LDUSEFFTW)) THEN + WRITE(NOUT,*) 'SETUP_TRANS: LDUSEFFTW option is not relevant for GPUs' +ENDIF + ! Setup distribution independent dimensions CALL SETUP_DIMS @@ -425,6 +439,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IF(PRESENT(LDKEEPRPNM)) THEN S%LKEEPRPNM=LDKEEPRPNM ENDIF + ! Setup resolution dependent structures ! ------------------------------------- @@ -433,14 +448,14 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IF( .NOT.LLSPSETUPONLY ) THEN -! Compute Legendre polonomial and Gaussian Latitudes and Weights + ! Compute Legendre polonomial and Gaussian Latitudes and Weights CALL SULEG -! Second part of setup of distributed environment + ! Second part of setup of distributed environment CALL SUMP_TRANS CALL GSTATS(1802,0) -! Initialize Fast Fourier Transform package + ! Initialize Fast Fourier Transform package IF (.NOT.D%LCPNMONLY) CALL SUFFT CALL GSTATS(1802,1) ELSE @@ -453,272 +468,116 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& IF (LHOOK) CALL DR_HOOK('SETUP_TRANS',1,ZHOOK_HANDLE) ! ------------------------------------------------------------------ -9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) - -IF( .NOT.D%LGRIDONLY ) THEN - -!allocating arrays for the GPU: -!! CALL EC_GETENV("ECTRANS_GPU_NFLEV",CENV) -!! IF(LEN_TRIM(CENV)>0) THEN -!! WRITE(NOUT,'(2A)') "Using temporary solution for buffer allocation using ${ECTRANS_GPU_NFLEV}=",CENV -!! READ(CENV,*) NFLEV0 -!! ELSE -!! NFLEV0 = ceiling(REAL(IMAXFLD)/NPRTRV) -!! ENDIF -IUNIT=300+MYPROC - -#ifdef ACCGPU -!!IDEVTYPE=ACC_DEVICE_NVIDIA -IDEVTYPE=ACC_GET_DEVICE_TYPE() -INUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) -MYGPU = MOD(MYPROC-1,INUMDEVS) -CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) -MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) -!ISTAT = CUDA_GETDEVICE(IDEV) -#endif -WRITE(NOUT,*) 'R%NTMAX=',R%NTMAX -WRITE(NOUT,*) 'R%NSMAX=',R%NSMAX +IF( .NOT.D%LGRIDONLY ) THEN #ifdef ACCGPU -!$ACC ENTER DATA COPYIN(F,S,D,R,G) -!$ACC ENTER DATA & -!$ACC& COPYIN(F%RN,F%RLAPIN) & -!$ACC& COPYIN(S%FA,S%ITHRESHOLD) & -!$ACC& COPYIN(D%NUMP,D%MYMS,D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,D%NPROCM,D%NPTRLS,D%MSTABF) & -!$ACC& COPYIN(R%NDGNH,R%NSMAX) & -!$ACC& COPYIN(G%NDGLU,G%NMEN,G%NLOEN) - -#endif -#ifdef OMPGPU -!$OMP TARGET ENTER DATA MAP(ALLOC:ZAA,ZAS) -!$OMP TARGET ENTER DATA MAP(TO:F,S,D,D_NUMP,D_MYMS,R,R_NDGNH,R_NSMAX,G,G_NDGLU) -!$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB0,D_NPNTGTB1,D_NSTAGT0B,D_NSTAGT1B,D_NSTAGTF,G_NMEN,D_NPROCM,D_NPTRLS,G,G_NLOEN,D_MSTABF) + IDEVTYPE = ACC_GET_DEVICE_TYPE() + INUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) + MYGPU = MOD(MYPROC-1, INUMDEVS) + CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) #endif -! Initialize A arrays + WRITE(NOUT,*) 'R%NTMAX=',R%NTMAX + WRITE(NOUT,*) 'R%NSMAX=',R%NSMAX -ALLOCATE(ZAA(ALIGN(R%NDGNH,8),ALIGN((R%NTMAX+2)/2,8),D%NUMP)) -ALLOCATE(ZAS(ALIGN(R%NDGNH,8),ALIGN((R%NTMAX+3)/2,8),D%NUMP)) + ! Initialize A arrays -WRITE(NOUT,*)'setup_trans: sizes1 NUMP=',D%NUMP -WRITE(NOUT,*)'ZAS:',size(ZAS) -WRITE(NOUT,*)'ZAA:',size(ZAA) + ALLOCATE(FG%ZAA(D%OFFSETS_GEMM_MATRIX(D%NUMP+1))) + ALLOCATE(FG%ZAS(D%OFFSETS_GEMM_MATRIX(D%NUMP+1))) -ZAA(:,:,:) = 0._JPRBT -ZAS(:,:,:) = 0._JPRBT + FG%ZAA(:) = 0._JPRBT + FG%ZAS(:) = 0._JPRBT -DO JMLOC=1,D%NUMP - KM = D%MYMS(JMLOC) - KDGLU = G%NDGLU(KM) - ILA = (R%NSMAX-KM+2)/2 - ILS = (R%NSMAX-KM+3)/2 + DO JMLOC=1,D%NUMP + KM = D%MYMS(JMLOC) + KDGLU = G%NDGLU(KM) + ILA = (R%NSMAX-KM+2)/2 + ILS = (R%NSMAX-KM+3)/2 - ZAA(1:KDGLU,1:ILA,JMLOC)=S%FA(JMLOC)%RPNMA(1:KDGLU,1:ILA) - ZAS(1:KDGLU,1:ILS,JMLOC)=S%FA(JMLOC)%RPNMS(1:KDGLU,1:ILS) -ENDDO + IF (KM /= 0) THEN + CALL C_F_POINTER(C_LOC(FG%ZAA(1+D%OFFSETS_GEMM_MATRIX(JMLOC))), LOCAL_ARR, & + & (/D%LEGENDRE_MATRIX_STRIDES(JMLOC),ILA/)) + LOCAL_ARR(1:KDGLU,1:ILA) = S%FA(JMLOC)%RPNMA(1:KDGLU,1:ILA) -! permanent copy of Legendre polynomials into device + CALL C_F_POINTER(C_LOC(FG%ZAS(1+D%OFFSETS_GEMM_MATRIX(JMLOC))), LOCAL_ARR, & + & (/D%LEGENDRE_MATRIX_STRIDES(JMLOC),ILS/)) + LOCAL_ARR(1:KDGLU,1:ILS) = S%FA(JMLOC)%RPNMS(1:KDGLU,1:ILS) + ELSE + ALLOCATE(FG%ZAA0(ALIGN(KDGLU,8),ILA)) + ALLOCATE(FG%ZAS0(ALIGN(KDGLU,8),ILS)) + FG%ZAA0(:,:) = 0 + FG%ZAS0(:,:) = 0 + FG%ZAA0(1:KDGLU,1:ILA)=S%FA(JMLOC)%RPNMA(1:KDGLU,1:ILA) + FG%ZAS0(1:KDGLU,1:ILS)=S%FA(JMLOC)%RPNMS(1:KDGLU,1:ILS) + ENDIF + ENDDO + + ALLOCATE(FG%ZEPSNM(D%NUMP,0:R%NTMAX+2)) + FG%ZEPSNM = 0._JPRBT + CALL PREPSNM + WRITE(NOUT,*)'setup_trans: sizes1 NUMP=',D%NUMP #ifdef ACCGPU -!$ACC ENTER DATA COPYIN(ZAA,ZAS) + WRITE(NOUT,*) 'Using OpenACC' #endif #ifdef OMPGPU + WRITE(NOUT,*) 'Using OpenMP offloading' #endif + WRITE(NOUT,'(A10,":",I11,"B")') 'FG%ZAS', C_SIZEOF(FG%ZAS(1))*SIZE(FG%ZAS) + WRITE(NOUT,'(A10,":",I11,"B")') 'FG%ZAA', C_SIZEOF(FG%ZAA(1))*SIZE(FG%ZAA) + WRITE(NOUT,'(A10,":",I11,"B")') 'FG%ZAS0', C_SIZEOF(FG%ZAS0(1,1))*SIZE(FG%ZAS0) + WRITE(NOUT,'(A10,":",I11,"B")') 'FG%ZAA0', C_SIZEOF(FG%ZAA0(1,1))*SIZE(FG%ZAA0) + WRITE(NOUT,'(A10,":",I11,"B")') 'FG%ZEPSNM', C_SIZEOF(FG%ZEPSNM(1,1))*SIZE(FG%ZEPSNM) -ALLOCATE(ZEPSNM(D%NUMP,0:R%NTMAX+2)) -WRITE(NOUT,*)'ZEPSNM :',SIZE(ZEPSNM) - -ZEPSNM = 0._JPRBT -! on the host -CALL PREPSNM + IF (ANY(D%MYMS == 0)) THEN #ifdef ACCGPU -!$ACC ENTER DATA COPYIN(ZEPSNM) + !$ACC ENTER DATA COPYIN(FG%ZAA0,FG%ZAS0) ASYNC(1) #endif - -! TODO: I guess tose might be needed again -! add arrays for GPNORM1 -!ALLOCATE(ZAVE(IF_FS,R%NDGL)) -!ALLOCATE(ZMINGL(IF_FS,R%NDGL)) -!ALLOCATE(ZMAXGL(IF_FS,R%NDGL)) -!ALLOCATE(ZMINGPN(IF_FS)) -!ALLOCATE(ZMAXGPN(IF_FS)) -! -!ZAVE = 0._JPRBT -!ZMINGL = 0._JPRBT -!ZMAXGL = 0._JPRBT -!ZMINGPN = 0._JPRBT -!ZMAXGPN = 0._JPRBT -!#ifdef ACCGPU -!!$ACC ENTER DATA COPYIN(ZAVE,ZMINGL,ZMAXGL,ZMINGPN,ZMAXGPN) -!#endif - -!set up flat copies of constant data -R_NSMAX=R%NSMAX -R_NTMAX=R%NTMAX -R_NDGNH=R%NDGNH -R_NDGL=R%NDGL - - -ALLOCATE(D_NSTAGT0B(SIZE(D%NSTAGT0B))) -ALLOCATE(D_NSTAGT1B(SIZE(D%NSTAGT1B))) -ALLOCATE(D_NPNTGTB0(0:SIZE(D%NPNTGTB0,1)-1,SIZE(D%NPNTGTB0,2))) -ALLOCATE(D_NPNTGTB1(SIZE(D%NPNTGTB1,1),SIZE(D%NPNTGTB1,2))) -ALLOCATE(D_MYMS(SIZE(D%MYMS))) -ALLOCATE(D_NPROCL(SIZE(D%NPROCL))) -ALLOCATE(D_NASM0(0:SIZE(D%NASM0)-1)) -ALLOCATE(D_NSTAGTF(SIZE(D%NSTAGTF))) -ALLOCATE(D_MSTABF(SIZE(D%MSTABF))) -ALLOCATE(D_NPROCM(0:SIZE(D%NPROCM)-1)) -ALLOCATE(D_NPTRLS(SIZE(D%NPTRLS))) - -ALLOCATE(G_NDGLU(0:SIZE(G%NDGLU)-1)) -ALLOCATE(G_NMEN(SIZE(G%NMEN))) -ALLOCATE(G_NLOEN(SIZE(G%NLOEN))) - -ALLOCATE(F_RW(SIZE(F%RW))) -ALLOCATE(F_RLAPIN(-1:SIZE(F%RLAPIN)-2)) -ALLOCATE(F_RACTHE(SIZE(F%RACTHE))) - - -DO I=0,SIZE(G%NDGLU)-1 - G_NDGLU(I)=G%NDGLU(I) -END DO - -G_NMEN_MAX=0 -DO I=1,SIZE(G%NMEN) - G_NMEN(I)=G%NMEN(I) - IF (G_NMEN(I) .GT. G_NMEN_MAX) G_NMEN_MAX=G_NMEN(I) -END DO - -G_NLOEN_MAX=0 -DO I=1,SIZE(G%NLOEN) - G_NLOEN(I)=G%NLOEN(I) - IF (G_NLOEN(I) .GT. G_NLOEN_MAX) G_NLOEN_MAX=G_NLOEN(I) -END DO - -DO I=1,SIZE(D%NSTAGT0B) - D_NSTAGT0B(I)=D%NSTAGT0B(I) -END DO - -DO I=1,SIZE(D%NSTAGT1B) - D_NSTAGT1B(I)=D%NSTAGT1B(I) -END DO - -DO I=1,SIZE(D%NPROCL) - D_NPROCL(I)=D%NPROCL(I) -END DO - -DO I=0,SIZE(D%NASM0)-1 - D_NASM0(I)=D%NASM0(I) -END DO - -DO I=1,SIZE(D%NSTAGTF) - D_NSTAGTF(I)=D%NSTAGTF(I) -END DO - -DO I=1,SIZE(D%MSTABF) - D_MSTABF(I)=D%MSTABF(I) -END DO - -DO I=0,SIZE(D%NPROCM)-1 - D_NPROCM(I)=D%NPROCM(I) -END DO - -DO I=1,SIZE(D%NPTRLS) - D_NPTRLS(I)=D%NPTRLS(I) -END DO - -DO I=1,SIZE(D%NPNTGTB0,2) - DO J=0,SIZE(D%NPNTGTB0,1)-1 - D_NPNTGTB0(J,I)=D%NPNTGTB0(J,I) - END DO -END DO - -DO I=1,SIZE(D%NPNTGTB1,2) - DO J=1,SIZE(D%NPNTGTB1,1) - D_NPNTGTB1(J,I)=D%NPNTGTB1(J,I) - END DO -END DO - -D_OFFSETS_GEMM1 => D%OFFSETS_GEMM1 -D_OFFSETS_GEMM2 => D%OFFSETS_GEMM2 #ifdef OMPGPU + !$OMP TARGET ENTER DATA MAP(TO:FG%ZAA0,FG%ZAS0) #endif -#ifdef ACCGPU -!$ACC ENTER DATA COPYIN(D_OFFSETS_GEMM1,D_OFFSETS_GEMM2) -#endif + ENDIF -D_NUMP=D%NUMP -D_NDGL_FS=D%NDGL_FS - -KMLOC0 = -1 -DO I=1,SIZE(D%MYMS) - D_MYMS(I)=D%MYMS(I) - IF(D_MYMS(I) == 0) KMLOC0 = I -end DO - -! arrays for m=0 in ledir_mod: -IF(KMLOC0 >= 0) THEN - ALLOCATE(ZAA0(SIZE(ZAA,1),SIZE(ZAA,2))) - ALLOCATE(ZAS0(SIZE(ZAS,1),SIZE(ZAS,2))) - ZAA0 = ZAA(:,:,KMLOC0) - ZAS0 = ZAS(:,:,KMLOC0) #ifdef ACCGPU - !$ACC ENTER DATA COPYIN(ZAA0,ZAS0) +#ifdef _CRAYFTN + !$ACC ENTER DATA COPYIN(R,R%NSMAX,R%NTMAX,R%NDGL,R%NDGNH) ASYNC(1) +#else + !$ACC ENTER DATA COPYIN(R) ASYNC(1) #endif -#ifdef OMPGPU - !$OMP TARGET ENTER DATA MAP(TO:ZAA0,ZAS0) + !$ACC ENTER DATA COPYIN(F,F%RLAPIN,F%RACTHE,F%RW) ASYNC(1) + !$ACC ENTER DATA COPYIN(FG,FG%ZAA,FG%ZAS,FG%ZEPSNM) ASYNC(1) +#ifdef _CRAYFTN + !$ACC ENTER DATA COPYIN(D,D%NUMP,D%MYMS,D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,D%NPROCM,D%NPROCL)& + !$ACC& COPYIN(D%NPTRLS,D%MSTABF,D%NASM0,D%OFFSETS_GEMM1,D%OFFSETS_GEMM2,D%NDGL_FS) ASYNC(1) +#else + !$ACC ENTER DATA COPYIN(D,D%MYMS,D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,D%NPROCM,D%NPROCL)& + !$ACC& COPYIN(D%NPTRLS,D%MSTABF,D%NASM0,D%OFFSETS_GEMM1,D%OFFSETS_GEMM2) ASYNC(1) #endif - WRITE(NOUT,*) 'GPU arrays for m=0 successfully allocated' -#ifdef ACCGPU - WRITE(NOUT,*) 'Using OpenACC' + !$ACC ENTER DATA COPYIN(G,G%NDGLU,G%NMEN,G%NLOEN) ASYNC(1) + !$ACC WAIT(1) #endif #ifdef OMPGPU - WRITE(NOUT,*) 'Using OpenMP offloading' + !$OMP TARGET ENTER DATA MAP(TO:R) + !$OMP TARGET ENTER DATA MAP(TO:F%RLAPIN,F%RACTHE,F%RW) + !$OMP TARGET ENTER DATA MAP(TO:FG%ZAA,FG%ZAS,FG%ZEPSNM) + !$OMP TARGET ENTER DATA MAP(TO:D%MYMS,D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,& + !$OMP& D%NPROCM,D%NPROCL,D%NPTRLS,D%MSTABF,D%NASM0,D%OFFSETS_GEMM1,& + !$OMP& D%OFFSETS_GEMM2) + !$OMP TARGET ENTER DATA MAP(TO:G%NDGLU,G%NMEN,G%NLOEN) #endif -ENDIF -DO I=1,SIZE(F%RW) - F_RW(I)=F%RW(I) -END DO -DO I=-1,SIZE(F%RLAPIN)-2 - F_RLAPIN(I)=REAL(F%RLAPIN(I),JPRBT) -END DO -DO I=1,SIZE(F%RACTHE) - F_RACTHE(I)=F%RACTHE(I) -END DO + WRITE(NOUT,*) '===GPU arrays successfully allocated' -#ifdef ACCGPU -!$ACC ENTER DATA COPYIN(R_NSMAX,R_NTMAX,R_NDGL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,& -!$ACC& D_NPNTGTB1,D_NPROCL,D_NUMP,D_NDGL_FS,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF,& -!$ACC& D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,& -!$ACC& G_NLOEN_MAX,F_RW,F_RLAPIN,F_RACTHE) -#endif -#ifdef OMPGPU -!$OMP TARGET ENTER DATA MAP(TO:R_NSMAX,R_NTMAX,R_NDGL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B) -!$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB1,D_NPROCL,D_NUMP,D_NDGL_FS,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF) -!$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN) -!$OMP TARGET ENTER DATA MAP(TO:G_NLOEN_MAX,F_RW,F_RLAPIN,F_RACTHE) -#endif + ! TODO: This might be good idea - those polynomials are not needed + !DO JMLOC=1,D%NUMP + ! DEALLOCATE(S%FA(JMLOC)%RPNMA) + ! DEALLOCATE(S%FA(JMLOC)%RPNMS) + !ENDDO -WRITE(NOUT,*) '===GPU arrays successfully allocated' -#ifdef ACCGPU -!$ACC wait -#endif -#ifdef OMPGPU -!$OMP BARRIER -#endif - -! free memory -!DO JMLOC=1,D%NUMP -! DEALLOCATE(S%FA(JMLOC)%RPNMA) -! DEALLOCATE(S%FA(JMLOC)%RPNMS) -!ENDDO +ENDIF ! D%LGRIDONLY !endif INTERFACE -ENDIF - END SUBROUTINE SETUP_TRANS diff --git a/src/trans/gpu/external/trans_end.F90 b/src/trans/gpu/external/trans_end.F90 index 5329c8598..366c759a3 100755 --- a/src/trans/gpu/external/trans_end.F90 +++ b/src/trans/gpu/external/trans_end.F90 @@ -47,16 +47,15 @@ SUBROUTINE TRANS_END(CDMODE) !ifndef INTERFACE USE TPM_GEN, ONLY: MSETUP0, NCUR_RESOL, NMAX_RESOL, LENABLED, NDEF_RESOL -USE TPM_DIM, ONLY: R, DIM_RESOL, R_NSMAX, R_NTMAX, R_NDGNH, R_NDGL -USE TPM_DISTR, ONLY: D, DISTR_RESOL, NPRCIDS, D_NUMP, D_MYMS, D_NSTAGT0B, D_NSTAGT1B, & - & D_NPROCL, D_NPNTGTB1, D_NASM0, D_NSTAGTF, D_MSTABF, D_NPNTGTB0, & - & D_NPROCM, D_NPTRLS -USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX +USE TPM_DIM, ONLY: R, DIM_RESOL +USE TPM_DISTR, ONLY: D, DISTR_RESOL, NPRCIDS +USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL USE TPM_FIELDS, ONLY: F, FIELDS_RESOL -USE TPM_FIELDS_FLAT, ONLY: F_RW, ZEPSNM, ZAA, ZAS, ZAA0, ZAS0 +USE TPM_FIELDS_GPU, ONLY: FG, FIELDS_GPU_RESOL USE TPM_CTL, ONLY: C, CTL_RESOL USE TPM_FLT, ONLY: S, FLT_RESOL -USE TPM_TRANS, ONLY: FOUBUF, FOUBUF_IN +USE TPM_TRANS, ONLY: GROWING_ALLOCATION +USE GROWING_ALLOCATOR_MOD,ONLY: DESTROY_GROWING_ALLOCATOR USE EQ_REGIONS_MOD, ONLY: N_REGIONS USE SET_RESOL_MOD, ONLY: SET_RESOL USE DEALLOC_RESOL_MOD, ONLY: DEALLOC_RESOL @@ -67,32 +66,12 @@ SUBROUTINE TRANS_END(CDMODE) ! Local variables INTEGER(KIND=JPIM) :: JRES CHARACTER*5 :: CLMODE + ! ------------------------------------------------------------------ CLMODE='FINAL' IF (PRESENT(CDMODE)) CLMODE=CDMODE IF (CLMODE == 'FINAL') THEN -#ifdef ACCGPU - !$ACC EXIT DATA DELETE(ZAA0,ZAS0,ZEPSNM,ZAA,ZAS) -#endif -#ifdef OMPGPU -#endif - DEALLOCATE(ZAA0) - DEALLOCATE(ZAS0) - DEALLOCATE(ZEPSNM) - DEALLOCATE(ZAA) - DEALLOCATE(ZAS) - - - DEALLOCATE(D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_MYMS,D_NPROCL,D_NASM0,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NLOEN,F_RW) -#ifdef ACCGPU - !$ACC EXIT DATA DELETE(R_NSMAX,R_NTMAX,R_NDGL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,D_NPNTGTB1,D_NPROCL, D_NUMP,D_MYMS, & - !$ACC& G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,G_NLOEN_MAX,D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,D_NASM0,F_RW) - -#endif -#ifdef OMPGPU - !$OMP TARGET EXIT DATA MAP(DELETE: ) -#endif !CALL HIP_DGEMM_BATCHED_FINALIZE() IF( ALLOCATED( LENABLED ) ) THEN @@ -104,6 +83,8 @@ SUBROUTINE TRANS_END(CDMODE) DEALLOCATE(LENABLED) ENDIF + CALL DESTROY_GROWING_ALLOCATOR(GROWING_ALLOCATION) + NULLIFY(R) IF( ALLOCATED(DIM_RESOL) ) DEALLOCATE(DIM_RESOL) @@ -122,15 +103,14 @@ SUBROUTINE TRANS_END(CDMODE) NULLIFY(F) IF( ALLOCATED(FIELDS_RESOL) ) DEALLOCATE(FIELDS_RESOL) + !TPM_FIELDS_GPU + NULLIFY(FG) + IF( ALLOCATED(FIELDS_GPU_RESOL) ) DEALLOCATE(FIELDS_GPU_RESOL) !TPM_GEOMETRY NULLIFY(G) IF( ALLOCATED(GEOM_RESOL) ) DEALLOCATE(GEOM_RESOL) - !TPM_TRANS - IF(ALLOCATED(FOUBUF_IN)) DEALLOCATE(FOUBUF_IN) - IF(ALLOCATED(FOUBUF)) DEALLOCATE(FOUBUF) - MSETUP0 = 0 NMAX_RESOL = 0 NCUR_RESOL = 0 diff --git a/src/trans/gpu/external/trans_inq.F90 b/src/trans/gpu/external/trans_inq.F90 index fb540d949..39be06d2b 100755 --- a/src/trans/gpu/external/trans_inq.F90 +++ b/src/trans/gpu/external/trans_inq.F90 @@ -176,7 +176,7 @@ SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNMENG(:) REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PMU(:) -REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PGW(:) +REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGW(:) REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLEI3 INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPOLEGL @@ -417,7 +417,7 @@ SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& IF(UBOUND(PGW,1) < R%NDGL) THEN CALL ABORT_TRANS('TRANS_INQ: PGW TOO SMALL') ELSE - PGW(1:R%NDGL) = F%RW + PGW(1:R%NDGL) = REAL(F%RW,JPRB) ENDIF ENDIF diff --git a/src/trans/gpu/external/vordiv_to_uv.F90 b/src/trans/gpu/external/vordiv_to_uv.F90 index c8667a2f0..6e1d86c37 100755 --- a/src/trans/gpu/external/vordiv_to_uv.F90 +++ b/src/trans/gpu/external/vordiv_to_uv.F90 @@ -91,6 +91,8 @@ SUBROUTINE VORDIV_TO_UV(PSPVOR,PSPDIV,PSPU,PSPV,KSMAX,KVSETUV) IF (LHOOK) CALL DR_HOOK('VORDIV_TO_UV',0,ZHOOK_HANDLE) +CALL ABORT_TRANS('VORDIV_TO_UV: Code path not (yet) supported in GPU version') + !CALL GSTATS(XXXX,0) IF(MSETUP0 == 0) THEN diff --git a/src/trans/gpu/internal/cdmap_mod.F90 b/src/trans/gpu/internal/cdmap_mod.F90 index e0e7d0b55..98aac868a 100755 --- a/src/trans/gpu/internal/cdmap_mod.F90 +++ b/src/trans/gpu/internal/cdmap_mod.F90 @@ -17,8 +17,9 @@ SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TPM_FLT, ONLY: S USE TPM_DISTR, ONLY: D -USE TPM_TRANS, ONLY: FOUBUF_IN, FOUBUF +!USE TPM_TRANS, ONLY: FOUBUF_IN, FOUBUF USE SEEFMM_MIX, ONLY: SEEFMM_MULM +USE MPL_MODULE, ONLY: MPL_ABORT !**** *CDMAP* - REMAP ROOTS ! @@ -80,6 +81,8 @@ SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& ! ------------------------------------------------------------------ +CALL MPL_ABORT("CDMAP not yet supported in ecTrans GPU version") + !* 1. PERFORM LEGENDRE TRANFORM. ! -------------------------- @@ -121,10 +124,10 @@ SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& DO IGL=KSLO,KDGNHD IGLS = 2*KDGNHD+1-IGL DO JF=1,KFIELDS - FOUBUF_IN(ISTN(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,1)*ZALL1(JF,IGL) & - & - S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,2)*ZALL(JF,IGL) - FOUBUF_IN(ISTS(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,1)*ZALL1(JF,IGLS) & - & - S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,2)*ZALL(JF,IGLS) + !FOUBUF_IN(ISTN(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,1)*ZALL1(JF,IGL) & + ! & - S%FA(KMLOC)%RPNMWO(IGL-KSLO+1,2)*ZALL(JF,IGL) + !FOUBUF_IN(ISTS(IGL)+JF) = S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,1)*ZALL1(JF,IGLS) & + ! & - S%FA(KMLOC)%RPNMWO(IGLS-KSLO+1,2)*ZALL(JF,IGLS) ENDDO ENDDO DEALLOCATE(ZALL1) @@ -154,8 +157,8 @@ SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& DO JGL=KSLO, KDGNHD IGLS = 2*KDGNHD+1-JGL DO JF=1,KFIELDS - ZQX(JF,JGL)=FOUBUF(ISTN(JGL)+JF) - ZQX(JF,IGLS)=FOUBUF(ISTS(JGL)+JF) + !ZQX(JF,JGL)=FOUBUF(ISTN(JGL)+JF) + !ZQX(JF,IGLS)=FOUBUF(ISTS(JGL)+JF) ENDDO ENDDO diff --git a/src/trans/gpu/internal/dealloc_resol_mod.F90 b/src/trans/gpu/internal/dealloc_resol_mod.F90 index 8c8726506..76b211ff5 100755 --- a/src/trans/gpu/internal/dealloc_resol_mod.F90 +++ b/src/trans/gpu/internal/dealloc_resol_mod.F90 @@ -1,5 +1,6 @@ ! (C) Copyright 2013- ECMWF. ! (C) Copyright 2013- Meteo-France. +! (C) Copyright 2024- NVIDIA. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -42,12 +43,15 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) ! ------------------------------------------------------------------ USE PARKIND_ECTRANS, ONLY: JPIM -USE TPM_DIM, ONLY: R +USE TPM_DIM, ONLY: R, DIM_TYPE USE TPM_GEN, ONLY: LENABLED, NOUT, NDEF_RESOL -USE TPM_DISTR, ONLY: D, NPRTRV -USE TPM_GEOMETRY, ONLY: G -USE TPM_FIELDS, ONLY: F -USE TPM_FLT, ONLY: S +USE TPM_DISTR, ONLY: D, DISTR_TYPE, NPRTRV +USE TPM_GEOMETRY, ONLY: G, GEOM_TYPE +USE TPM_FIELDS, ONLY: F, FIELDS_TYPE +USE TPM_FIELDS_GPU, ONLY: FG, FIELDS_GPU_TYPE +USE TPM_HICFFT, ONLY: CLEAN_FFT +USE HICBLAS_MOD, ONLY: CLEAN_GEMM +USE TPM_FLT, ONLY: S, FLT_TYPE_WRAP USE TPM_CTL, ONLY: C USE SEEFMM_MIX, ONLY: FREE_SEEFMM USE SET_RESOL_MOD, ONLY: SET_RESOL @@ -57,6 +61,12 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) INTEGER(KIND=JPIM), INTENT(IN) :: KRESOL INTEGER(KIND=JPIM) :: JMLOC,IPRTRV,JSETV,IMLOC,IM,ILA,ILS, JRESOL +TYPE(DIM_TYPE) :: R_ +TYPE(DISTR_TYPE) :: D_ +TYPE(GEOM_TYPE) :: G_ +TYPE(FIELDS_TYPE) :: F_ +TYPE(FIELDS_GPU_TYPE) :: FG_ +TYPE(FLT_TYPE_WRAP) :: S_ ! ------------------------------------------------------------------ @@ -68,7 +78,20 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) CALL SET_RESOL(KRESOL) - !TPM_FLT +#ifdef ACCGPU +!$ACC EXIT DATA DELETE(R) ASYNC(1) +!$ACC EXIT DATA DELETE(FG,FG%ZAA0,FG%ZAS0) IF(ALLOCATED(FG%ZAA0)) ASYNC(1) +!$ACC EXIT DATA DELETE(FG,FG%ZAA,FG%ZAS,FG%ZEPSNM) ASYNC(1) +!$ACC EXIT DATA DELETE(F,F%RLAPIN,F%RACTHE,F%RW) ASYNC(1) +!$ACC EXIT DATA DELETE(D,D%MYMS,D%NPNTGTB0,D%NPNTGTB1,D%NSTAGT0B,D%NSTAGT1B,D%NSTAGTF,D%NPROCM)& +!$ACC& DELETE(D%NPROCL,D%NPTRLS,D%MSTABF,D%NASM0,D%OFFSETS_GEMM1,D%OFFSETS_GEMM2) ASYNC(1) +!$ACC EXIT DATA DELETE(G,G%NDGLU,G%NMEN,G%NLOEN) ASYNC(1) +!$ACC WAIT(1) +#endif +#ifdef OMPGPU +#endif + + ! TPM_FLD is more complex because it has pointers IF( ALLOCATED(S%FA) ) THEN DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ IPRTRV=MIN(NPRTRV,D%NUMP-JMLOC+1) @@ -91,70 +114,17 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) CALL FREE_SEEFMM(S%FMM_INTI) IF(ASSOCIATED(S%FMM_INTI)) DEALLOCATE(S%FMM_INTI) ENDIF + S = S_ - !TPM_DISTR - IF(ALLOCATED(D%NFRSTLAT)) DEALLOCATE(D%NFRSTLAT) - IF(ALLOCATED(D%NLSTLAT)) DEALLOCATE(D%NLSTLAT) - IF(ALLOCATED(D%NPTRLAT)) DEALLOCATE(D%NPTRLAT) - IF(ALLOCATED(D%NPTRFRSTLAT)) DEALLOCATE(D%NPTRFRSTLAT) - IF(ALLOCATED(D%NPTRLSTLAT)) DEALLOCATE(D%NPTRLSTLAT) - IF(ALLOCATED(D%LSPLITLAT)) DEALLOCATE(D%LSPLITLAT) - IF(ALLOCATED(D%NSTA)) DEALLOCATE(D%NSTA) - IF(ALLOCATED(D%NONL)) DEALLOCATE(D%NONL) - IF(ALLOCATED(D%NGPTOTL)) DEALLOCATE(D%NGPTOTL) - IF(ALLOCATED(D%NPROCA_GP)) DEALLOCATE(D%NPROCA_GP) - - IF(D%LWEIGHTED_DISTR) THEN - IF(ALLOCATED(D%RWEIGHT)) DEALLOCATE(D%RWEIGHT) - ENDIF - - IF(ALLOCATED(D%MYMS)) DEALLOCATE(D%MYMS) - IF(ALLOCATED(D%NUMPP)) DEALLOCATE(D%NUMPP) - IF(ALLOCATED(D%NPOSSP)) DEALLOCATE(D%NPOSSP) - IF(ALLOCATED(D%NPROCM)) DEALLOCATE(D%NPROCM) - IF(ALLOCATED(D%NDIM0G)) DEALLOCATE(D%NDIM0G) - IF(ALLOCATED(D%NASM0)) DEALLOCATE(D%NASM0) - IF(ALLOCATED(D%NATM0)) DEALLOCATE(D%NATM0) - IF(ALLOCATED(D%NLATLS)) DEALLOCATE(D%NLATLS) - IF(ALLOCATED(D%NLATLE)) DEALLOCATE(D%NLATLE) - IF(ALLOCATED(D%NPMT)) DEALLOCATE(D%NPMT) - IF(ALLOCATED(D%NPMS)) DEALLOCATE(D%NPMS) - IF(ALLOCATED(D%NPMG)) DEALLOCATE(D%NPMG) - IF(ALLOCATED(D%NULTPP)) DEALLOCATE(D%NULTPP) - IF(ALLOCATED(D%NPROCL)) DEALLOCATE(D%NPROCL) - IF(ALLOCATED(D%NPTRLS)) DEALLOCATE(D%NPTRLS) - IF(ALLOCATED(D%NALLMS)) DEALLOCATE(D%NALLMS) - IF(ALLOCATED(D%NPTRMS)) DEALLOCATE(D%NPTRMS) - IF(ALLOCATED(D%NSTAGT0B)) DEALLOCATE(D%NSTAGT0B) - IF(ALLOCATED(D%NSTAGT1B)) DEALLOCATE(D%NSTAGT1B) - IF(ALLOCATED(D%NPNTGTB0)) DEALLOCATE(D%NPNTGTB0) - IF(ALLOCATED(D%NPNTGTB1)) DEALLOCATE(D%NPNTGTB1) - IF(ALLOCATED(D%NLTSFTB)) DEALLOCATE(D%NLTSFTB) - IF(ALLOCATED(D%NLTSGTB)) DEALLOCATE(D%NLTSGTB) - IF(ALLOCATED(D%MSTABF)) DEALLOCATE(D%MSTABF) - IF(ALLOCATED(D%NSTAGTF)) DEALLOCATE(D%NSTAGTF) - - !TPM_FIELDS - IF(ALLOCATED(F%RMU)) DEALLOCATE(F%RMU) - IF(ALLOCATED(F%RW)) DEALLOCATE(F%RW) - IF(ALLOCATED(F%R1MU2)) DEALLOCATE(F%R1MU2) - IF(ALLOCATED(F%RACTHE)) DEALLOCATE(F%RACTHE) - IF(ALLOCATED(F%REPSNM)) DEALLOCATE(F%REPSNM) - IF(ALLOCATED(F%RN)) DEALLOCATE(F%RN) - IF(ALLOCATED(F%RLAPIN)) DEALLOCATE(F%RLAPIN) - IF(ALLOCATED(F%NLTN)) DEALLOCATE(F%NLTN) - IF( S%LKEEPRPNM ) THEN - IF(ALLOCATED(F%RPNM)) DEALLOCATE(F%RPNM) - ENDIF - IF( S%LDLL ) THEN - IF(ALLOCATED(F%RMU2)) DEALLOCATE(F%RMU2) - IF(ALLOCATED(F%RACTHE2)) DEALLOCATE(F%RACTHE2) - ENDIF + ! Empty all fields (none of them has pointers; allocatable arrays implicitly deallocate) + D = D_ + F = F_ + FG = FG_ + R = R_ + G = G_ - !TPM_GEOMETRY - IF(ALLOCATED(G%NMEN)) DEALLOCATE(G%NMEN) - IF(ALLOCATED(G%NDGLU)) DEALLOCATE(G%NDGLU) - IF(ALLOCATED(G%NLOEN)) DEALLOCATE(G%NLOEN) + CALL CLEAN_FFT(KRESOL) + CALL CLEAN_GEMM(KRESOL) LENABLED(KRESOL)=.FALSE. NDEF_RESOL = COUNT(LENABLED) diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 index 954251074..5f20e93ba 100755 --- a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -156,10 +156,12 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& CALL INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) ! from the PGP arrays to PREEL_REAL + CALL GSTATS(158,0) CALL TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,& & KVSETUV=KVSETUV,KVSETSC=KVSETSC,& & KVSETSC3A=KVSETSC3A,KVSETSC3B=KVSETSC3B,KVSETSC2=KVSETSC2,& & PGP=PGP,PGPUV=PGPUV,PGP3A=PGP3A,PGP3B=PGP3B,PGP2=PGP2) + CALL GSTATS(158,1) IF (KF_FS > 0) THEN diff --git a/src/trans/gpu/internal/fsc_mod.F90 b/src/trans/gpu/internal/fsc_mod.F90 index 2254af6fb..0122306a6 100755 --- a/src/trans/gpu/internal/fsc_mod.F90 +++ b/src/trans/gpu/internal/fsc_mod.F90 @@ -11,7 +11,7 @@ MODULE FSC_MOD USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D IMPLICIT NONE @@ -65,11 +65,11 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE ! ------------------------------------------------------------------ USE TPM_TRANS, ONLY: LATLON -USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NUMP, D_NPTRLS, D_NSTAGTF -USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS_FLAT, ONLY: F_RACTHE +USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D +USE TPM_GEOMETRY, ONLY: G +USE TPM_FIELDS, ONLY: F USE TPM_GEN, ONLY: NOUT -USE TPM_DIM, ONLY: R_NSMAX +USE TPM_DIM, ONLY: R ! IMPLICIT NONE @@ -82,14 +82,15 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE INTEGER(KIND=JPIM) :: KGL REAL(KIND=JPRBT) :: ZACHTE2 -REAL(KIND=JPRBT) :: ZAMP, ZPHASE -INTEGER(KIND=JPIM) :: IOFF_LAT,OFFSET_VAR -INTEGER(KIND=JPIM) :: IOFF_SCALARS,IOFF_SCALARS_EWDER,IOFF_UV,IOFF_UV_EWDER,IOFF_KSCALARS_NSDER -INTEGER(KIND=JPIM) :: JF,IGLG,II,JM +INTEGER(KIND=JPIM) :: OFFSET_VAR,ILOEN_MAX +INTEGER(KIND=JPIB) :: IOFF_LAT +INTEGER(KIND=JPIB) :: IOFF_SCALARS,IOFF_SCALARS_EWDER,IOFF_UV,IOFF_UV_EWDER,IOFF_KSCALARS_NSDER +INTEGER(KIND=JPIM) :: JF,IGLG,JM INTEGER(KIND=JPIM) :: IBEG,IEND,IINC REAL(KIND=JPRBT) :: RET_REAL, RET_COMPLEX - +ASSOCIATE(D_NUMP=>D%NUMP, D_NPTRLS=>D%NPTRLS, D_NSTAGTF=>D%NSTAGTF, G_NMEN=>G%NMEN, & + & G_NLOEN=>G%NLOEN, F_RACTHE=>F%RACTHE, R_NSMAX=>R%NSMAX) ! ------------------------------------------------------------------ IF(MYPROC > NPROC/2)THEN @@ -104,11 +105,12 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT(D_NPTRLS,D_NSTAGTF,PREEL_COMPLEX,F_RACTHE,G_NMEN,G_NLOEN, G_NLOEN_MAX, R_NSMAX) +!$ACC& PRESENT(D,D_NPTRLS,D_NSTAGTF,PREEL_COMPLEX,F,F_RACTHE,G,G_NMEN,G_NLOEN,R,R_NSMAX) #endif #ifdef OMPGPU -!$OMP TARGET DATA MAP(PRESENT,ALLOC:ZGTF) & -!$OMP& MAP(ALLOC:PUV,PSCALAR,PNSDERS,PEWDERS,PUVDERS) +!$OMP TARGET DATA & +!$OMP& MAP(PRESENT,ALLOC:D_NSTAGTF,PREEL_COMPLEX,F_RACTHE,G_NMEN,G_NLOEN) & +!$OMP& MAP(TO:R_NSMAX) #endif ! ------------------------------------------------------------------ @@ -120,10 +122,13 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE !* 1.1 U AND V. #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(KF_UV,PUV,ZACHTE2) +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) & +!$OMP& PRIVATE(IGLG,IOFF_LAT,IOFF_UV,ZACHTE2,JM,JF,KGL) & +!$OMP& FIRSTPRIVATE(IBEG,IEND,IINC,OFFSET_VAR,KF_UV,KUV_OFFSET,KF_FS) #endif #ifdef ACCGPU -!$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IGLG,IOFF_LAT,IOFF_UV,ZACHTE2,JM,JF,KGL) & +!$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) & +!$ACC& PRIVATE(IGLG,IOFF_LAT,IOFF_UV,ZACHTE2,JM,JF,KGL) & !$ACC& FIRSTPRIVATE(IBEG,IEND,IINC,OFFSET_VAR,KF_UV,KUV_OFFSET,KF_FS) ASYNC(1) #endif DO KGL=IBEG,IEND,IINC @@ -131,7 +136,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE DO JM=0,R_NSMAX !(note that R_NSMAX <= G_NMEN(IGLG) for all IGLG) IGLG = OFFSET_VAR+KGL-1 IF (JM <= G_NMEN(IGLG)) THEN - IOFF_LAT = KF_FS*D_NSTAGTF(KGL) + IOFF_LAT = 1_JPIB*KF_FS*D_NSTAGTF(KGL) IOFF_UV = IOFF_LAT+(KUV_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) ZACHTE2 = REAL(F_RACTHE(IGLG),JPRBT) @@ -149,7 +154,9 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE IF (KSCALARS_NSDER_OFFSET >= 0) THEN #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(KF_SCALARS,PNSDERS,ZACHTE2) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) & + !$OMP& PRIVATE(IGLG,IOFF_LAT,IOFF_KSCALARS_NSDER,ZACHTE2,KGL,JF,JM) & + !$OMP& FIRSTPRIVATE(IBEG,IEND,IINC,OFFSET_VAR,KF_SCALARS,KSCALARS_NSDER_OFFSET,KF_FS) #endif #ifdef ACCGPU !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IGLG,IOFF_LAT,IOFF_KSCALARS_NSDER,ZACHTE2,KGL,JF,JM) & @@ -160,7 +167,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE DO JM=0,R_NSMAX !(note that R_NSMAX <= G_NMEN(IGLG) for all IGLG) IGLG = OFFSET_VAR+KGL-1 IF (JM <= G_NMEN(IGLG)) THEN - IOFF_LAT = KF_FS*D_NSTAGTF(KGL) + IOFF_LAT = 1_JPIB*KF_FS*D_NSTAGTF(KGL) IOFF_KSCALARS_NSDER = IOFF_LAT+(KSCALARS_NSDER_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) ZACHTE2 = REAL(F_RACTHE(IGLG),JPRBT) @@ -182,23 +189,26 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE !* 2.1 U AND V. +ILOEN_MAX = MAXVAL(G_NLOEN) IF (KUV_EWDER_OFFSET >= 0) THEN #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(KF_UV,PUVDERS,ZACHTE2,PUV) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) & + !$OMP& PRIVATE(IGLG,IOFF_LAT,IOFF_UV,IOFF_UV_EWDER,RET_REAL,RET_COMPLEX,ZACHTE2,JM,JF,KGL) & + !$OMP& FIRSTPRIVATE(IBEG,IEND,IINC,OFFSET_VAR,KF_UV,KUV_EWDER_OFFSET,KUV_OFFSET,KF_FS) #endif #ifdef ACCGPU !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IGLG,IOFF_LAT,IOFF_UV,IOFF_UV_EWDER,RET_REAL,RET_COMPLEX,ZACHTE2,JM,JF,KGL) & - !$ACC& FIRSTPRIVATE(IBEG,IEND,IINC,OFFSET_VAR,KF_UV,KUV_EWDER_OFFSET,KUV_OFFSET,KF_FS) ASYNC(1) + !$ACC& FIRSTPRIVATE(IBEG,IEND,IINC,OFFSET_VAR,KF_UV,KUV_EWDER_OFFSET,KUV_OFFSET,KF_FS,ILOEN_MAX) ASYNC(1) #endif DO KGL=IBEG,IEND,IINC DO JF=1,2*KF_UV - DO JM=0,G_NLOEN_MAX/2 + DO JM=0,ILOEN_MAX/2 IGLG = OFFSET_VAR+KGL-1 ! FFT transforms NLON real values to floor(NLON/2)+1 complex numbers. Hence we have ! to fill those floor(NLON/2)+1 values. ! Truncation happens starting at G_NMEN+1. Hence, we zero-fill those values. IF (JM <= G_NLOEN(IGLG)/2) THEN - IOFF_LAT = KF_FS*D_NSTAGTF(KGL) + IOFF_LAT = 1_JPIB*KF_FS*D_NSTAGTF(KGL) IOFF_UV = IOFF_LAT+(KUV_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) IOFF_UV_EWDER = IOFF_LAT+(KUV_EWDER_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) @@ -225,21 +235,23 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE IF (KSCALARS_EWDER_OFFSET > 0) THEN #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO DEFAULT(NONE) SHARED(KF_SCALARS,PEWDERS,ZACHTE2,PSCALAR) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) & + !$OMP& PRIVATE(IGLG,IOFF_LAT,IOFF_SCALARS_EWDER,IOFF_SCALARS,ZACHTE2,RET_REAL,RET_COMPLEX) & + !$OMP& FIRSTPRIVATE(IBEG,IEND,IINC,KF_SCALARS,OFFSET_VAR,KSCALARS_EWDER_OFFSET,KSCALARS_OFFSET,KF_FS) #endif #ifdef ACCGPU !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IGLG,IOFF_LAT,IOFF_SCALARS_EWDER,IOFF_SCALARS,ZACHTE2,RET_REAL,RET_COMPLEX) & - !$ACC& FIRSTPRIVATE(IBEG,IEND,IINC,KF_SCALARS,OFFSET_VAR,KSCALARS_EWDER_OFFSET,KSCALARS_OFFSET,KF_FS) ASYNC(1) + !$ACC& FIRSTPRIVATE(IBEG,IEND,IINC,KF_SCALARS,OFFSET_VAR,KSCALARS_EWDER_OFFSET,KSCALARS_OFFSET,KF_FS,ILOEN_MAX) ASYNC(1) #endif DO KGL=IBEG,IEND,IINC DO JF=1,KF_SCALARS - DO JM=0,G_NLOEN_MAX/2 + DO JM=0,ILOEN_MAX/2 IGLG = OFFSET_VAR+KGL-1 ! FFT transforms NLON real values to floor(NLON/2)+1 complex numbers. Hence we have ! to fill those floor(NLON/2)+1 values. ! Truncation happens starting at G_NMEN+1. Hence, we zero-fill those values. IF (JM <= G_NLOEN(IGLG)/2) THEN - IOFF_LAT = KF_FS*D_NSTAGTF(KGL) + IOFF_LAT = 1_JPIB*KF_FS*D_NSTAGTF(KGL) IOFF_SCALARS_EWDER = IOFF_LAT+(KSCALARS_EWDER_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) IOFF_SCALARS = IOFF_LAT+(KSCALARS_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) @@ -272,6 +284,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE !$OMP END TARGET DATA #endif ! ------------------------------------------------------------------ +END ASSOCIATE END SUBROUTINE FSC END MODULE FSC_MOD diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index b9ffe4477..7d373bbc3 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -22,10 +22,10 @@ MODULE FTDIR_MOD CONTAINS FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -36,7 +36,7 @@ FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR) REAL(KIND=JPRBT) :: DUMMY #ifndef IN_PLACE_FFT - HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, INT(KF_FS*D%NLENGTF*SIZEOF(DUMMY), KIND=C_SIZE_T)) + HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(DUMMY), "HFTDIR%HREEL_COMPLEX") #endif END FUNCTION PREPARE_FTDIR @@ -73,16 +73,15 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NSTAGT0B, D_NSTAGTF,D_NPTRLS, & - & D_NPNTGTB0, D_NPROCM, D_NDGL_FS, D - USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN + USE TPM_GEN, ONLY: LSYNC_TRANS, NCUR_RESOL + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB + USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D + USE TPM_GEOMETRY, ONLY: G USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -98,9 +97,13 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) PREEL_COMPLEX => PREEL_REAL #else CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),& - & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),KIND=C_SIZE_T)) + & 1_JPIB, 1_JPIB*KFIELD*D%NLENGTF*C_SIZEOF(PREEL_COMPLEX(1))) #endif + ASSOCIATE(D_NDGL_FS=>D%NDGL_FS, D_NSTAGT0B=>D%NSTAGT0B, D_NSTAGTF=>D%NSTAGTF, & + & D_NPTRLS=>D%NPTRLS, D_NPNTGTB0=>D%NPNTGTB0, D_NPROCM=>D%NPROCM, & + & G_NMEN=>G%NMEN, G_NLOEN=>G%NLOEN) + #ifdef ACCGPU !$ACC DATA PRESENT(PREEL_REAL, PREEL_COMPLEX, & !$ACC& D_NSTAGTF,D_NSTAGT0B,D_NPTRLS,D_NPROCM,D_NPNTGTB0,G_NMEN,G_NLOEN) @@ -115,7 +118,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) CALL GSTATS(430,1) ENDIF CALL GSTATS(413,0) - CALL EXECUTE_DIR_FFT(PREEL_REAL(:),PREEL_COMPLEX(:),KFIELD, & + CALL EXECUTE_DIR_FFT(PREEL_REAL(:),PREEL_COMPLEX(:),NCUR_RESOL,KFIELD, & & LOENS=G_NLOEN(D_NPTRLS(MYSETW):D_NPTRLS(MYSETW)+D_NDGL_FS-1), & & OFFSETS=D_NSTAGTF(1:D_NDGL_FS+1),ALLOC=ALLOCATOR%PTR) @@ -134,6 +137,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) #endif NULLIFY(PREEL_REAL) + END ASSOCIATE ! ------------------------------------------------------------------ END SUBROUTINE FTDIR diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index 49e43dd72..ddd07deb0 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -21,10 +21,10 @@ MODULE FTINV_MOD END TYPE CONTAINS FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -35,7 +35,7 @@ FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV) REAL(KIND=JPRBT) :: DUMMY #ifndef IN_PLACE_FFT - HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS*SIZEOF(DUMMY),KIND=C_SIZE_T)) + HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, 1_JPIB*D%NLENGTF*KF_FS*C_SIZEOF(DUMMY),"HFTINV%HREEL_REAL") #endif END FUNCTION @@ -72,15 +72,15 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF, D - USE TPM_GEOMETRY, ONLY: G_NLOEN - USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT - USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM - USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE TPM_GEN, ONLY: LSYNC_TRANS, NCUR_RESOL + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB + USE TPM_DISTR, ONLY: MYSETW, D + USE TPM_GEOMETRY, ONLY: G + USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -92,11 +92,13 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) INTEGER(KIND=JPIM) :: KGL + ASSOCIATE(D_NDGL_FS=>D%NDGL_FS, D_NPTRLS=>D%NPTRLS, D_NSTAGTF=>D%NSTAGTF, G_NLOEN=>G%NLOEN) + #ifdef IN_PLACE_FFT PREEL_REAL => PREEL_COMPLEX #else CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),& - & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*SIZEOF(PREEL_REAL(1)),KIND=C_SIZE_T)) + & 1_JPIB, 1_JPIB*KFIELD*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))) #endif #ifdef OMPGPU @@ -111,7 +113,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) CALL GSTATS(440,1) ENDIF CALL GSTATS(423,0) - CALL EXECUTE_INV_FFT(PREEL_COMPLEX(:),PREEL_REAL(:),KFIELD, & + CALL EXECUTE_INV_FFT(PREEL_COMPLEX(:),PREEL_REAL(:),NCUR_RESOL,KFIELD, & & LOENS=G_NLOEN(D_NPTRLS(MYSETW):D_NPTRLS(MYSETW)+D_NDGL_FS-1), & & OFFSETS=D_NSTAGTF(1:D_NDGL_FS),ALLOC=ALLOCATOR%PTR) @@ -130,6 +132,8 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) NULLIFY(PREEL_COMPLEX) + END ASSOCIATE + ! ------------------------------------------------------------------ END SUBROUTINE FTINV END MODULE FTINV_MOD diff --git a/src/trans/gpu/internal/gath_spec_control_mod.F90 b/src/trans/gpu/internal/gath_spec_control_mod.F90 index da94be477..b41944fa4 100755 --- a/src/trans/gpu/internal/gath_spec_control_mod.F90 +++ b/src/trans/gpu/internal/gath_spec_control_mod.F90 @@ -11,7 +11,7 @@ MODULE GATH_SPEC_CONTROL_MOD CONTAINS SUBROUTINE GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& - & KSMAX,KSPEC2,KSPEC2_G,KPOSSP,KDIM0G,LDZA0IP) + & KSMAX,KSPEC2,KSPEC2MX,KSPEC2G,KPOSSP,KDIM0G,KUMPP,KALLMS,KPTRMS,KN,LDZA0IP) !**** *GATH_SPEC_CONTROL* - Gather global spectral array from processors @@ -26,22 +26,44 @@ SUBROUTINE GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& ! Explicit arguments : ! -------------------- ! PSPECG(:,:) - Global spectral array -! KFGATHG - Global number of fields to be distributed +! KFGATHG - Global number of fields to be gathered ! KTO(:) - Processor responsible for distributing each field ! KVSET(:) - "B-Set" for each field ! PSPEC(:,:) - Local spectral array -! LDZA0IP - Set first coefficients (imaginary part) to zero +! LDIM1_IS_FLD - .TRUE. if first dimension contains the fields +! KSMAX - Spectral truncation limit +! KSPEC2 - Local number of spectral coefficients +! KSPEC2MX - Maximum local number of spectral coefficients +! KSPEC2G - Global number of spectral coefficients +! KPOSSP - Position of local waves for each task +! KDIM0G - Defines partitioning of global spectral fields among PEs +! KUMPP - Number of spectral waves on this a-set +! KALLMS - Wave numbers for all a-set concatenated together to give all wave numbers in a-set order +! KPTRMS - Pointer to the first wave number of a given a-set in kallms array. +! KN - Number of spectral coefficients for each m wave +! LDZA0IP - Set first coefficients (imaginary part) to zero (global model only) +! Externals. SET2PE - compute "A and B" set from PE +! ---------- MPL.. - message passing routines + +! Author. +! ------- +! Mats Hamrud *ECMWF* + +! Modifications. +! -------------- +! Original : 2000-04-01 +! R. El Khatib 02-Dec-2020 re-write for optimizations and merge with LAM counterpart ! ------------------------------------------------------------------ USE PARKIND1, ONLY: JPIM, JPRB USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, JP_BLOCKING_STANDARD, & & JP_NON_BLOCKING_STANDARD -USE TPM_DISTR, ONLY: MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC +USE TPM_DISTR, ONLY: MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC, NPRTRV USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS USE SET2PE_MOD, ONLY: SET2PE -! +USE TPM_GEOMETRY, ONLY : G IMPLICIT NONE @@ -53,175 +75,234 @@ SUBROUTINE GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& LOGICAL ,OPTIONAL, INTENT(IN) :: LDIM1_IS_FLD INTEGER(KIND=JPIM) , INTENT(IN) :: KSMAX INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2 -INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2_G +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2MX +INTEGER(KIND=JPIM) , INTENT(IN) :: KSPEC2G INTEGER(KIND=JPIM) , INTENT(IN) :: KPOSSP(:) INTEGER(KIND=JPIM) , INTENT(IN) :: KDIM0G(0:) +INTEGER(KIND=JPIM) , INTENT(IN) :: KUMPP(NPRTRW) +INTEGER(KIND=JPIM) , INTENT(IN) :: KALLMS(KSMAX+1) +INTEGER(KIND=JPIM) , INTENT(IN) :: KPTRMS(NPRTRW) +INTEGER(KIND=JPIM) , INTENT(IN) :: KN(0:KSMAX) LOGICAL ,OPTIONAL, INTENT(IN) :: LDZA0IP -REAL(KIND=JPRB) :: ZFLD(KSPEC2,KFGATHG),ZDUM(KSPEC2) -REAL(KIND=JPRB),ALLOCATABLE :: ZRECV(:,:) -INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IBSET,ILEN,JA,ISND -INTEGER(KIND=JPIM) :: IRCV,ISP,ILENR,ISTA,ISTP,ISENDREQ(KFGATHG),IPOS0,JNM -INTEGER(KIND=JPIM) :: IDIST(KSPEC2_G),IMYFIELDS +REAL(KIND=JPRB) :: ZBUFSEND(KSPEC2MX,COUNT(KVSET(1:KFGATHG) == MYSETV)) +REAL(KIND=JPRB) :: ZRECV(KSPEC2MX,COUNT(KTO(1:KFGATHG) == MYPROC)) +INTEGER(KIND=JPIM) :: IASM0G(0:KSMAX) +INTEGER(KIND=JPIM) :: JM,JN,II,IFLDR,IFLDS,JFLD,ITAG,IB,ILEN(NPRTRW),JA,JB,ISND,JMLOC +INTEGER(KIND=JPIM) :: IPE(NPRTRV,NPRTRW),ILENR,ISENDREQ(NPROC),IPOSSP,JNM,JROC +INTEGER(KIND=JPIM) :: IFLD,IFLDLOC(COUNT(KTO(1:KFGATHG) == MYPROC)),IOFFPROC +INTEGER(KIND=JPIM) :: ILOCFLD(COUNT(KVSET(1:KFGATHG) == MYSETV)) LOGICAL :: LLZA0IP ! ------------------------------------------------------------------ -LLZA0IP=.TRUE. +! Compute help array for distribution + +DO JA=1,NPRTRW + ILEN(JA) = KPOSSP(JA+1)-KPOSSP(JA) +ENDDO +DO JA=1,NPRTRW + DO JB=1,NPRTRV + CALL SET2PE(IPE(JB,JA),0,0,JA,JB) + ENDDO +ENDDO +IASM0G(0)=1 +DO JM=1,KSMAX + IASM0G(JM)=IASM0G(JM-1)+KN(JM-1) +ENDDO + +LLZA0IP=.NOT.G%LAM ! or it should have been coded in the original code, please :-( IF (PRESENT (LDZA0IP)) LLZA0IP=LDZA0IP !GATHER SPECTRAL ARRAY -IF( NPROC == 1 ) THEN - CALL GSTATS(1644,0) - IF(LDIM1_IS_FLD) THEN -!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) - DO JM=1,KSPEC2_G +!Send +ISND=0 +IOFFPROC=0 +IF (KSPEC2 > 0) THEN + CALL GSTATS(810,0) + DO JROC=1,NPROC + IF (JROC /= MYPROC) THEN + IFLD=0 ! counter of fields in PSPEC + IFLDS=0 ! counter of fields in ZBUFSEND DO JFLD=1,KFGATHG - PSPECG(JFLD,JM) =PSPEC(JFLD,JM) + IF (KVSET(JFLD) == MYSETV) THEN + IFLD=IFLD+1 + IF (JROC==KTO(JFLD)) THEN + IFLDS=IFLDS+1 + IF (LDIM1_IS_FLD) THEN + ZBUFSEND(1:KSPEC2,IOFFPROC+IFLDS)=PSPEC(IFLD,1:KSPEC2) + ELSE + ZBUFSEND(1:KSPEC2,IOFFPROC+IFLDS)=PSPEC(1:KSPEC2,IFLD) + ENDIF + ENDIF + ENDIF + ENDDO + IF (IFLDS > 0) THEN + ITAG=MTAGDISTSP+MYPROC + ISND=ISND+1 + CALL MPL_SEND(ZBUFSEND(:,IOFFPROC+1:IOFFPROC+IFLDS),KDEST=NPRCIDS(JROC),KTAG=ITAG,& + & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISND),& + & CDSTRING='GATH_SPEC_CONTROL') + ENDIF + IOFFPROC=IOFFPROC+IFLDS + ENDIF + ENDDO + CALL GSTATS(810,1) + +! Myself : + IFLD=0 + IFLDR=0 + DO JFLD=1,KFGATHG + IF (KTO(JFLD) == MYPROC) THEN + IFLD=IFLD+1 + IF (KVSET(JFLD)==MYSETV) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IFLD=0 + IFLDR=0 + DO JFLD=1,KFGATHG + IF (KVSET(JFLD)==MYSETV) THEN + IFLD=IFLD+1 + IF (KTO(JFLD) == MYPROC) THEN + IFLDR = IFLDR+1 + ILOCFLD(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + IF (LDIM1_IS_FLD) THEN + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IPOSSP,II,JN) + DO JFLD=1,IFLDR + DO JMLOC=1,KUMPP(MYSETW) + JM=KALLMS(KPTRMS(MYSETW)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(MYSETW)+1 + PSPECG(IFLDLOC(JFLD),IASM0G(JM):IASM0G(JM)+KN(JM)-1)=PSPEC(ILOCFLD(JFLD),IPOSSP:IPOSSP+KN(JM)-1) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + II = II+2 + PSPECG(IFLDLOC(JFLD),II) = 0.0_JPRB + ENDDO + ENDIF ENDDO - ENDDO !$OMP END PARALLEL DO - ELSE -!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JM,JFLD) - DO JFLD=1,KFGATHG - DO JM=1,KSPEC2_G - PSPECG(JM,JFLD) =PSPEC(JM,JFLD) + CALL GSTATS(1644,1) + ELSE + CALL GSTATS(1644,0) +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IPOSSP,II,JN) + DO JFLD=1,IFLDR + DO JMLOC=1,KUMPP(MYSETW) + JM=KALLMS(KPTRMS(MYSETW)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(MYSETW)+1 + PSPECG(IASM0G(JM):IASM0G(JM)+KN(JM)-1,IFLDLOC(JFLD))=PSPEC(IPOSSP:IPOSSP+KN(JM)-1,ILOCFLD(JFLD)) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + II = II+2 + PSPECG(II,IFLDLOC(JFLD)) = 0.0_JPRB + ENDDO + ENDIF ENDDO - ENDDO !$OMP END PARALLEL DO - ENDIF - CALL GSTATS(1644,1) -ELSE - IMYFIELDS = 0 - DO JFLD=1,KFGATHG - IF(KTO(JFLD) == MYPROC) THEN - IMYFIELDS = IMYFIELDS+1 + CALL GSTATS(1644,1) ENDIF - ENDDO - IF(IMYFIELDS>0) THEN - ALLOCATE(ZRECV(KSPEC2_G,IMYFIELDS)) - II = 0 - CALL GSTATS(1804,0) - DO JM=0,KSMAX - DO JN=JM,KSMAX - IDIST(II+1) = KDIM0G(JM)+(JN-JM)*2 - IDIST(II+2) = KDIM0G(JM)+(JN-JM)*2+1 - II = II+2 - ENDDO - ENDDO - CALL GSTATS(1804,1) ENDIF - CALL GSTATS_BARRIER(788) - - !Send - CALL GSTATS(810,0) - IFLDS = 0 - IF(KSPEC2 > 0 )THEN - DO JFLD=1,KFGATHG - - IBSET = KVSET(JFLD) - IF( IBSET == MYSETV )THEN - - IFLDS = IFLDS+1 - ISND = KTO(JFLD) - ITAG = MTAGDISTSP+JFLD+17 - IF(LDIM1_IS_FLD) THEN - ZFLD(1:KSPEC2,IFLDS)=PSPEC(IFLDS,1:KSPEC2) - CALL MPL_SEND(ZFLD(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& - &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& - &CDSTRING='GATH_SPEC_CONTROL') - ELSE - CALL MPL_SEND(PSPEC(1:KSPEC2,IFLDS),KDEST=NPRCIDS(ISND),KTAG=ITAG,& - &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(JFLD),& - &CDSTRING='GATH_SPEC_CONTROL') - ENDIF - ENDIF - ENDDO - ENDIF +ENDIF - ! Recieve - IFLDR = 0 - DO JFLD=1,KFGATHG - IF(KTO(JFLD) == MYPROC) THEN - IBSET = KVSET(JFLD) - IFLDR = IFLDR+1 - DO JA=1,NPRTRW - ILEN = KPOSSP(JA+1)-KPOSSP(JA) - IF( ILEN > 0 )THEN - CALL SET2PE(IRCV,0,0,JA,IBSET) - ITAG = MTAGDISTSP+JFLD+17 - ISTA = KPOSSP(JA) - ISTP = ISTA+ILEN-1 - CALL MPL_RECV(ZRECV(ISTA:ISTP,IFLDR),KSOURCE=NPRCIDS(IRCV),KTAG=ITAG,& - &KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & - &CDSTRING='GATH_SPEC_CONTROL') - IF( ILENR /= ILEN )THEN - WRITE(0,'("GATH_SPEC_CONTROL: JFLD=",I4," JA=",I4," ILEN=",I10," ILENR=",I10)')& - &JFLD,JA,ILEN,ILENR +! Receive +DO JA=1,NPRTRW + IF (ILEN(JA) > 0) THEN + DO JB=1,NPRTRV + IF (IPE(JB,JA) /= MYPROC) THEN + ! Locate received fields in source array : + IFLD=0 + IFLDR=0 + DO JFLD=1,KFGATHG + IF (KTO(JFLD) == MYPROC) THEN + IFLD=IFLD+1 + IF (KVSET(JFLD)==JB) THEN + IFLDR = IFLDR+1 + IFLDLOC(IFLDR)=IFLD + ENDIF + ENDIF + ENDDO + IF (IFLDR > 0) THEN + ITAG=MTAGDISTSP+IPE(JB,JA) + CALL GSTATS(810,0) + CALL MPL_RECV(ZRECV(:,1:IFLDR),KSOURCE=NPRCIDS(IPE(JB,JA)),KTAG=ITAG,& + & KMP_TYPE=JP_BLOCKING_STANDARD,KOUNT=ILENR, & + & CDSTRING='GATH_SPEC_CONTROL') + IF (ILENR /= KSPEC2MX*IFLDR) THEN CALL ABORT_TRANS('GATH_SPEC_CONTROL:INVALID RECEIVE MESSAGE LENGTH') ENDIF + CALL GSTATS(810,1) + CALL GSTATS(1644,0) + IF (LDIM1_IS_FLD) THEN +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IPOSSP,II,JN) + DO JFLD=1,IFLDR + DO JMLOC=1,KUMPP(JA) + JM=KALLMS(KPTRMS(JA)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(JA)+1 + PSPECG(IFLDLOC(JFLD),IASM0G(JM):IASM0G(JM)+KN(JM)-1)=ZRECV(IPOSSP:IPOSSP+KN(JM)-1,JFLD) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + II = II+2 + PSPECG(IFLDLOC(JFLD),II) = 0.0_JPRB + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + ELSE +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JMLOC,JM,IPOSSP,II,JN) + DO JFLD=1,IFLDR + DO JMLOC=1,KUMPP(JA) + JM=KALLMS(KPTRMS(JA)+JMLOC-1) + IPOSSP=KDIM0G(JM)-KPOSSP(JA)+1 + PSPECG(IASM0G(JM):IASM0G(JM)+KN(JM)-1,IFLDLOC(JFLD))=ZRECV(IPOSSP:IPOSSP+KN(JM)-1,JFLD) + ENDDO + IF (LLZA0IP) THEN + II = 0 + DO JN=0,KSMAX + II = II+2 + PSPECG(II,IFLDLOC(JFLD)) = 0.0_JPRB + ENDDO + ENDIF + ENDDO +!$OMP END PARALLEL DO + ENDIF + CALL GSTATS(1644,1) ENDIF - ENDDO - ENDIF - ENDDO - - ! Check for completion of sends - IF(KSPEC2 > 0 )THEN - DO JFLD=1,KFGATHG - IBSET = KVSET(JFLD) - IF( IBSET == MYSETV )THEN - CALL MPL_WAIT(KREQUEST=ISENDREQ(JFLD), & - & CDSTRING='GATH_GRID_CTL: WAIT') ENDIF ENDDO ENDIF - CALL GSTATS(810,1) - CALL GSTATS_BARRIER2(788) - - CALL GSTATS(1644,0) -!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JFLD,JNM,II,JN,ISP) - DO JFLD=1,IMYFIELDS - IF(LDIM1_IS_FLD) THEN - DO JNM=1,KSPEC2_G - PSPECG(JFLD,JNM) = ZRECV(IDIST(JNM),JFLD) - ENDDO - IF (LLZA0IP) THEN - II = 0 - DO JN=0,KSMAX - ISP = KDIM0G(0)+JN*2+1 - II = II+2 - PSPECG(JFLD,II) = 0.0_JPRB - ENDDO - ENDIF - ELSE - DO JNM=1,KSPEC2_G - PSPECG(JNM,JFLD) = ZRECV(IDIST(JNM),JFLD) - ENDDO - IF (LLZA0IP) THEN - II = 0 - DO JN=0,KSMAX - ISP = KDIM0G(0)+JN*2+1 - II = II+2 - PSPECG(II,JFLD) = 0.0_JPRB - ENDDO - ENDIF - ENDIF - ENDDO -!$OMP END PARALLEL DO - CALL GSTATS(1644,1) - IF(ALLOCATED(ZRECV)) DEALLOCATE(ZRECV) +ENDDO +CALL GSTATS_BARRIER2(788) - !Synchronize processors - CALL GSTATS(785,0) - CALL MPL_BARRIER(CDSTRING='GATH_SPEC_CONTROL:') - CALL GSTATS(785,1) +! Check for completion of sends +CALL GSTATS(810,0) +IF (ISND > 0) THEN + CALL MPL_WAIT(ISENDREQ(1:ISND),CDSTRING='GATH_GRID_CTL: WAIT') ENDIF +CALL GSTATS(810,1) + +!Synchronize processors. Useful ?? +CALL GSTATS(785,0) +!rekCALL MPL_BARRIER(CDSTRING='GATH_SPEC_CONTROL:') +CALL GSTATS(785,1) + +CALL GSTATS_BARRIER(788) ! ------------------------------------------------------------------ END SUBROUTINE GATH_SPEC_CONTROL END MODULE GATH_SPEC_CONTROL_MOD - - diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index e12f89afb..e7a1fb18a 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -11,7 +11,7 @@ ! MODULE LEDIR_MOD - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD, JPIB USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR IMPLICIT NONE @@ -23,17 +23,21 @@ MODULE LEDIR_MOD SUBROUTINE LEDIR_STRIDES(KF_FS,IOUT_STRIDES0,IOUT_SIZE,IIN_STRIDES0,IIN_SIZE,& IOUT0_STRIDES0,IOUT0_SIZE,IIN0_STRIDES0,IIN0_SIZE) USE TPM_DIM, ONLY: R - USE TPM_DISTR, ONLY: D, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 + USE TPM_DISTR, ONLY: D IMPLICIT NONE INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS - INTEGER(KIND=JPIM), OPTIONAL :: IOUT_STRIDES0, IOUT_SIZE - INTEGER(KIND=JPIM), OPTIONAL :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM), OPTIONAL :: IOUT_STRIDES0 + INTEGER(KIND=JPIB), OPTIONAL :: IOUT_SIZE + INTEGER(KIND=JPIM), OPTIONAL :: IIN_STRIDES0 + INTEGER(KIND=JPIB), OPTIONAL :: IIN_SIZE INTEGER(KIND=JPIM), OPTIONAL :: IOUT0_STRIDES0, IOUT0_SIZE INTEGER(KIND=JPIM), OPTIONAL :: IIN0_STRIDES0, IIN0_SIZE + ASSOCIATE(D_OFFSETS_GEMM1=>D%OFFSETS_GEMM1, D_OFFSETS_GEMM2=>D%OFFSETS_GEMM2) + IF (PRESENT(IOUT_STRIDES0)) & IOUT_STRIDES0 = ALIGN(2*KF_FS,A) IF (PRESENT(IOUT_SIZE)) & @@ -50,6 +54,8 @@ SUBROUTINE LEDIR_STRIDES(KF_FS,IOUT_STRIDES0,IOUT_SIZE,IIN_STRIDES0,IIN_SIZE,& IIN0_STRIDES0 = ALIGN(KF_FS,A) IF (PRESENT(IIN0_SIZE)) & IIN0_SIZE = IIN0_STRIDES0 * ALIGN(R%NDGNH,A) + + END ASSOCIATE END SUBROUTINE SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) @@ -94,12 +100,12 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS, NOUT + USE TPM_GEN, ONLY: LSYNC_TRANS, NOUT, NCUR_RESOL USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK - USE TPM_DIM, ONLY: R_NDGNH,R_NSMAX,R_NTMAX,R_NDGL - USE TPM_GEOMETRY, ONLY: G_NDGLU - USE TPM_FIELDS_FLAT, ONLY: ZAA,ZAS,ZAA0,ZAS0,KMLOC0 - USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 + USE TPM_DIM, ONLY: R + USE TPM_GEOMETRY, ONLY: G + USE TPM_FIELDS_GPU, ONLY: FG + USE TPM_DISTR, ONLY: D USE HICBLAS_MOD, ONLY: HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM @@ -126,20 +132,27 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ! LOCAL VARIABLES INTEGER(KIND=JPIM) :: KM INTEGER(KIND=JPIM) :: KMLOC - INTEGER(KIND=JPIM) :: IA, IS, ISL, J - INTEGER(KIND=JPIM) :: KS(D_NUMP), NS(D_NUMP), AOFFSETS(D_NUMP), BOFFSETS(D_NUMP), COFFSETS(D_NUMP) + INTEGER(KIND=JPIM) :: IA, IS, ISL, J, IMLOC0(1) + INTEGER(KIND=JPIM) :: KS(D%NUMP), NS(D%NUMP) + INTEGER(KIND=JPIB) :: AOFFSETS(D%NUMP), BOFFSETS(D%NUMP), COFFSETS(D%NUMP) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JPRBT) :: PAIA, PAIS, V1, V2 INTEGER(KIND=JPIM) :: IGLS, JF, JGL INTEGER(KIND=JPIM) :: OFFSET1, OFFSET2 - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_STRIDES1 - INTEGER(KIND=JPIM) :: IIN_STRIDES0, IIN_STRIDES1 + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_STRIDES1 + INTEGER(KIND=JPIM) :: IIN_STRIDES0 + INTEGER(KIND=JPIB) :: IIN_STRIDES1 INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_STRIDES1 INTEGER(KIND=JPIM) :: IIN0_STRIDES0, IIN0_STRIDES1 INTEGER(KIND=8) :: ALLOC_SZ, ALLOC_POS + ASSOCIATE(D_NUMP=>D%NUMP, R_NSMAX=>R%NSMAX, R_NTMAX=>R%NTMAX, G_NDGLU=>G%NDGLU, & + & D_MYMS=>D%MYMS, D_OFFSETS_GEMM1=>D%OFFSETS_GEMM1, & + & D_OFFSETS_GEMM2=>D%OFFSETS_GEMM2, & + & ZAA=>FG%ZAA, ZAS=>FG%ZAS, ZAA0=>FG%ZAA0, ZAS0=>FG%ZAS0) IF (LHOOK) CALL DR_HOOK('LE_DGEMM',0,ZHOOK_HANDLE) CALL LEDIR_STRIDES(KF_FS,IOUT_STRIDES0,IOUT_STRIDES1,IIN_STRIDES0,IIN_STRIDES1,& @@ -150,7 +163,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) #ifdef ACCGPU !$ACC DATA & !$ACC& PRESENT(ZINPS,ZINPA,ZOUT,ZINPS0,ZINPA0,ZOUT0) & - !$ACC& PRESENT(D_MYMS,D_NUMP,R_NTMAX,R_NSMAX,G_NDGLU) & + !$ACC& PRESENT(D,D_MYMS,D_NUMP,R,R_NTMAX,R_NSMAX) & !$ACC& PRESENT(ZAA,ZAS,POA1,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2) #endif @@ -164,7 +177,9 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ENDIF CALL GSTATS(414,0) - IF(KMLOC0 > 0) THEN + ! anti-symmetric + IMLOC0 = FINDLOC(D_MYMS,0) + IF(IMLOC0(1) > 0) THEN ! compute m=0 in double precision: #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAA0,ZINPA0,ZOUT0) @@ -196,12 +211,12 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) NS(KMLOC) = (R_NSMAX-KM+2)/2 KS(KMLOC) = G_NDGLU(KM) AOFFSETS(KMLOC) = IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC) - BOFFSETS(KMLOC) = SIZE(ZAA,1)*SIZE(ZAA,2)*(KMLOC-1) + BOFFSETS(KMLOC) = D%OFFSETS_GEMM_MATRIX(KMLOC) COFFSETS(KMLOC) = IOUT_STRIDES0*D_OFFSETS_GEMM2(KMLOC) ENDDO - IF(KMLOC0 > 0) THEN - NS(KMLOC0) = 0 - KS(KMLOC0) = 0 + IF(IMLOC0(1) > 0) THEN + NS(IMLOC0(1)) = 0 + KS(IMLOC0(1)) = 0 ENDIF #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAA,ZINPA,ZOUT) @@ -210,12 +225,12 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC HOST_DATA USE_DEVICE(ZAA,ZINPA,ZOUT) #endif CALL HIP_GEMM( & - & 21, & ! unique identifier + & NCUR_RESOL, 21, & ! unique identifier & 'N', 'N', & & 2*KF_FS, NS(:), KS(:), & & 1.0_JPRBT, & & ZINPA, IIN_STRIDES0, AOFFSETS, & - & ZAA, SIZE(ZAA,1), BOFFSETS, & + & ZAA, D%LEGENDRE_MATRIX_STRIDES, BOFFSETS, & & 0.0_JPRBT, & & ZOUT, IOUT_STRIDES0, COFFSETS, & & D_NUMP, STREAM=1_C_INT, ALLOC=ALLOCATOR%PTR) @@ -238,7 +253,12 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IA,J) FIRSTPRIVATE(KF_FS,IOUT_STRIDES0,IOUT0_STRIDES0) DEFAULT(NONE) ASYNC(1) + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IA,J) FIRSTPRIVATE(KF_FS,IOUT_STRIDES0,IOUT0_STRIDES0) DEFAULT(NONE) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JF=1,2*KF_FS @@ -278,7 +298,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ENDIF CALL GSTATS(414,0) - IF(KMLOC0 > 0) THEN + IF(IMLOC0(1) > 0) THEN #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAS0,ZINPS0,ZOUT0) #endif @@ -311,12 +331,12 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) NS(KMLOC) = (R_NSMAX-KM+3)/2 KS(KMLOC) = G_NDGLU(KM) AOFFSETS(KMLOC) = IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC) - BOFFSETS(KMLOC) = SIZE(ZAS,1)*SIZE(ZAS,2)*(KMLOC-1) + BOFFSETS(KMLOC) = D%OFFSETS_GEMM_MATRIX(KMLOC) COFFSETS(KMLOC) = IOUT_STRIDES0*D_OFFSETS_GEMM2(KMLOC) ENDDO - IF(KMLOC0 > 0) THEN - NS(KMLOC0) = 0 - KS(KMLOC0) = 0 + IF(IMLOC0(1) > 0) THEN + NS(IMLOC0(1)) = 0 + KS(IMLOC0(1)) = 0 ENDIF #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAS,ZINPS,ZOUT) @@ -325,12 +345,12 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC HOST_DATA USE_DEVICE(ZAS,ZINPS,ZOUT) #endif CALL HIP_GEMM( & - & 22, & ! unique identifier + & NCUR_RESOL, 22, & ! unique identifier & 'N', 'N', & & 2*KF_FS, NS(:), KS(:), & & 1.0_JPRBT, & & ZINPS, IIN_STRIDES0, AOFFSETS, & - & ZAS, SIZE(ZAS,1), BOFFSETS, & + & ZAS, D%LEGENDRE_MATRIX_STRIDES, BOFFSETS, & & 0.0_JPRBT, & & ZOUT, IOUT_STRIDES0, COFFSETS, & & D_NUMP, STREAM=1_C_INT, ALLOC=ALLOCATOR%PTR) @@ -353,7 +373,13 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IS) FIRSTPRIVATE(KF_FS,IOUT_STRIDES0,IOUT0_STRIDES0) DEFAULT(NONE) ASYNC(1) + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IS) FIRSTPRIVATE(KF_FS,IOUT_STRIDES0,IOUT0_STRIDES0) & + !$ACC& DEFAULT(NONE) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JF=1,2*KF_FS @@ -390,5 +416,6 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) IF (LHOOK) CALL DR_HOOK('LE_DGEMM',1,ZHOOK_HANDLE) ! ------------------------------------------------------------------ + END ASSOCIATE END SUBROUTINE LEDIR END MODULE LEDIR_MOD diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 70a729ac0..8bfc2ac0e 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -11,7 +11,7 @@ ! MODULE LEINV_MOD - USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD, JPIB USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR IMPLICIT NONE @@ -24,17 +24,21 @@ MODULE LEINV_MOD SUBROUTINE LEINV_STRIDES(KF_LEG,IOUT_STRIDES0,IOUT_SIZE,IIN_STRIDES0,IIN_SIZE,& IOUT0_STRIDES0,IOUT0_SIZE,IIN0_STRIDES0,IIN0_SIZE) USE TPM_DIM, ONLY: R - USE TPM_DISTR, ONLY: D, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 + USE TPM_DISTR, ONLY: D IMPLICIT NONE INTEGER(KIND=JPIM), INTENT(IN) :: KF_LEG - INTEGER(KIND=JPIM), OPTIONAL :: IOUT_STRIDES0, IOUT_SIZE - INTEGER(KIND=JPIM), OPTIONAL :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM), OPTIONAL :: IOUT_STRIDES0 + INTEGER(KIND=JPIB), OPTIONAL :: IOUT_SIZE + INTEGER(KIND=JPIM), OPTIONAL :: IIN_STRIDES0 + INTEGER(KIND=JPIB), OPTIONAL :: IIN_SIZE INTEGER(KIND=JPIM), OPTIONAL :: IOUT0_STRIDES0, IOUT0_SIZE INTEGER(KIND=JPIM), OPTIONAL :: IIN0_STRIDES0, IIN0_SIZE + ASSOCIATE(D_OFFSETS_GEMM1=>D%OFFSETS_GEMM1, D_OFFSETS_GEMM2=>D%OFFSETS_GEMM2) + IF (PRESENT(IOUT_STRIDES0)) & IOUT0_STRIDES0 = ALIGN(KF_LEG,A) @@ -52,6 +56,8 @@ SUBROUTINE LEINV_STRIDES(KF_LEG,IOUT_STRIDES0,IOUT_SIZE,IIN_STRIDES0,IIN_SIZE,& IIN0_STRIDES0 = ALIGN(KF_LEG,A) IF (PRESENT(IIN0_SIZE)) & IIN0_SIZE = IIN0_STRIDES0 * ALIGN(MAX((R%NTMAX+2)/2,(R%NTMAX+3)/2),A) + + END ASSOCIATE END SUBROUTINE LEINV_STRIDES SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) @@ -93,12 +99,12 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS, NOUT + USE TPM_GEN, ONLY: LSYNC_TRANS, NOUT, NCUR_RESOL USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK - USE TPM_DIM, ONLY: R_NDGNH, R_NSMAX, R_NDGL - USE TPM_GEOMETRY, ONLY: G_NDGLU - USE TPM_FIELDS_FLAT, ONLY: ZAA, ZAS, ZAA0, ZAS0, KMLOC0 - USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 + USE TPM_DIM, ONLY: R + USE TPM_GEOMETRY, ONLY: G + USE TPM_FIELDS_GPU, ONLY: FG + USE TPM_DISTR, ONLY: D USE HICBLAS_MOD, ONLY: HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT @@ -119,15 +125,21 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR ! LOCAL - INTEGER(KIND=JPIM) :: KS(D_NUMP), NS(D_NUMP), AOFFSETS(D_NUMP), BOFFSETS(D_NUMP), COFFSETS(D_NUMP) - INTEGER(KIND=JPIM) :: KM, KMLOC, IA, IS, ISL, J1, JGL, JK, J - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_SIZE - INTEGER(KIND=JPIM) :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM) :: KS(D%NUMP), NS(D%NUMP) + INTEGER(KIND=JPIB) :: AOFFSETS(D%NUMP), BOFFSETS(D%NUMP), COFFSETS(D%NUMP) + INTEGER(KIND=JPIM) :: KM, KMLOC, IA, IS, ISL, J1, JGL, JK, J, IMLOC0(1) + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_SIZE + INTEGER(KIND=JPIM) :: IIN_STRIDES0 + INTEGER(KIND=JPIB) :: IIN_SIZE INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_SIZE INTEGER(KIND=JPIM) :: IIN0_STRIDES0, IIN0_SIZE REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + ASSOCIATE(D_NUMP=>D%NUMP, R_NSMAX=>R%NSMAX, G_NDGLU=>G%NDGLU, D_MYMS=>D%MYMS, D_OFFSETS_GEMM1=>D%OFFSETS_GEMM1,& + D_OFFSETS_GEMM2=>D%OFFSETS_GEMM2, & + ZAA=>FG%ZAA, ZAS=>FG%ZAS, ZAA0=>FG%ZAA0, ZAS0=>FG%ZAS0) !* 1.1 PREPARATIONS. IF (LHOOK) CALL DR_HOOK('LE_DGEMM',0,ZHOOK_HANDLE) @@ -146,10 +158,10 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(D_MYMS,D_NUMP,G_NDGLU) & + !$ACC DATA PRESENT(D,D_MYMS,D_NUMP) & !$ACC& PRESENT(ZINP,ZOUTS,ZOUTA,ZINP0,ZOUTS0,ZOUTA0) & !$ACC& PRESENT(ZAA,ZAS,PIA) & - !$ACC& PRESENT(R_NSMAX,G_NDGLU,D_OFFSETS_GEMM2) + !$ACC& PRESENT(R,R_NSMAX,D_OFFSETS_GEMM2) #endif ! READ 2:NSMAX+3 @@ -166,7 +178,13 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IA,J) FIRSTPRIVATE(KF_LEG,IIN_STRIDES0,IIN0_STRIDES0) DEFAULT(NONE) ASYNC(1) + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IA,J) & + !$ACC& FIRSTPRIVATE(KF_LEG,IIN_STRIDES0,IIN0_STRIDES0) DEFAULT(NONE) & +#ifdef _CRAYFTN + !$ACC& +#else + !$ACC& ASYNC(1) +#endif #endif DO KMLOC=1,D_NUMP DO JK=1,2*KF_LEG @@ -220,7 +238,8 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) ENDIF CALL GSTATS(424,0) - IF (KMLOC0 > 0) THEN + IMLOC0 = FINDLOC(D_MYMS,0) + IF (IMLOC0(1) > 0) THEN ! compute m=0 in double precision #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAA0,ZINP0,ZOUTA0) @@ -252,12 +271,12 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) KS(KMLOC) = (R_NSMAX-KM+2)/2 NS(KMLOC) = G_NDGLU(KM) AOFFSETS(KMLOC) = IIN_STRIDES0*D_OFFSETS_GEMM2(KMLOC) - BOFFSETS(KMLOC) = SIZE(ZAA,1)*SIZE(ZAA,2)*(KMLOC-1) + BOFFSETS(KMLOC) = D%OFFSETS_GEMM_MATRIX(KMLOC) COFFSETS(KMLOC) = IOUT_STRIDES0*D_OFFSETS_GEMM1(KMLOC) ENDDO - IF(KMLOC0 > 0) THEN - NS(KMLOC0) = 0 - KS(KMLOC0) = 0 + IF(IMLOC0(1) > 0) THEN + NS(IMLOC0(1)) = 0 + KS(IMLOC0(1)) = 0 ENDIF #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAA,ZINP,ZOUTA) @@ -266,12 +285,12 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC HOST_DATA USE_DEVICE(ZAA,ZINP,ZOUTA) #endif CALL HIP_GEMM( & - & 11, & ! unique identifier + & NCUR_RESOL, 11, & ! unique identifier & 'N', 'T', & & 2*KF_LEG, NS(:), KS(:), & & 1.0_JPRBT, & & ZINP, IIN_STRIDES0, AOFFSETS, & - & ZAA, SIZE(ZAA,1), BOFFSETS, & + & ZAA, D%LEGENDRE_MATRIX_STRIDES, BOFFSETS, & & 0.0_JPRBT, & & ZOUTA, IOUT_STRIDES0, COFFSETS, & & D_NUMP, STREAM=1_C_INT, ALLOC=ALLOCATOR%PTR) @@ -305,7 +324,13 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IS,J) FIRSTPRIVATE(KF_LEG,IIN_STRIDES0,IIN0_STRIDES0) DEFAULT(NONE) ASYNC(1) + !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IS,J) & + !$ACC& FIRSTPRIVATE(KF_LEG,IIN_STRIDES0,IIN0_STRIDES0) DEFAULT(NONE) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JK=1,2*KF_LEG @@ -357,7 +382,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) ENDIF CALL GSTATS(424,0) - IF (KMLOC0 > 0) THEN + IF (IMLOC0(1) > 0) THEN #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAS0,ZINP0,ZOUTS0) #endif @@ -386,12 +411,12 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) KS(KMLOC) = (R_NSMAX-KM+3)/2 NS(KMLOC) = G_NDGLU(KM) AOFFSETS(KMLOC) = IIN_STRIDES0*D_OFFSETS_GEMM2(KMLOC) - BOFFSETS(KMLOC) = SIZE(ZAS,1)*SIZE(ZAS,2)*(KMLOC-1) + BOFFSETS(KMLOC) = D%OFFSETS_GEMM_MATRIX(KMLOC) COFFSETS(KMLOC) = IOUT_STRIDES0*D_OFFSETS_GEMM1(KMLOC) ENDDO - IF(KMLOC0 > 0) THEN - NS(KMLOC0) = 0 - KS(KMLOC0) = 0 + IF(IMLOC0(1) > 0) THEN + NS(IMLOC0(1)) = 0 + KS(IMLOC0(1)) = 0 ENDIF #ifdef OMPGPU !$OMP TARGET DATA USE_DEVICE_PTR(ZAS,ZINP,ZOUTS) @@ -400,12 +425,12 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC HOST_DATA USE_DEVICE(ZAS,ZINP,ZOUTS) #endif CALL HIP_GEMM( & - & 12, & ! unique identifier + & NCUR_RESOL, 12, & ! unique identifier & 'N', 'T', & & 2*KF_LEG, NS(:), KS(:), & & 1.0_JPRBT, & & ZINP, IIN_STRIDES0, AOFFSETS, & - & ZAS, SIZE(ZAS,1), BOFFSETS, & + & ZAS, D%LEGENDRE_MATRIX_STRIDES, BOFFSETS, & & 0.0_JPRBT, & & ZOUTS, IOUT_STRIDES0, COFFSETS, & & D_NUMP, STREAM=1_C_INT, ALLOC=ALLOCATOR%PTR) @@ -435,5 +460,6 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) IF (LHOOK) CALL DR_HOOK('LE_DGEMM',1,ZHOOK_HANDLE) ! ------------------------------------------------------------------ + END ASSOCIATE END SUBROUTINE LEINV END MODULE LEINV_MOD diff --git a/src/trans/gpu/internal/ltdir_mod.F90 b/src/trans/gpu/internal/ltdir_mod.F90 index 8a3976165..a087c8e9f 100755 --- a/src/trans/gpu/internal/ltdir_mod.F90 +++ b/src/trans/gpu/internal/ltdir_mod.F90 @@ -11,7 +11,7 @@ ! MODULE LTDIR_MOD - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRB, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRB, JPRD, JPIB USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE @@ -26,7 +26,7 @@ MODULE LTDIR_MOD FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR) USE TPM_DISTR, ONLY: D USE TPM_DIM, ONLY: R - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE LEDIR_MOD, ONLY: LEDIR_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE @@ -36,8 +36,9 @@ FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR) INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS, KF_UV TYPE(LTDIR_HANDLE) :: HLTDIR - INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_SIZE + INTEGER(KIND=JPIB) :: IALLOC_SZ + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_SIZE INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_SIZE REAL(KIND=JPRBT) :: ZPRBT_DUMMY @@ -47,15 +48,15 @@ FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR) IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE) ! POA1 - IALLOC_SZ = ALIGN(2*KF_FS*(R%NTMAX+3)*D%NUMP*SIZEOF(ZPRBT_DUMMY),128) + IALLOC_SZ = ALIGN(2_JPIB*KF_FS*(R%NTMAX+3)*D%NUMP*C_SIZEOF(ZPRBT_DUMMY),128) ! POA2 - IALLOC_SZ = IALLOC_SZ + ALIGN(4*KF_UV*(R%NTMAX+3)*D%NUMP*SIZEOF(ZPRBT_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(4_JPIB*KF_UV*(R%NTMAX+3)*D%NUMP*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUT - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*SIZEOF(ZPRBT_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUT0 - IALLOC_SZ = IALLOC_SZ+ ALIGN(IOUT0_SIZE*SIZEOF(ZPRD_DUMMY),128) + IALLOC_SZ = IALLOC_SZ+ ALIGN(IOUT0_SIZE*C_SIZEOF(ZPRD_DUMMY),128) - HLTDIR%HOUT_AND_POA = RESERVE(ALLOCATOR, IALLOC_SZ) + HLTDIR%HOUT_AND_POA = RESERVE(ALLOCATOR, IALLOC_SZ, "HLTDIR%HOUT_AND_POA") END FUNCTION PREPARE_LTDIR SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALARS,& @@ -77,7 +78,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA USE TPM_TRANS, ONLY: NF_SC2, NF_SC3A, NF_SC3B USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC, C_SIZEOF !**** *LTDIR* - Control of Direct Legendre transform step @@ -162,8 +163,9 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA REAL(KIND=JPRD), POINTER :: ZOUT0(:) TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(LTDIR_HANDLE), INTENT(IN) :: HLTDIR - INTEGER(KIND=C_SIZE_T) :: IALLOC_POS, IALLOC_SZ - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_SIZE + INTEGER(KIND=JPIB) :: IALLOC_POS, IALLOC_SZ + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_SIZE INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_SIZE @@ -186,26 +188,26 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA IALLOC_POS = 1 - IALLOC_SZ = ALIGN(2*KF_FS*(R%NTMAX+3)*D%NUMP*SIZEOF(POA1_L(1)),128) + IALLOC_SZ = ALIGN(2_JPIB*KF_FS*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA1_L(1)),128) CALL ASSIGN_PTR(POA1_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) CALL C_F_POINTER(C_LOC(POA1_L), POA1, (/ 2*KF_FS, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ - IALLOC_SZ = ALIGN(4*KF_UV*(R%NTMAX+3)*D%NUMP*SIZEOF(POA2_L(1)),128) + IALLOC_SZ = ALIGN(4_JPIB*KF_UV*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA2_L(1)),128) CALL ASSIGN_PTR(POA2_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) CALL C_F_POINTER(C_LOC(POA2_L), POA2, (/ 4*KF_UV, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUT - IALLOC_SZ = ALIGN(IOUT_SIZE*SIZEOF(ZOUT(1)),128) + IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUT(1)),128) CALL ASSIGN_PTR(ZOUT, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUT0 - IALLOC_SZ = ALIGN(IOUT0_SIZE*SIZEOF(ZOUT0(1)),128) + IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUT0(1)),128) CALL ASSIGN_PTR(ZOUT0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) IALLOC_POS = IALLOC_POS + IALLOC_SZ @@ -259,7 +261,9 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA & PSPSC3A,PSPSC3B,PSPSC2 , & & KFLDPTRUV,KFLDPTRSC) +#ifdef ACCGPU !$ACC WAIT(1) +#endif IF (LSYNC_TRANS) THEN CALL GSTATS(430,0) diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 index 36ad2bee0..301e55248 100755 --- a/src/trans/gpu/internal/ltinv_mod.F90 +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -25,10 +25,10 @@ MODULE LTINV_MOD CONTAINS FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT(HLTINV) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD, JPIB USE TPM_DISTR, ONLY: D USE TPM_DIM, ONLY: R - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE LEINV_MOD, ONLY: LEINV_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE @@ -40,10 +40,12 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT( TYPE(LTINV_HANDLE) :: HLTINV - INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ, IPIA_SZ - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_SIZE + INTEGER(KIND=JPIB) :: IALLOC_SZ, IPIA_SZ + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_SIZE INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_SIZE - INTEGER(KIND=JPIM) :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM) :: IIN_STRIDES0 + INTEGER(KIND=JPIB) :: IIN_SIZE INTEGER(KIND=JPIM) :: IIN0_STRIDES0, IIN0_SIZE REAL(KIND=JPRBT) :: ZPRBT_DUMMY @@ -63,7 +65,7 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT( IF (LSCDERS) & IF_READIN = IF_READIN + KF_SCALARS ! Scalars NS Derivatives - IPIA_SZ = ALIGN(2*IF_READIN*(R%NSMAX+3)*D%NUMP*SIZEOF(ZPRBT_DUMMY),128) + IPIA_SZ = ALIGN(2_JPIB*IF_READIN*(R%NSMAX+3)*D%NUMP*C_SIZEOF(ZPRBT_DUMMY),128) ! In Legendre space, we then ignore vorticity/divergence, if ! they don't need to be transformed. @@ -77,23 +79,23 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT( ! PIA IALLOC_SZ = IPIA_SZ ! ZINP - IALLOC_SZ = IALLOC_SZ + ALIGN(IIN_SIZE*SIZEOF(ZPRBT_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IIN_SIZE*C_SIZEOF(ZPRBT_DUMMY),128) ! ZINP0 - IALLOC_SZ = IALLOC_SZ + ALIGN(IIN0_SIZE*SIZEOF(ZPRD_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IIN0_SIZE*C_SIZEOF(ZPRD_DUMMY),128) - HLTINV%HPIA_AND_IN = RESERVE(ALLOCATOR, IALLOC_SZ) + HLTINV%HPIA_AND_IN = RESERVE(ALLOCATOR, IALLOC_SZ, "HLTINV_HPIA_AND_IN") IALLOC_SZ = 0 ! ZOUTA - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*SIZEOF(ZPRBT_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUTS - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*SIZEOF(ZPRBT_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUTA0 - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*SIZEOF(ZPRD_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*C_SIZEOF(ZPRD_DUMMY),128) ! ZOUTS0 - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*SIZEOF(ZPRD_DUMMY),128) + IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*C_SIZEOF(ZPRD_DUMMY),128) - HLTINV%HOUTS_AND_OUTA = RESERVE(ALLOCATOR, IALLOC_SZ) + HLTINV%HOUTS_AND_OUTA = RESERVE(ALLOCATOR, IALLOC_SZ, "HLTINV_HOUTS_AND_OUTA") END FUNCTION PREPARE_LTINV @@ -101,7 +103,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2, & & ZOUTS,ZOUTA,ZOUTS0,ZOUTA0) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD, JPIB USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TPM_DIM, ONLY: R USE TPM_TRANS, ONLY: LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B, LSCDERS @@ -113,12 +115,11 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& USE SPNSDE_MOD, ONLY: SPNSDE USE LEINV_MOD, ONLY: LEINV_STRIDES, LEINV USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS - USE TPM_FIELDS, ONLY: F - USE TPM_FIELDS_FLAT, ONLY: ZEPSNM + USE TPM_FIELDS_GPU, ONLY: FG USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_GEN, ONLY: LSYNC_TRANS USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC, C_SIZEOF !**** *LTINV* - Inverse Legendre transform ! @@ -193,17 +194,21 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(LTINV_HANDLE), INTENT(IN) :: HLTINV - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_SIZE + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_SIZE INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_SIZE - INTEGER(KIND=JPIM) :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM) :: IIN_STRIDES0 + INTEGER(KIND=JPIB) :: IIN_SIZE INTEGER(KIND=JPIM) :: IIN0_STRIDES0, IIN0_SIZE INTEGER(KIND=JPIM) :: IF_READIN, IF_LEG - INTEGER(KIND=C_SIZE_T) :: IALLOC_POS, IALLOC_SZ + INTEGER(KIND=JPIB) :: IALLOC_POS, IALLOC_SZ REAL(KIND=JPRBT), POINTER :: ZINP(:) REAL(KIND=JPRD), POINTER :: ZINP0(:) + ASSOCIATE(ZEPSNM=>FG%ZEPSNM) + ! ------------------------------------------------------------------ !* 1. PERFORM LEGENDRE TRANFORM. @@ -233,20 +238,20 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& IALLOC_POS = 1 ! PIA - IALLOC_SZ = ALIGN(2*IF_READIN*(R%NTMAX+3)*D%NUMP*SIZEOF(PIA_L(1)),128) + IALLOC_SZ = ALIGN(2_JPIB*IF_READIN*(R%NTMAX+3)*D%NUMP*C_SIZEOF(PIA_L(1)),128) CALL ASSIGN_PTR(PIA_L, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& & IALLOC_POS, IALLOC_SZ) CALL C_F_POINTER(C_LOC(PIA_L), PIA, (/ 2*IF_READIN, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZINP - IALLOC_SZ = ALIGN(IIN_SIZE*SIZEOF(ZINP(1)),128) + IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINP(1)),128) CALL ASSIGN_PTR(ZINP, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZINP0 - IALLOC_SZ = ALIGN(IIN0_SIZE*SIZEOF(ZINP0(1)),128) + IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINP0(1)),128) CALL ASSIGN_PTR(ZINP0, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ @@ -254,25 +259,25 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& IALLOC_POS = 1 ! ZOUTA - IALLOC_SZ = ALIGN(IOUT_SIZE*SIZEOF(ZOUTA(1)),128) + IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUTA(1)),128) CALL ASSIGN_PTR(ZOUTA, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTS - IALLOC_SZ = ALIGN(IOUT_SIZE*SIZEOF(ZOUTS(1)),128) + IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUTS(1)),128) CALL ASSIGN_PTR(ZOUTS, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTA0 - IALLOC_SZ = ALIGN(IOUT0_SIZE*SIZEOF(ZOUTA0(1)),128) + IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUTA0(1)),128) CALL ASSIGN_PTR(ZOUTA0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTS0 - IALLOC_SZ = ALIGN(IOUT0_SIZE*SIZEOF(ZOUTS0(1)),128) + IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUTS0(1)),128) CALL ASSIGN_PTR(ZOUTS0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ @@ -319,6 +324,11 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& ENDIF CALL GSTATS(422,0) #ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:PSPVOR,PSPDIV) IF(KF_UV > 0) + !$OMP TARGET DATA MAP(TO:PSPSCALAR) IF(PRESENT(PSPSCALAR) .AND. KF_SCALARS > 0) + !$OMP TARGET DATA MAP(TO:PSPSC2) IF(NF_SC2 > 0) + !$OMP TARGET DATA MAP(TO:PSPSC3A) IF(NF_SC3A > 0) + !$OMP TARGET DATA MAP(TO:PSPSC3B) IF(NF_SC3B > 0) #endif #ifdef ACCGPU !$ACC DATA COPYIN(PSPVOR,PSPDIV) IF(KF_UV > 0) @@ -402,6 +412,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& CALL LEINV(ALLOCATOR,PIA(2*(IF_READIN-IF_LEG)+1:IF_READIN,:,:),ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,IF_LEG) IF (LHOOK) CALL DR_HOOK('LTINV_MOD',1,ZHOOK_HANDLE) + END ASSOCIATE ! ------------------------------------------------------------------ END SUBROUTINE LTINV END MODULE LTINV_MOD diff --git a/src/trans/gpu/internal/prepsnm_mod.F90 b/src/trans/gpu/internal/prepsnm_mod.F90 index 71841e4e1..351e693e2 100755 --- a/src/trans/gpu/internal/prepsnm_mod.F90 +++ b/src/trans/gpu/internal/prepsnm_mod.F90 @@ -53,7 +53,7 @@ SUBROUTINE PREPSNM USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DIM, ONLY: R USE TPM_FIELDS, ONLY: F - USE TPM_FIELDS_FLAT, ONLY: ZEPSNM + USE TPM_FIELDS_GPU, ONLY: FG USE TPM_DISTR, ONLY: D ! @@ -85,12 +85,12 @@ SUBROUTINE PREPSNM !$ACC loop #endif DO JN=0,KM-1 - ZEPSNM(KMLOC,JN) = 0.0_JPRBT + FG%ZEPSNM(KMLOC,JN) = 0.0_JPRBT ENDDO ENDIF DO JN=KM,R%NTMAX+2 - ZEPSNM(KMLOC,JN) =REAL(F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN),JPRBT) + FG%ZEPSNM(KMLOC,JN) = REAL(F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN),JPRBT) ENDDO ! end loop over wavenumber ENDDO diff --git a/src/trans/gpu/internal/prfi1b_mod.F90 b/src/trans/gpu/internal/prfi1b_mod.F90 index 00a2985b3..489683f8a 100755 --- a/src/trans/gpu/internal/prfi1b_mod.F90 +++ b/src/trans/gpu/internal/prfi1b_mod.F90 @@ -14,8 +14,8 @@ MODULE PRFI1B_MOD SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) USE PARKIND1, ONLY: JPIM, JPRB - USE TPM_DIM, ONLY: R, R_NSMAX - USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NASM0 + USE TPM_DIM, ONLY: R + USE TPM_DISTR, ONLY: D USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS !**** *PRFI1* - Prepare spectral fields for inverse Legendre transform @@ -77,10 +77,11 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) !* 1. EXTRACT FIELDS FROM SPECTRAL ARRAYS. ! -------------------------------------------------- + ASSOCIATE(D_NUMP=>D%NUMP, D_MYMS=>D%MYMS, D_NASM0=>D%NASM0, R_NSMAX=>R%NSMAX) #ifdef ACCGPU !$ACC DATA & - !$ACC& PRESENT(D_NUMP,R_NSMAX,D_MYMS,D_NASM0) & + !$ACC& PRESENT(D,D_NUMP,R,R_NSMAX,D_MYMS,D_NASM0) & !$ACC& PRESENT(PIA) & !$ACC& PRESENT(PSPEC) ASYNC(1) #endif @@ -101,11 +102,12 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) !loop over wavenumber #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ILCM,IFLD,IASM0,IR,II,INM) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ILCM,IFLD,IASM0,IR,II,INM) & + !$OMP& FIRSTPRIVATE(KFIELDS) #endif #ifdef ACCGPU !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(KM,ILCM,IFLD,IASM0,IR,II,INM) & - !$ACC& FIRSTPRIVATE(KFIELDS) ASYNC(1) + !$ACC& FIRSTPRIVATE(KFIELDS) ASYNC(1) #endif DO KMLOC=1,D_NUMP DO JN=1,R_NSMAX+1 @@ -128,11 +130,10 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) ENDDO #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KM,ILCM) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KM,ILCM) FIRSTPRIVATE(KFIELDS) #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(2) PRIVATE(KM,ILCM) & - !$ACC& FIRSTPRIVATE(KFIELDS) ASYNC(1) + !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(2) PRIVATE(KM,ILCM) FIRSTPRIVATE(KFIELDS) ASYNC(1) #endif DO KMLOC=1,D_NUMP DO JFLD=1,2*KFIELDS @@ -150,11 +151,16 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) !loop over wavenumber #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,ILCM,IOFF,INM,IR,II) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,IASM0,INM) & + !$OMP& FIRSTPRIVATE(KFIELDS,KDIM) #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,IASM0,INM) & - !$ACC& FIRSTPRIVATE(KFIELDS,KDIM) ASYNC(1) + !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,IASM0,INM) FIRSTPRIVATE(KFIELDS,KDIM) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JN=0,R_NSMAX+3 @@ -184,10 +190,11 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) !$ACC END DATA #endif #ifdef OMPGPU - !$OMP END TARGET DATA +!$OMP END TARGET DATA #endif ! ------------------------------------------------------------------ + END ASSOCIATE END SUBROUTINE PRFI1B END MODULE PRFI1B_MOD diff --git a/src/trans/gpu/internal/set_resol_mod.F90 b/src/trans/gpu/internal/set_resol_mod.F90 index c7367bdd6..afd7cbcad 100755 --- a/src/trans/gpu/internal/set_resol_mod.F90 +++ b/src/trans/gpu/internal/set_resol_mod.F90 @@ -18,6 +18,7 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) USE TPM_DISTR, ONLY: D, DISTR_RESOL USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL USE TPM_FIELDS, ONLY: F, FIELDS_RESOL +USE TPM_FIELDS_GPU, ONLY: FG, FIELDS_GPU_RESOL USE TPM_FLT, ONLY: S, FLT_RESOL USE TPM_CTL, ONLY: C, CTL_RESOL USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS @@ -57,6 +58,7 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) NCUR_RESOL = IRESOL R => DIM_RESOL(NCUR_RESOL) F => FIELDS_RESOL(NCUR_RESOL) + FG => FIELDS_GPU_RESOL(NCUR_RESOL) G => GEOM_RESOL(NCUR_RESOL) D => DISTR_RESOL(NCUR_RESOL) S => FLT_RESOL(NCUR_RESOL) diff --git a/src/trans/gpu/internal/spnsde_mod.F90 b/src/trans/gpu/internal/spnsde_mod.F90 index 10acc5dc1..d7fdcd282 100755 --- a/src/trans/gpu/internal/spnsde_mod.F90 +++ b/src/trans/gpu/internal/spnsde_mod.F90 @@ -14,9 +14,8 @@ MODULE SPNSDE_MOD SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT -USE TPM_DIM, ONLY: R, R_NTMAX -USE TPM_DISTR, ONLY: D, D_MYMS, D_NUMP -USE TPM_FIELDS_FLAT, ONLY: ZEPSNM +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D !**** *SPNSDE* - Compute North-South derivative in spectral space @@ -82,15 +81,13 @@ SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) ! LOCAL INTEGER SCALARS INTEGER(KIND=JPIM) :: IJ, ISKIP, J, JN, JI, IR, II +ASSOCIATE(D_NUMP=>D%NUMP, R_NTMAX=>R%NTMAX, D_MYMS=>D%MYMS) + #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT (R_NTMAX, D_MYMS) & +!$ACC& PRESENT (R,R_NTMAX, D,D_MYMS) & !$ACC& PRESENT (D_NUMP,PEPSNM, PF, PNSD) ASYNC(1) #endif -#ifdef OMPGPU -!$OMP TARGET DATA & -!$OMP& MAP(PRESENT,ALLOC:ZN) -#endif ! ------------------------------------------------------------------ @@ -101,13 +98,16 @@ SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) !* 1.1 COMPUTE #ifdef OMPGPU - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO - !! DEFAULT(NONE) PRIVATE(IJ) & - !!$OMP& SHARED(KM,F,ZN,ZEPSNM,KMLOC) +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,IR,II,JI) MAP(TO:KF_SCALARS) & +!$OMP& SHARED(D,R) #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,IR,II,JI) & - !$ACC& FIRSTPRIVATE(KMLOC,KF_SCALARS) ASYNC(1) +!$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,IR,II,JI) FIRSTPRIVATE(KF_SCALARS) & +#ifndef _CRAYFTN +!$ACC& ASYNC(1) +#else +!$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JN=0,R_NTMAX+1 @@ -134,14 +134,12 @@ SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) ENDDO END DO -#ifdef OMPGPU -!$OMP END TARGET DATA -#endif #ifdef ACCGPU !$ACC END DATA #endif ! ------------------------------------------------------------------ +END ASSOCIATE END SUBROUTINE SPNSDE END MODULE SPNSDE_MOD diff --git a/src/trans/gpu/internal/suleg_mod.F90 b/src/trans/gpu/internal/suleg_mod.F90 index 3f90e62ad..5fe30cdcc 100755 --- a/src/trans/gpu/internal/suleg_mod.F90 +++ b/src/trans/gpu/internal/suleg_mod.F90 @@ -9,9 +9,6 @@ ! MODULE SULEG_MOD -#ifdef __NEC__ -#define SIZEOF(x) STORAGE_SIZE(x)/KIND(x) -#endif CONTAINS SUBROUTINE SULEG !DEC$ OPTIMIZE:1 @@ -544,7 +541,7 @@ SUBROUTINE SULEG IF (ALLOCATED(ZLPOL)) DEALLOCATE(ZLPOL) !$OMP END PARALLEL - stop 'Error: code path not (yet) supported in GPU version' + CALL ABORT_TRANS('SULEG: Code path not (yet) supported in GPU version') !CALL PREPSNM(IM,JMLOC,ZEPSNM) ALLOCATE(S%FA(JMLOC)%RPNMWI(2*IDGLU,1:2)) DO JGL=1,2*IDGLU diff --git a/src/trans/gpu/internal/sump_trans_mod.F90 b/src/trans/gpu/internal/sump_trans_mod.F90 index a0f2260b4..3c3b94d69 100755 --- a/src/trans/gpu/internal/sump_trans_mod.F90 +++ b/src/trans/gpu/internal/sump_trans_mod.F90 @@ -19,7 +19,7 @@ SUBROUTINE SUMP_TRANS ! Modifications : ! P.Marguinaud : 11-Sep-2012 : Fix twice allocated pointer -USE EC_PARKIND ,ONLY : JPIM ,JPRD +USE EC_PARKIND ,ONLY : JPIM ,JPRD, JPIB USE TPM_GEN ,ONLY : NOUT, NPRINTLEV USE TPM_DIM ,ONLY : R @@ -40,7 +40,8 @@ SUBROUTINE SUMP_TRANS INTEGER(KIND=JPIM) :: JM INTEGER(KIND=JPIM) :: JGL,IGL,IPLAT,ISENDSET,IRECVSET,JML,IPOS,IM -INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF,OFFSET1,OFFSET2,KMLOC,KM +INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF,KMLOC,KM +INTEGER(KIND=JPIB) :: OFFSET1,OFFSET2,OFFSET3 INTEGER(KIND=JPIM),ALLOCATABLE :: IGPTOTL(:,:) REAL(KIND=JPRD),ALLOCATABLE :: ZDUM(:) @@ -271,23 +272,33 @@ SUBROUTINE SUMP_TRANS ALLOCATE(D%OFFSETS_GEMM1(D%NUMP+1)) ALLOCATE(D%OFFSETS_GEMM2(D%NUMP+1)) +ALLOCATE(D%OFFSETS_GEMM_MATRIX(D%NUMP+1)) +ALLOCATE(D%LEGENDRE_MATRIX_STRIDES(D%NUMP)) OFFSET1 = 0 OFFSET2 = 0 +OFFSET3 = 0 DO KMLOC=1,D%NUMP KM = D%MYMS(KMLOC) D%OFFSETS_GEMM1(KMLOC) = OFFSET1 D%OFFSETS_GEMM2(KMLOC) = OFFSET2 + D%OFFSETS_GEMM_MATRIX(KMLOC) = OFFSET3 !KM=0 is transformed in double precision, no need to store here IF (KM /= 0) THEN OFFSET1 = OFFSET1 + ALIGN(G%NDGLU(KM),8) ! N_OFFSET takes the max of the two GEMMs OFFSET2 = OFFSET2 + ALIGN((R%NSMAX-KM+3)/2,8) + + D%LEGENDRE_MATRIX_STRIDES(KMLOC) = ALIGN(G%NDGLU(KM),8) + ! Note that both sizes have to be aligned because we make the GEMMs + ! multiples of 8 + OFFSET3 = OFFSET3 + ALIGN((R%NSMAX-KM+3)/2,8) * D%LEGENDRE_MATRIX_STRIDES(KMLOC) ENDIF ENDDO D%OFFSETS_GEMM1(D%NUMP+1) = OFFSET1 D%OFFSETS_GEMM2(D%NUMP+1) = OFFSET2 +D%OFFSETS_GEMM_MATRIX(D%NUMP+1) = OFFSET3 ! ------------------------------------------------------------------ 9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) diff --git a/src/trans/gpu/internal/tpm_fields_flat.F90 b/src/trans/gpu/internal/tpm_fields_gpu.F90 old mode 100755 new mode 100644 similarity index 52% rename from src/trans/gpu/internal/tpm_fields_flat.F90 rename to src/trans/gpu/internal/tpm_fields_gpu.F90 index 780d0629c..7f0694b57 --- a/src/trans/gpu/internal/tpm_fields_flat.F90 +++ b/src/trans/gpu/internal/tpm_fields_gpu.F90 @@ -1,6 +1,6 @@ ! (C) Copyright 2000- ECMWF. ! (C) Copyright 2000- Meteo-France. -! (C) Copyright 2022- NVIDIA. +! (C) Copyright 2024- NVIDIA. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -9,29 +9,26 @@ ! nor does it submit to any jurisdiction. ! -MODULE TPM_FIELDS_FLAT +MODULE TPM_FIELDS_GPU -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD +USE EC_PARKIND, ONLY: JPRD, JPRBT IMPLICIT NONE SAVE -! flat copies of the fields defined in TPM_FIELDS -REAL(KIND=JPRD) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature -REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator -REAL(KIND=JPRD) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator - +TYPE FIELDS_GPU_TYPE ! scratch arrays for ltinv and ltdir and associated dimension variables - -REAL(KIND=JPRBT),ALLOCATABLE :: ZAA(:,:,:) !! JPRL for 1/2 -REAL(KIND=JPRBT),ALLOCATABLE :: ZAS(:,:,:) !! JPRL for 1/2 +REAL(KIND=JPRBT),ALLOCATABLE :: ZAA(:) !! JPRL for 1/2 +REAL(KIND=JPRBT),ALLOCATABLE :: ZAS(:) !! JPRL for 1/2 ! for m=0 in ledir_mod: REAL(KIND=JPRD),ALLOCATABLE :: ZAA0(:,:) REAL(KIND=JPRD),ALLOCATABLE :: ZAS0(:,:) -INTEGER(KIND=JPIM) :: KMLOC0 - REAL(KIND=JPRBT),ALLOCATABLE :: ZEPSNM(:,:) +END TYPE FIELDS_GPU_TYPE + +TYPE(FIELDS_GPU_TYPE),ALLOCATABLE,TARGET :: FIELDS_GPU_RESOL(:) +TYPE(FIELDS_GPU_TYPE),POINTER :: FG -END MODULE TPM_FIELDS_FLAT +END MODULE TPM_FIELDS_GPU diff --git a/src/trans/gpu/internal/tpm_hicfft.F90 b/src/trans/gpu/internal/tpm_hicfft.F90 index 9ae63df00..019071635 100755 --- a/src/trans/gpu/internal/tpm_hicfft.F90 +++ b/src/trans/gpu/internal/tpm_hicfft.F90 @@ -19,7 +19,7 @@ MODULE TPM_HICFFT ! Original October 2014 ! HICFFT abstraction for CUDA and HIP August 2023 B. Reuter - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT, C_PTR, C_LOC, C_FLOAT, C_DOUBLE + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_FLOAT, C_DOUBLE, C_LOC USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE IMPLICIT NONE @@ -28,6 +28,7 @@ MODULE TPM_HICFFT PRIVATE PUBLIC EXECUTE_DIR_FFT, EXECUTE_INV_FFT + PUBLIC CLEAN_FFT INTERFACE EXECUTE_DIR_FFT MODULE PROCEDURE EXECUTE_DIR_FFT_FLOAT,EXECUTE_DIR_FFT_DOUBLE @@ -37,6 +38,13 @@ MODULE TPM_HICFFT MODULE PROCEDURE EXECUTE_INV_FFT_FLOAT,EXECUTE_INV_FFT_DOUBLE END INTERFACE +INTERFACE + SUBROUTINE CLEAN_FFT(RESOL_ID) BIND(C, NAME="clean_fft") + USE ISO_C_BINDING + INTEGER(KIND=C_INT), INTENT(IN), VALUE :: RESOL_ID + END SUBROUTINE +END INTERFACE + ! ------------------------------------------------------------------ @@ -44,24 +52,28 @@ MODULE TPM_HICFFT ! ------------------------------------------------------------------ -SUBROUTINE EXECUTE_DIR_FFT_FLOAT(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) - USE EC_PARKIND ,ONLY : JPIM +SUBROUTINE EXECUTE_DIR_FFT_FLOAT(PREEL_REAL,PREEL_COMPLEX,RESOL_ID,KFIELD,LOENS,OFFSETS,ALLOC) + USE EC_PARKIND ,ONLY : JPIM, JPIB IMPLICIT NONE REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_REAL(:) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_COMPLEX(:) + INTEGER(KIND=JPIM),INTENT(IN) :: RESOL_ID INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD - INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:) + INTEGER(KIND=JPIB),INTENT(IN) :: OFFSETS(:) + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTERFACE - SUBROUTINE EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & + SUBROUTINE EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,RESOL_ID,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_dir_fft_float") - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_FLOAT, C_INT, C_PTR, C_INT64_T REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_REAL(*) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_COMPLEX(*) + INTEGER(KIND=C_INT),INTENT(IN),VALUE :: RESOL_ID INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD - INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) + INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*) + INTEGER(KIND=C_INT64_T),INTENT(IN) :: OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE @@ -70,30 +82,34 @@ SUBROUTINE EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_REAL,PREEL_COMPLEX) #endif - CALL EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) + CALL EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,RESOL_ID,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE EXECUTE_DIR_FFT_FLOAT -SUBROUTINE EXECUTE_DIR_FFT_DOUBLE(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) - USE EC_PARKIND ,ONLY : JPIM +SUBROUTINE EXECUTE_DIR_FFT_DOUBLE(PREEL_REAL,PREEL_COMPLEX,RESOL_ID,KFIELD,LOENS,OFFSETS,ALLOC) + USE EC_PARKIND ,ONLY : JPIM, JPIB IMPLICIT NONE REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_REAL(:) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_COMPLEX(:) + INTEGER(KIND=JPIM),INTENT(IN) :: RESOL_ID INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD - INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:) + INTEGER(KIND=JPIB),INTENT(IN) :: OFFSETS(:) + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTERFACE - SUBROUTINE EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & + SUBROUTINE EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,RESOL_ID,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_dir_fft_double") - USE ISO_C_BINDING, ONLY: C_DOUBLE, C_INT, C_PTR + USE ISO_C_BINDING, ONLY: C_DOUBLE, C_INT, C_PTR, C_INT64_T REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_REAL(*) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_COMPLEX(*) + INTEGER(KIND=C_INT),INTENT(IN),VALUE :: RESOL_ID INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD - INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) + INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*) + INTEGER(KIND=C_INT64_T),INTENT(IN) :: OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE @@ -102,31 +118,35 @@ SUBROUTINE EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSET #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_REAL,PREEL_COMPLEX) #endif - CALL EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) + CALL EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,RESOL_ID,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE EXECUTE_DIR_FFT_DOUBLE -SUBROUTINE EXECUTE_INV_FFT_FLOAT(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,ALLOC) - USE EC_PARKIND ,ONLY : JPIM +SUBROUTINE EXECUTE_INV_FFT_FLOAT(PREEL_COMPLEX,PREEL_REAL,RESOL_ID,KFIELD,LOENS,OFFSETS,ALLOC) + USE EC_PARKIND ,ONLY : JPIM, JPIB IMPLICIT NONE REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_COMPLEX(:) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_REAL(:) + INTEGER(KIND=JPIM),INTENT(IN) :: RESOL_ID INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD - INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:) + INTEGER(KIND=JPIB),INTENT(IN) :: OFFSETS(:) + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTERFACE - SUBROUTINE EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & + SUBROUTINE EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,RESOL_ID,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_inv_fft_float") - USE ISO_C_BINDING, ONLY: C_FLOAT, C_INT, C_PTR + USE ISO_C_BINDING, ONLY: C_FLOAT, C_INT, C_PTR, C_INT64_T REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_COMPLEX(*) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_REAL(*) + INTEGER(KIND=C_INT),INTENT(IN),VALUE :: RESOL_ID INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD - INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) + INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*) + INTEGER(KIND=C_INT64_T),INTENT(IN) :: OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE @@ -135,30 +155,34 @@ SUBROUTINE EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_COMPLEX,PREEL_REAL) #endif - CALL EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) + CALL EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,RESOL_ID,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif END SUBROUTINE -SUBROUTINE EXECUTE_INV_FFT_DOUBLE(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,ALLOC) - USE EC_PARKIND ,ONLY : JPIM +SUBROUTINE EXECUTE_INV_FFT_DOUBLE(PREEL_COMPLEX,PREEL_REAL,RESOL_ID,KFIELD,LOENS,OFFSETS,ALLOC) + USE EC_PARKIND ,ONLY : JPIM, JPIB IMPLICIT NONE REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_COMPLEX(:) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_REAL(:) + INTEGER(KIND=JPIM),INTENT(IN) :: RESOL_ID INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD - INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:), OFFSETS(:) - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + INTEGER(KIND=JPIM),INTENT(IN) :: LOENS(:) + INTEGER(KIND=JPIB),INTENT(IN) :: OFFSETS(:) + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: ALLOC INTERFACE - SUBROUTINE EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & + SUBROUTINE EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,RESOL_ID,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_inv_fft_double") - USE ISO_C_BINDING, ONLY: C_DOUBLE, C_INT, C_PTR + USE ISO_C_BINDING, ONLY: C_DOUBLE, C_INT, C_PTR, C_INT64_T REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_COMPLEX(*) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_REAL(*) + INTEGER(KIND=C_INT),INTENT(IN),VALUE :: RESOL_ID INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD - INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*), OFFSETS(*) + INTEGER(KIND=C_INT),INTENT(IN) :: LOENS(*) + INTEGER(KIND=C_INT64_T),INTENT(IN) :: OFFSETS(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: NFFT TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC END SUBROUTINE @@ -167,7 +191,7 @@ SUBROUTINE EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSET #ifdef ACCGPU !$ACC HOST_DATA USE_DEVICE(PREEL_COMPLEX,PREEL_REAL) #endif - CALL EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) + CALL EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,RESOL_ID,KFIELD,LOENS,OFFSETS,SIZE(LOENS),C_LOC(ALLOC)) #ifdef ACCGPU !$ACC END HOST_DATA #endif diff --git a/src/trans/gpu/internal/tpm_trans.F90 b/src/trans/gpu/internal/tpm_trans.F90 index 9d1262d10..c1d933b77 100755 --- a/src/trans/gpu/internal/tpm_trans.F90 +++ b/src/trans/gpu/internal/tpm_trans.F90 @@ -49,9 +49,6 @@ MODULE TPM_TRANS !INTEGER_M :: NF_UV_G ! Global version of NF_UV (grid-point space) !INTEGER_M :: NF_SCALARS_G ! Global version of NF_SCALARS (grid-point space) -REAL(KIND=JPRBT), ALLOCATABLE :: FOUBUF_IN(:) ! Fourier buffer -REAL(KIND=JPRBT), ALLOCATABLE :: FOUBUF(:) ! Fourier buffer - INTEGER(KIND=JPIM) :: NPROMA ! Blocking factor for gridpoint input/output INTEGER(KIND=JPIM) :: NGPBLKS ! Number of NPROMA blocks diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 7ec495ef8..95188f37b 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -22,10 +22,10 @@ MODULE TRGTOL_MOD END TYPE CONTAINS FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -35,13 +35,14 @@ FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL) REAL(KIND=JPRBT) :: DUMMY - INTEGER(KIND=C_SIZE_T) :: NELEM + INTEGER(KIND=JPIB) :: NELEM - HTRGTOL%HCOMBUFS = RESERVE(ALLOCATOR, int(KF_GP*D%NGPTOT*SIZEOF(DUMMY),kind=c_size_t)) + HTRGTOL%HCOMBUFS = RESERVE(ALLOCATOR, 1_JPIB*KF_GP*D%NGPTOT*C_SIZEOF(DUMMY), "HTRGTOL%HCOMBUFS") - NELEM = KF_FS*D%NLENGTF*SIZEOF(DUMMY) ! ZCOMBUFR - NELEM = NELEM + KF_FS*D%NLENGTF*SIZEOF(DUMMY) ! PREEL_REAL - HTRGTOL%HCOMBUFR_AND_REEL = RESERVE(ALLOCATOR, NELEM) + NELEM = 0 + NELEM = NELEM + 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(DUMMY) ! ZCOMBUFR + NELEM = NELEM + 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(DUMMY) ! PREEL_REAL + HTRGTOL%HCOMBUFR_AND_REEL = RESERVE(ALLOCATOR, NELEM, "HTRGTOL%HCOMBUFR_AND_REEL") END FUNCTION PREPARE_TRGTOL SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,& @@ -103,10 +104,10 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ! 09-01-02 G.Mozdzynski: use non-blocking recv and send ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPIB USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK - USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER - USE TPM_GEN, ONLY: LSYNC_TRANS + USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER, MPL_ABORT + USE TPM_GEN, ONLY: LSYNC_TRANS, LMPOFF USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS USE TPM_DISTR, ONLY: D, MYSETV, MYSETW, MTAGLG, NPRCIDS, MYPROC, NPROC, NPRTRW, & & NPRTRV @@ -114,16 +115,18 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML USE OML_MOD, ONLY: OML_MY_THREAD #if ECTRANS_HAVE_MPI - USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE + USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_REAL4, MPI_REAL8 ! Missing: MPI_ISEND, MPI_IRECV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: NPROMA - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T, C_SIZEOF USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE +#ifdef ACCGPU USE OPENACC, ONLY: ACC_HANDLE_KIND +#endif USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -142,28 +145,33 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ! LOCAL INTEGER SCALARS REAL(KIND=JPRBT), POINTER :: ZCOMBUFS(:),ZCOMBUFR(:) - INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) - INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + LOGICAL :: LLOCAL_CONTRIBUTION + INTEGER(KIND=JPIB) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIB) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT_MPI(NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT_MPI(NPROC) INTEGER(KIND=JPIM) :: IREQ (NPROC*2) INTEGER(KIND=JPIM) :: IRECV_TO_PROC(NPROC) INTEGER(KIND=JPIM) :: ISEND_TO_PROC(NPROC) INTEGER(KIND=JPIM) :: IFIRSTLAT, IGL, IGLL, ILAST,& - &ILASTLAT, ILEN, JROC, IPOS, ISETA, & + &ILASTLAT, ILEN, JROC, ISETA, & &ISETB, IRECV, & &ISETV, ISEND, JBLK, JFLD, & &JGL, JI, JK, JL, ISETW, IFLD, & &II,IBUFLENR,IRECV_COUNTS, IPROC,IFLDS, & &ISEND_COUNTS,INS,INR,IR, JKL, PBOUND, IERROR, ILOCAL_LAT INTEGER(KIND=JPIM) :: KF, KGL, KI, J3 + INTEGER(KIND=JPIB) :: IPOS INTEGER(KIND=JPIM) :: IOFF, ILAT_STRIP - INTEGER(KIND=JPIM) :: IRECV_BUFR_TO_OUT(D%NLENGTF,2),IRECV_BUFR_TO_OUT_OFFSET(NPROC), IRECV_BUFR_TO_OUT_V + INTEGER(KIND=JPIB) :: IRECV_BUFR_TO_OUT(D%NLENGTF,2) + INTEGER(KIND=JPIB) :: IRECV_BUFR_TO_OUT_OFFSET(NPROC), IRECV_BUFR_TO_OUT_V INTEGER(KIND=JPIM) :: ISEND_FIELD_COUNT(NPRTRV),ISEND_FIELD_COUNT_V INTEGER(KIND=JPIM) :: ISEND_WSET_SIZE(NPRTRW),ISEND_WSET_SIZE_V INTEGER(KIND=JPIM) :: ISEND_WSET_OFFSET(NPRTRW+1), ISEND_WSET_OFFSET_V - INTEGER(KIND=JPIM), ALLOCATABLE :: ICOMBUFS_OFFSET(:),ICOMBUFR_OFFSET(:) - INTEGER(KIND=JPIM) :: ICOMBUFS_OFFSET_V, ICOMBUFR_OFFSET_V + INTEGER(KIND=JPIB), ALLOCATABLE :: ICOMBUFS_OFFSET(:),ICOMBUFR_OFFSET(:) + INTEGER(KIND=JPIB) :: ICOMBUFS_OFFSET_V, ICOMBUFR_OFFSET_V INTEGER(KIND=JPIM) :: IFLDA(KF_GP) INTEGER(KIND=JPIM) :: IVSET(KF_GP) @@ -184,14 +192,18 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(MPI_REQUEST) :: IREQUEST(2*NPROC) #endif + + #ifdef PARKINDTRANS_SINGLE -#define TRGTOL_DTYPE MPI_FLOAT +#define TRGTOL_DTYPE MPI_REAL4 #else -#define TRGTOL_DTYPE MPI_DOUBLE +#define TRGTOL_DTYPE MPI_REAL8 #endif #if ECTRANS_HAVE_MPI - LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() ) + IF(.NOT. LMPOFF) THEN + LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() ) + ENDIF #endif ! ------------------------------------------------------------------ @@ -201,9 +213,10 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ! Note we have either ! - KVSETUV and KVSETSC (with PGP, which has u, v, and scalar fields), or ! - KVSETUV, KVSETSC2, KVSETSC3A KVSETSC3B (with PGPUV, GP3A, PGP3B and PGP2) - ! KVSETs are optionals. Their sizes canalso be inferred from KV_UV_G/KV_SCALARS_G (which + ! KVSETs are optionals. Their sizes can also be inferred from KV_UV_G/KV_SCALARS_G (which ! should match PSPXXX and PGPXXX arrays) IOFF=0 + IVSET(:) = -1 IF(PRESENT(KVSETUV)) THEN IVSET(IOFF+1:IOFF+KF_UV_G) = KVSETUV(:) IOFF=IOFF+KF_UV_G @@ -237,9 +250,6 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDIF ENDIF - IF (IOFF /= 2*KF_UV_G+KF_SCALARS_G) THEN - CALL ABORT_TRANS("TRGTOL: ERROR in IVSET computation") - ENDIF IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) @@ -281,8 +291,9 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, DO JROC=1,NPROC CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) ! total send size is # points per field * # fields - ISENDTOT(JROC) = ISEND_WSET_SIZE(ISETW)*ISEND_FIELD_COUNT(ISETV) + ISENDTOT(JROC) = 1_JPIB*ISEND_WSET_SIZE(ISETW)*ISEND_FIELD_COUNT(ISETV) ENDDO + LLOCAL_CONTRIBUTION = ISENDTOT(MYPROC) > 0 ! Prepare receiver arrays IRECV_BUFR_TO_OUT_OFFSET(:) = 0 @@ -325,11 +336,13 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, block CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),& - & int(KF_FS*D%NLENGTF*SIZEOF(PREEL_REAL(1))+1,kind=c_size_t), int(KF_FS*D%NLENGTF*SIZEOF(PREEL_REAL(1)),kind=c_size_t)) + & 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))+1, 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))) !!CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL), size1, size2) end block #ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:IRECV_BUFR_TO_OUT) MAP(ALLOC:PREEL_REAL) IF (KF_FS > 0) + !$OMP TARGET DATA MAP(TO:PGP_INDICES) #endif #ifdef ACCGPU !$ACC DATA COPYIN(IRECV_BUFR_TO_OUT) PRESENT(PREEL_REAL) IF (KF_FS > 0) ASYNC(1) @@ -369,8 +382,18 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) ENDIF - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), & +#ifdef ACCGPU + & STREAM=1_ACC_HANDLE_KIND) +#endif +#ifdef OMPGPU + & STREAM=1) +#endif + +#ifdef ACCGPU !$ACC WAIT(1) +#endif IF (PRESENT(PGP)) THEN #ifdef OMPGPU #endif @@ -456,7 +479,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF (ISEND_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFS),& - & 1_C_SIZE_T, int(ICOMBUFS_OFFSET(ISEND_COUNTS+1)*SIZEOF(ZCOMBUFS(1)),kind=c_size_t)) + & 1_JPIB, ICOMBUFS_OFFSET(ISEND_COUNTS+1)*C_SIZEOF(ZCOMBUFS(1))) ENDIF !....Pack loop......................................................... @@ -565,7 +588,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, CALL GSTATS(411,0) IF (IRECV_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),& - & 1_C_SIZE_T, int(ICOMBUFR_OFFSET(IRECV_COUNTS+1)*SIZEOF(ZCOMBUFR(1)),kind=c_size_t)) + & 1_JPIB, ICOMBUFR_OFFSET(IRECV_COUNTS+1)*C_SIZEOF(ZCOMBUFR(1))) ENDIF #ifdef OMPGPU #endif @@ -582,15 +605,31 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC HOST_DATA USE_DEVICE(ZCOMBUFR,ZCOMBUFS) #endif #else +#ifdef OMPGPU +#endif +#ifdef ACCGPU !! this is safe-but-slow fallback for running without GPU-aware MPI !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNTS > 0) #endif +#endif + + ! Skip the own contribution because this is ok to overflow + ISENDTOT(MYPROC) = 0 + IRECVTOT(MYPROC) = 0 + + ISENDTOT_MPI = ISENDTOT + IRECVTOT_MPI = IRECVTOT + IF (ANY(ISENDTOT_MPI /= ISENDTOT)) & + & CALL MPL_ABORT("Overflow in trgtol") + IF (ANY(IRECVTOT_MPI /= IRECVTOT)) & + & CALL MPL_ABORT("Overflow in trgtol") + ! Receive loop......................................................... DO INR=1,IRECV_COUNTS IR=IR+1 IPROC=IRECV_TO_PROC(INR) #if ECTRANS_HAVE_MPI - CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)),IRECVTOT(IPROC), & + CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)),IRECVTOT_MPI(IPROC), & & TRGTOL_DTYPE,NPRCIDS(IPROC)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR) IREQ(IR) = IREQUEST(IR)%MPI_VAL #else @@ -603,7 +642,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IR=IR+1 ISEND=ISEND_TO_PROC(INS) #if ECTRANS_HAVE_MPI - CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT(ISEND), & + CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT_MPI(ISEND), & & TRGTOL_DTYPE,NPRCIDS(ISEND)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR) IREQ(IR) = IREQUEST(IR)%MPI_VAL #else @@ -612,7 +651,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDDO ! Copy local contribution - IF(ISENDTOT(MYPROC) > 0 )THEN + IF(LLOCAL_CONTRIBUTION)THEN ! I have to send something to myself... ! Input is KF_GP fields. We find the resulting KF_FS fields. @@ -750,6 +789,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, CALL GSTATS(1603,1) #ifdef OMPGPU + !$OMP END TARGET DATA ! PGP_INDICES + !$OMP END TARGET DATA ! IRECV_BUFR_TO_OUT #endif #ifdef ACCGPU !$ACC END DATA ! ZCOMBUFR @@ -762,7 +803,14 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC END DATA !PGPUV !$ACC END DATA !PGP #endif - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), & +#ifdef ACCGPU + & STREAM=1_ACC_HANDLE_KIND) +#endif +#ifdef OMPGPU + & STREAM=1) +#endif IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) END SUBROUTINE TRGTOL diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 46dec8747..7df708b0c 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -22,10 +22,10 @@ MODULE TRLTOG_MOD END TYPE CONTAINS FUNCTION PREPARE_TRLTOG(ALLOCATOR,KF_FS,KF_GP) RESULT(HTRLTOG) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -35,12 +35,13 @@ FUNCTION PREPARE_TRLTOG(ALLOCATOR,KF_FS,KF_GP) RESULT(HTRLTOG) REAL(KIND=JPRBT) :: DUMMY - INTEGER(KIND=C_SIZE_T) :: NELEM + INTEGER(KIND=JPIB) :: NELEM - NELEM = ALIGN(KF_GP*D%NGPTOT*SIZEOF(DUMMY),128) ! ZCOMBUFR - NELEM = ALIGN(NELEM + KF_FS*D%NLENGTF*SIZEOF(DUMMY),128) !ZCOMBUFS upper obund + NELEM = 0 + NELEM = NELEM + ALIGN(1_JPIB*KF_GP*D%NGPTOT*C_SIZEOF(DUMMY),128) ! ZCOMBUFR + NELEM = NELEM + ALIGN(1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(DUMMY),128) !ZCOMBUFS upper obund - HTRLTOG%HCOMBUFR_AND_COMBUFS = RESERVE(ALLOCATOR, NELEM) + HTRLTOG%HCOMBUFR_AND_COMBUFS = RESERVE(ALLOCATOR, NELEM, "HTRLTOG%HCOMBUFR_AND_COMBUFS") END FUNCTION PREPARE_TRLTOG SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,KPTRGP,& @@ -104,10 +105,10 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ! 09-01-02 G.Mozdzynski: use non-blocking recv and send ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPIB USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK - USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER - USE TPM_GEN, ONLY: LSYNC_TRANS, NERR + USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER, MPL_ABORT + USE TPM_GEN, ONLY: LSYNC_TRANS, NERR, LMPOFF USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS USE TPM_DISTR, ONLY: D,MYSETV, MYSETW, MTAGLG,NPRCIDS,MYPROC,NPROC,NPRTRW,NPRTRV USE PE2SET_MOD, ONLY: PE2SET @@ -115,23 +116,20 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE OML_MOD, ONLY: OML_MY_THREAD USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS #if ECTRANS_HAVE_MPI - USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE + USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_REAL4, MPI_REAL8 ! Missing: MPI_ISEND, MPI_IRECV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE +#ifdef ACCGPU USE OPENACC, ONLY: ACC_HANDLE_KIND - - IMPLICIT NONE - -#ifdef OMPGPU - include 'mpif.h' #endif + IMPLICIT NONE REAL(KIND=JPRBT), INTENT(INOUT), POINTER :: PREEL_REAL(:) INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS,KF_GP @@ -155,17 +153,21 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, REAL(KIND=JPRBT), POINTER :: ZCOMBUFS(:),ZCOMBUFR(:) - INTEGER(KIND=JPIM) :: ISENDTOT (NPROC) - INTEGER(KIND=JPIM) :: IRECVTOT (NPROC) + LOGICAL :: LLOCAL_CONTRIBUTION + INTEGER(KIND=JPIB) :: ISENDTOT (NPROC) + INTEGER(KIND=JPIB) :: IRECVTOT (NPROC) + INTEGER(KIND=JPIM) :: ISENDTOT_MPI(NPROC) + INTEGER(KIND=JPIM) :: IRECVTOT_MPI(NPROC) INTEGER(KIND=JPIM) :: IREQ (NPROC*2) INTEGER(KIND=JPIM) :: IRECV_TO_PROC(NPROC) INTEGER(KIND=JPIM) :: ISEND_TO_PROC(NPROC) INTEGER(KIND=JPIM) :: JFLD, J, JI, J1, J2, JGL, JK, JL, IFLDS, JROC, INR, INS INTEGER(KIND=JPIM) :: IFIRSTLAT, ILASTLAT, IFLD, IGL, IGLL,& - &IPOS, ISETA, ISETB, ISETV, ISEND, IRECV, ISETW, IPROC, & + &ISETA, ISETB, ISETV, ISEND, IRECV, ISETW, IPROC, & &IR, ILOCAL_LAT, ISEND_COUNTS, IRECV_COUNTS, IERROR, II, ILEN, IBUFLENS, IBUFLENR, & &JBLK, ILAT_STRIP + INTEGER(KIND=JPIB) :: IPOS ! Contains FIELD, PARS, LEVS INTEGER(KIND=JPIM) :: IGP_OFFSETS(KF_GP,3) @@ -173,12 +175,13 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, INTEGER(KIND=JPIM) :: IUVPAR,IGP2PAR,IGP3ALEV,IGP3APAR,IGP3BLEV,IGP3BPAR,IPAROFF,IOFF INTEGER(KIND=JPIM) :: IFLDA(KF_GP) - INTEGER(KIND=JPIM) :: IIN_TO_SEND_BUFR(D%NLENGTF,2),IIN_TO_SEND_BUFR_OFFSET(NPROC), IIN_TO_SEND_BUFR_V + INTEGER(KIND=JPIB) :: IIN_TO_SEND_BUFR(D%NLENGTF,2) + INTEGER(KIND=JPIM) :: IIN_TO_SEND_BUFR_OFFSET(NPROC), IIN_TO_SEND_BUFR_V INTEGER(KIND=JPIM) :: IRECV_FIELD_COUNT(NPRTRV),IRECV_FIELD_COUNT_V INTEGER(KIND=JPIM) :: IRECV_WSET_SIZE(NPRTRW),IRECV_WSET_SIZE_V INTEGER(KIND=JPIM) :: IRECV_WSET_OFFSET(NPRTRW+1), IRECV_WSET_OFFSET_V - INTEGER(KIND=JPIM), ALLOCATABLE :: ICOMBUFS_OFFSET(:),ICOMBUFR_OFFSET(:) - INTEGER(KIND=JPIM) :: ICOMBUFS_OFFSET_V, ICOMBUFR_OFFSET_V + INTEGER(KIND=JPIB), ALLOCATABLE :: ICOMBUFS_OFFSET(:),ICOMBUFR_OFFSET(:) + INTEGER(KIND=JPIB) :: ICOMBUFS_OFFSET_V, ICOMBUFR_OFFSET_V INTEGER(KIND=JPIM) :: IVSETUV(KF_UV_G) INTEGER(KIND=JPIM) :: IVSETSC(KF_SCALARS_G) @@ -197,12 +200,14 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #ifdef PARKINDTRANS_SINGLE -#define TRLTOG_DTYPE MPI_FLOAT +#define TRLTOG_DTYPE MPI_REAL4 #else -#define TRLTOG_DTYPE MPI_DOUBLE +#define TRLTOG_DTYPE MPI_REAL8 #endif #if ECTRANS_HAVE_MPI - LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() ) + IF(.NOT. LMPOFF) THEN + LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() ) + ENDIF #endif ! ------------------------------------------------------------------ @@ -444,7 +449,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, DO JROC=1,NPROC CALL PE2SET(JROC,ISETA,ISETB,ISETW,ISETV) ! total recv size is # points per field * # fields - IRECVTOT(JROC) = IRECV_WSET_SIZE(ISETW)*IRECV_FIELD_COUNT(ISETV) + IRECVTOT(JROC) = 1_JPIB*IRECV_WSET_SIZE(ISETW)*IRECV_FIELD_COUNT(ISETV) ENDDO ! Prepare sender arrays @@ -476,7 +481,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IPOS = IPOS+1 ! offset to first layer of this gridpoint IIN_TO_SEND_BUFR(IIN_TO_SEND_BUFR_OFFSET(JROC)+IPOS,1) = & - & KF_FS*D%NSTAGTF(IGLL)+(D%NSTA(IGL,ISETB)-1)+(JL-1) + & 1_JPIB*KF_FS*D%NSTAGTF(IGLL)+(D%NSTA(IGL,ISETB)-1)+(JL-1) ! distance between two layers of this gridpoint IIN_TO_SEND_BUFR(IIN_TO_SEND_BUFR_OFFSET(JROC)+IPOS,2) = & & D%NSTAGTF(IGLL+1)-D%NSTAGTF(IGLL) @@ -485,6 +490,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !we always receive the full fourier space ISENDTOT(JROC) = IPOS*KF_FS ENDDO + LLOCAL_CONTRIBUTION = ISENDTOT(MYPROC) > 0 #ifdef OMPGPU #endif @@ -513,7 +519,15 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) ENDIF - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), & +#ifdef ACCGPU + & STREAM=1_ACC_HANDLE_KIND) +#endif +#ifdef OMPGPU + & STREAM=1) +#endif + #ifdef OMPGPU #endif #ifdef ACCGPU @@ -532,7 +546,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, CALL GSTATS(1806,1) ! Copy local contribution - IF(ISENDTOT(MYPROC) > 0) THEN + IF(LLOCAL_CONTRIBUTION) THEN ! I have to send something to myself... ! Input is KF_GP fields. We find the resulting KF_FS fields. @@ -645,12 +659,12 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF (IRECV_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS),& - & 1_C_SIZE_T, int(ICOMBUFR_OFFSET(IRECV_COUNTS+1)*SIZEOF(ZCOMBUFR(1)),kind=c_size_t)) + & 1_JPIB, ICOMBUFR_OFFSET(IRECV_COUNTS+1)*C_SIZEOF(ZCOMBUFR(1))) ENDIF IF (ISEND_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS),& - & int(ALIGN(KF_GP*D%NGPTOT*SIZEOF(ZCOMBUFR(1)),128)+1,kind=c_size_t), & - & int(ICOMBUFS_OFFSET(ISEND_COUNTS+1)*SIZEOF(ZCOMBUFS(1)),kind=c_size_t)) + & ALIGN(1_JPIB*KF_GP*D%NGPTOT*C_SIZEOF(ZCOMBUFR(1)),128)+1, & + & ICOMBUFS_OFFSET(ISEND_COUNTS+1)*C_SIZEOF(ZCOMBUFS(1))) ENDIF #ifdef OMPGPU @@ -707,15 +721,31 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC HOST_DATA USE_DEVICE(ZCOMBUFS,ZCOMBUFR) #endif #else +#ifdef OMPGPU +#endif +#ifdef ACCGPU !! this is safe-but-slow fallback for running without GPU-aware MPI !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNTS > 0) #endif +#endif + + ! Skip the own contribution because this is ok to overflow + ISENDTOT(MYPROC) = 0 + IRECVTOT(MYPROC) = 0 + + ISENDTOT_MPI = ISENDTOT + IRECVTOT_MPI = IRECVTOT + IF (ANY(ISENDTOT_MPI /= ISENDTOT)) & + & CALL MPL_ABORT("Overflow in trltog") + IF (ANY(IRECVTOT_MPI /= IRECVTOT)) & + & CALL MPL_ABORT("Overflow in trltog") + DO INR=1,IRECV_COUNTS IR=IR+1 IRECV=IRECV_TO_PROC(INR) #if ECTRANS_HAVE_MPI CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)), & - & IRECVTOT(IRECV), & + & IRECVTOT_MPI(IRECV), & & TRLTOG_DTYPE,NPRCIDS(IRECV)-1, & & MTAGLG, LOCAL_COMM, IREQUEST(IR), & & IERROR ) @@ -730,7 +760,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IR=IR+1 ISEND=ISEND_TO_PROC(INS) #if ECTRANS_HAVE_MPI - CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT(ISEND), & + CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT_MPI(ISEND), & & TRLTOG_DTYPE, NPRCIDS(ISEND)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR) IREQ(IR) = IREQUEST(IR)%MPI_VAL #else @@ -750,8 +780,12 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC END HOST_DATA #endif #else +#ifdef OMPGPU +#endif +#ifdef ACCGPU !! this is safe-but-slow fallback for running without GPU-aware MPI !$ACC UPDATE DEVICE(ZCOMBUFR) IF(IRECV_COUNTS > 0) +#endif #endif IF (LSYNC_TRANS) THEN @@ -906,7 +940,14 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC UPDATE HOST(PGP3B) ASYNC(1) #endif ENDIF - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), & +#ifdef ACCGPU + & STREAM=1_ACC_HANDLE_KIND) +#endif +#ifdef OMPGPU + & STREAM=1) +#endif + IF (LSYNC_TRANS) THEN #ifdef ACCGPU !$ACC WAIT(1) diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index 09596eba2..173a04938 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -21,10 +21,10 @@ MODULE TRLTOM_MOD END TYPE CONTAINS FUNCTION PREPARE_TRLTOM(ALLOCATOR, KF_FS) RESULT(HTRLTOM) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -34,7 +34,7 @@ FUNCTION PREPARE_TRLTOM(ALLOCATOR, KF_FS) RESULT(HTRLTOM) REAL(KIND=JPRBT) :: DUMMY - HTRLTOM%HPFBUF = RESERVE(ALLOCATOR, int(D%NLENGT1B*2*KF_FS*SIZEOF(DUMMY),kind=c_size_t)) + HTRLTOM%HPFBUF = RESERVE(ALLOCATOR, 2_JPIB*D%NLENGT1B*KF_FS*C_SIZEOF(DUMMY), "HTRLTOM%HPFBUF") END FUNCTION SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) @@ -88,18 +88,18 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) ! Y.Seity : 07-08-30 Add barrier synchronisation under LSYNC_TRANS ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW - USE TPM_GEN, ONLY: LSYNC_TRANS, NERR + USE TPM_GEN, ONLY: LSYNC_TRANS, NERR, LMPOFF #if ECTRANS_HAVE_MPI - USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE + USE MPI_F08, ONLY: MPI_COMM, MPI_REAL4, MPI_REAL8 ! Missing: MPI_ALLTOALLV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -109,7 +109,8 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) REAL(KIND=JPRBT) ,INTENT(INOUT), POINTER :: PFBUF_IN(:) INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) - INTEGER(KIND=JPIM) :: J, ILEN, ISTA, FROM_SEND, TO_SEND, FROM_RECV, TO_RECV, IRANK + INTEGER(KIND=JPIM) :: J, FROM_SEND, TO_SEND, FROM_RECV, TO_RECV, IRANK + INTEGER(KIND=JPIB) :: JPOS, ISTA, IEND, ILEN REAL(KIND=JPHOOK) :: ZHOOK_HANDLE INTEGER(KIND=JPIM) :: IERROR @@ -120,19 +121,21 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) #endif #ifdef PARKINDTRANS_SINGLE -#define TRLTOM_DTYPE MPI_FLOAT +#define TRLTOM_DTYPE MPI_REAL4 #else -#define TRLTOM_DTYPE MPI_DOUBLE +#define TRLTOM_DTYPE MPI_REAL8 #endif #if ECTRANS_HAVE_MPI - LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM + IF(.NOT. LMPOFF) THEN + LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM + ENDIF #endif IF (LHOOK) CALL DR_HOOK('TRLTOM',0,ZHOOK_HANDLE) CALL ASSIGN_PTR(PFBUF, GET_ALLOCATION(ALLOCATOR, HTRLTOM%HPFBUF),& - & 1_C_SIZE_T, int(D%NLENGT1B*2*KF_FS*SIZEOF(PFBUF(1)),kind=c_size_t)) + & 1_JPIB, 2_JPIB*D%NLENGT1B*KF_FS*C_SIZEOF(PFBUF(1))) #ifdef OMPGPU #endif @@ -221,16 +224,17 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) #endif CALL GSTATS(806,1) ELSE - ILEN = D%NLTSGTB(MYSETW)*2*KF_FS - ISTA = D%NSTAGT1B(MYSETW)*2*KF_FS+1 + ILEN = 2_JPIB*D%NLTSGTB(MYSETW)*KF_FS + ISTA = 2_JPIB*D%NSTAGT1B(MYSETW)*KF_FS+1 + IEND = ISTA+ILEN-1 CALL GSTATS(1607,0) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP DEFAULT(NONE) FIRSTPRIVATE(ISTA,ILEN) + !$ACC PARALLEL LOOP DEFAULT(NONE) FIRSTPRIVATE(ISTA,IEND) #endif - DO J=ISTA,ISTA+ILEN-1 - PFBUF(J) = PFBUF_IN(J) + DO JPOS=ISTA,IEND + PFBUF(JPOS) = PFBUF_IN(JPOS) ENDDO CALL GSTATS(1607,1) ENDIF diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 18d4c7f74..3539028f3 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -30,9 +30,9 @@ MODULE TRLTOM_PACK_UNPACK CONTAINS FUNCTION PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_PACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -43,7 +43,7 @@ FUNCTION PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_PACK) REAL(KIND=JPRBT) :: DUMMY - HTRLTOM_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, int(D%NLENGT0B*KF_FS*2*SIZEOF(DUMMY),kind=c_size_t)) + HTRLTOM_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, 2_JPIB*D%NLENGT0B*KF_FS*C_SIZEOF(DUMMY), "HTRLTOM_PACK%HFOUBUF_IN") END FUNCTION PREPARE_TRLTOM_PACK SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) @@ -70,11 +70,11 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) ! ------------------------------------------------------------------ USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D, MYSETW, D_NSTAGTF, D_NPNTGTB0, D_NPTRLS, D_NDGL_FS - USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN - USE TPM_DIM, ONLY: R_NSMAX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB + USE TPM_DISTR, ONLY: D, MYSETW + USE TPM_GEOMETRY, ONLY: G + USE TPM_DIM, ONLY: R + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF ! IMPLICIT NONE @@ -85,17 +85,21 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(TRLTOM_PACK_HANDLE), INTENT(IN) :: HTRLTOM_PACK - INTEGER(KIND=JPIM) :: JM,JF,IGLG,ISTA,OFFSET_VAR,IOFF_LAT,KGL + INTEGER(KIND=JPIM) :: JM,JF,IGLG,OFFSET_VAR,KGL + INTEGER(KIND=JPIB) :: IOFF_LAT,ISTA REAL(KIND=JPRBT) :: SCAL + ASSOCIATE(D_NSTAGTF=>D%NSTAGTF, D_NPNTGTB0=>D%NPNTGTB0, D_NPTRLS=>D%NPTRLS, & + & D_NDGL_FS=>D%NDGL_FS, G_NMEN=>G%NMEN, G_NLOEN=>G%NLOEN, R_NSMAX=>R%NSMAX) + CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HTRLTOM_PACK%HFOUBUF_IN),& - & 1_C_SIZE_T, int(D%NLENGT0B*KF_FS*2*SIZEOF(FOUBUF_IN(1)),kind=c_size_t)) + & 1_JPIB, 2_JPIB*D%NLENGT0B*KF_FS*C_SIZEOF(FOUBUF_IN(1))) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(G_NMEN,D_NPNTGTB0,FOUBUF_IN,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS,G_NLOEN, R_NSMAX) ASYNC(1) + !$ACC DATA PRESENT(G,G_NMEN,D,D_NPNTGTB0,FOUBUF_IN,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS,G_NLOEN, R,R_NSMAX) ASYNC(1) #endif ! scale results and move into next transformation buffer @@ -105,8 +109,13 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP PRIVATE(IGLG,IOFF_LAT,ISTA,SCAL) FIRSTPRIVATE(KF_FS,OFFSET_VAR) DEFAULT(NONE) & - !$ACC& ASYNC(1) TILE(32,16,1) + !$ACC PARALLEL LOOP PRIVATE(IGLG,IOFF_LAT,ISTA,SCAL) FIRSTPRIVATE(KF_FS,OFFSET_VAR) & + !$ACC& TILE(32,16,1) DEFAULT(NONE) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KGL=1,D_NDGL_FS DO JM=0,R_NSMAX !(note that R_NSMAX <= G_NMEN(IGLG) for all IGLG) @@ -116,7 +125,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) IOFF_LAT = KF_FS*D_NSTAGTF(KGL)+(JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) SCAL = 1._JPRBT/REAL(G_NLOEN(IGLG),JPRBT) - ISTA = D_NPNTGTB0(JM,KGL)*KF_FS*2 + ISTA = 2_JPIB*D_NPNTGTB0(JM,KGL)*KF_FS FOUBUF_IN(ISTA+2*JF-1) = SCAL * PREEL_COMPLEX(IOFF_LAT+2*JM+1) FOUBUF_IN(ISTA+2*JF ) = SCAL * PREEL_COMPLEX(IOFF_LAT+2*JM+2) @@ -131,13 +140,14 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) !$ACC WAIT(1) #endif + END ASSOCIATE END SUBROUTINE TRLTOM_PACK FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD, JPIB USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE USE LEDIR_MOD, ONLY: LEDIR_STRIDES - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -145,9 +155,10 @@ FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS TYPE(TRLTOM_UNPACK_HANDLE) :: HTRLTOM_UNPACK - INTEGER(KIND=JPIM) :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM) :: IIN_STRIDES0 + INTEGER(KIND=JPIB) :: IIN_SIZE INTEGER(KIND=JPIM) :: IIN0_STRIDES0, IIN0_SIZE - INTEGER(KIND=C_SIZE_T) :: ISIZE + INTEGER(KIND=JPIB) :: ISIZE REAL(KIND=JPRBT) :: ZPRBT_DUMMY REAL(KIND=JPRD) :: ZPRD_DUMMY @@ -156,23 +167,23 @@ FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) IIN0_STRIDES0=IIN0_STRIDES0,IIN0_SIZE=IIN0_SIZE) ! Check if the reuse buffer is large enough - ISIZE = ALIGN(IIN_SIZE*SIZEOF(ZPRBT_DUMMY),128) - ISIZE = ISIZE + ALIGN(IIN_SIZE*SIZEOF(ZPRBT_DUMMY),128) - ISIZE = ISIZE + ALIGN(IIN0_SIZE*SIZEOF(ZPRD_DUMMY),128) - ISIZE = ISIZE + ALIGN(IIN0_SIZE*SIZEOF(ZPRD_DUMMY),128) + ISIZE = ALIGN(IIN_SIZE*C_SIZEOF(ZPRBT_DUMMY),128) + ISIZE = ISIZE + ALIGN(IIN_SIZE*C_SIZEOF(ZPRBT_DUMMY),128) + ISIZE = ISIZE + ALIGN(IIN0_SIZE*C_SIZEOF(ZPRD_DUMMY),128) + ISIZE = ISIZE + ALIGN(IIN0_SIZE*C_SIZEOF(ZPRD_DUMMY),128) - HTRLTOM_UNPACK%HINPS_AND_ZINPA = RESERVE(ALLOCATOR, ISIZE) + HTRLTOM_UNPACK%HINPS_AND_ZINPA = RESERVE(ALLOCATOR, ISIZE, "HTRLTOM_UNPACK%HINPS_AND_ZINPA") END FUNCTION PREPARE_TRLTOM_UNPACK SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD - USE TPM_DIM, ONLY: R_NDGNH, R_NDGL - USE TPM_GEOMETRY, ONLY: G_NDGLU + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD, JPIB + USE TPM_DIM, ONLY: R + USE TPM_GEOMETRY, ONLY: G USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE TPM_FIELDS_FLAT, ONLY: F_RW, F_RACTHE - USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 + USE TPM_FIELDS, ONLY: F + USE TPM_DISTR, ONLY: D USE LEDIR_MOD, ONLY: LEDIR_STRIDES - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -185,37 +196,41 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP REAL(KIND=JPRBT), POINTER :: PREEL_COMPLEX(:) - INTEGER(KIND=JPIM) :: IIN_STRIDES0, IIN_SIZE + INTEGER(KIND=JPIM) :: IIN_STRIDES0 + INTEGER(KIND=JPIB) :: IIN_SIZE INTEGER(KIND=JPIM) :: IIN0_STRIDES0, IIN0_SIZE - INTEGER(KIND=C_SIZE_T) :: IALLOC_POS, IALLOC_SZ + INTEGER(KIND=JPIB) :: IALLOC_POS, IALLOC_SZ - INTEGER(KIND=8) :: JF - INTEGER(KIND=JPIM) :: KM, ISL, IGLS, OFFSET1, OFFSET2, JGL, KMLOC + INTEGER(KIND=JPIB) :: JF, OFFSET1, OFFSET2 + INTEGER(KIND=JPIM) :: KM, ISL, IGLS, JGL, KMLOC REAL(KIND=JPRBT) :: PAIA, PAIS + ASSOCIATE(D_NUMP=>D%NUMP, R_NDGNH=>R%NDGNH, R_NDGL=>R%NDGL, F_RW=>F%RW, F_RACTHE=>F%RACTHE, & + & D_MYMS=>D%MYMS, D_NPNTGTB1=>D%NPNTGTB1, D_OFFSETS_GEMM1=>D%OFFSETS_GEMM1, & + & G_NDGLU=>G%NDGLU) CALL LEDIR_STRIDES(KF_FS,IIN_STRIDES0=IIN_STRIDES0,IIN_SIZE=IIN_SIZE,& IIN0_STRIDES0=IIN0_STRIDES0,IIN0_SIZE=IIN0_SIZE) IALLOC_POS=1 - IALLOC_SZ = ALIGN(IIN_SIZE*SIZEOF(ZINPS(0)),128) + IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINPS(0)),128) CALL ASSIGN_PTR(ZINPS, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ - IALLOC_SZ = ALIGN(IIN_SIZE*SIZEOF(ZINPA(0)),128) + IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINPA(0)),128) CALL ASSIGN_PTR(ZINPA, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ - IALLOC_SZ = ALIGN(IIN0_SIZE*SIZEOF(ZINPS0(0)),128) + IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINPS0(0)),128) CALL ASSIGN_PTR(ZINPS0, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ - IALLOC_SZ = ALIGN(IIN0_SIZE*SIZEOF(ZINPA0(0)),128) + IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINPA0(0)),128) CALL ASSIGN_PTR(ZINPA0, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ @@ -225,12 +240,17 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP #ifdef ACCGPU !$ACC DATA & !$ACC& PRESENT(ZINPS,ZINPA,ZINPS0,ZINPA0) & - !$ACC& PRESENT(F_RW,F_RACTHE) & - !$ACC& PRESENT(D_MYMS,D_NUMP,R_NDGNH,R_NDGL,G_NDGLU) & + !$ACC& PRESENT(F,F_RW,F_RACTHE) & + !$ACC& PRESENT(D,D_MYMS,D_NUMP,R,R_NDGNH,R_NDGL,G,G_NDGLU) & !$ACC& PRESENT(D_NPNTGTB1,D_OFFSETS_GEMM1,FOUBUF) !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,ISL,IGLS,OFFSET1,OFFSET2,JGL,PAIA,PAIS) & - !$ACC& FIRSTPRIVATE(KF_FS,KF_UV,IIN_STRIDES0,IIN0_STRIDES0) ASYNC(1) + !$ACC& FIRSTPRIVATE(KF_FS,KF_UV,IIN_STRIDES0,IIN0_STRIDES0) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JGL=1,R_NDGNH @@ -240,8 +260,8 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP IF (JGL >= ISL) THEN !(DO JGL=ISL,R_NDGNH) IGLS = R_NDGL+1-JGL - OFFSET1 = D_NPNTGTB1(KMLOC,JGL )*2*KF_FS - OFFSET2 = D_NPNTGTB1(KMLOC,IGLS)*2*KF_FS + OFFSET1 = 2_JPIB*D_NPNTGTB1(KMLOC,JGL )*KF_FS + OFFSET2 = 2_JPIB*D_NPNTGTB1(KMLOC,IGLS)*KF_FS PAIA = FOUBUF(OFFSET1+JF)-FOUBUF(OFFSET2+JF) PAIS = FOUBUF(OFFSET1+JF)+FOUBUF(OFFSET2+JF) IF (JF <= 4*KF_UV) THEN @@ -290,6 +310,7 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP !$ACC END DATA #endif + END ASSOCIATE END SUBROUTINE TRLTOM_UNPACK END MODULE TRLTOM_PACK_UNPACK diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 2b8ca8978..73d93f8c2 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -21,10 +21,10 @@ MODULE TRMTOL_MOD END TYPE CONTAINS FUNCTION PREPARE_TRMTOL(ALLOCATOR, KF_LEG) RESULT(HTRMTOL) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -34,7 +34,7 @@ FUNCTION PREPARE_TRMTOL(ALLOCATOR, KF_LEG) RESULT(HTRMTOL) REAL(KIND=JPRBT) :: DUMMY - HTRMTOL%HPFBUF = RESERVE(ALLOCATOR, int(D%NLENGT0B*2*KF_LEG*SIZEOF(DUMMY),kind=c_size_t)) + HTRMTOL%HPFBUF = RESERVE(ALLOCATOR, 2_JPIB*D%NLENGT0B*KF_LEG*C_SIZEOF(DUMMY), "HTRMTOL%HPFBUF") END FUNCTION SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) @@ -88,18 +88,18 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) ! Y.Seity : 07-08-31 add barrier synchronisation under LSYNC_TRANS ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW - USE TPM_GEN, ONLY: LSYNC_TRANS, NERR + USE TPM_GEN, ONLY: LSYNC_TRANS, NERR, LMPOFF #if ECTRANS_HAVE_MPI - USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE + USE MPI_F08, ONLY: MPI_COMM, MPI_REAL4, MPI_REAL8 ! Missing: MPI_ALLTOALLV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -109,7 +109,8 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) REAL(KIND=JPRBT), INTENT(IN) :: PFBUF_IN(:) INTEGER(KIND=JPIM) :: ILENS(NPRTRW),IOFFS(NPRTRW),ILENR(NPRTRW),IOFFR(NPRTRW) - INTEGER(KIND=JPIM) :: J, ILEN, ISTA, FROM_SEND, TO_SEND, FROM_RECV, TO_RECV, IRANK + INTEGER(KIND=JPIM) :: J, FROM_SEND, TO_SEND, FROM_RECV, TO_RECV, IRANK + INTEGER(KIND=JPIB) :: JPOS, ISTA, IEND, ILEN REAL(KIND=JPHOOK) :: ZHOOK_HANDLE INTEGER(KIND=JPIM) :: IERROR @@ -121,19 +122,21 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #endif #ifdef PARKINDTRANS_SINGLE -#define TRMTOL_DTYPE MPI_FLOAT +#define TRMTOL_DTYPE MPI_REAL4 #else -#define TRMTOL_DTYPE MPI_DOUBLE +#define TRMTOL_DTYPE MPI_REAL8 #endif #if ECTRANS_HAVE_MPI - LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM + IF(.NOT. LMPOFF) THEN + LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM + ENDIF #endif IF (LHOOK) CALL DR_HOOK('TRMTOL',0,ZHOOK_HANDLE) CALL ASSIGN_PTR(PFBUF, GET_ALLOCATION(ALLOCATOR, HTRMTOL%HPFBUF),& - & 1_C_SIZE_T, int(D%NLENGT0B*2*KF_LEG*SIZEOF(PFBUF(1)),kind=c_size_t)) + & 1_JPIB, 2_JPIB*D%NLENGT0B*KF_LEG*C_SIZEOF(PFBUF(1))) IF(NPROC > 1) THEN DO J=1,NPRTRW @@ -226,16 +229,17 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #endif CALL GSTATS(807,1) ELSE - ILEN = D%NLTSGTB(MYSETW)*2*KF_LEG - ISTA = D%NSTAGT0B(MYSETW)*2*KF_LEG+1 + ILEN = 2_JPIB*D%NLTSGTB(MYSETW)*KF_LEG + ISTA = 2_JPIB*D%NSTAGT0B(MYSETW)*KF_LEG+1 + IEND = ISTA+ILEN-1 CALL GSTATS(1608,0) #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) FIRSTPRIVATE(ISTA,ILEN) + !$ACC PARALLEL LOOP DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) FIRSTPRIVATE(ISTA,IEND) #endif - DO J=ISTA,ISTA+ILEN-1 - PFBUF(J) = PFBUF_IN(J) + DO JPOS=ISTA,IEND + PFBUF(JPOS) = PFBUF_IN(JPOS) ENDDO CALL GSTATS(1608,1) ENDIF diff --git a/src/trans/gpu/internal/trmtol_pack_unpack.F90 b/src/trans/gpu/internal/trmtol_pack_unpack.F90 index c4bfe7b89..76080145c 100755 --- a/src/trans/gpu/internal/trmtol_pack_unpack.F90 +++ b/src/trans/gpu/internal/trmtol_pack_unpack.F90 @@ -26,9 +26,9 @@ MODULE TRMTOL_PACK_UNPACK CONTAINS FUNCTION PREPARE_TRMTOL_PACK(ALLOCATOR,KF_LEG) RESULT(HTRMTOL_PACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -38,12 +38,12 @@ FUNCTION PREPARE_TRMTOL_PACK(ALLOCATOR,KF_LEG) RESULT(HTRMTOL_PACK) TYPE(TRMTOL_PACK_HANDLE) :: HTRMTOL_PACK - INTEGER(KIND=C_SIZE_T) :: IALLOC_SZ + INTEGER(KIND=JPIB) :: IALLOC_SZ REAL(KIND=JPRBT) :: ZPRBT_DUMMY - IALLOC_SZ = D%NLENGT1B*2*KF_LEG*SIZEOF(ZPRBT_DUMMY) - HTRMTOL_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, int(IALLOC_SZ,kind=c_size_t)) + IALLOC_SZ = 2_JPIB*D%NLENGT1B*KF_LEG*C_SIZEOF(ZPRBT_DUMMY) + HTRMTOL_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, IALLOC_SZ, "HTRMTOL_PACK%HFOUBUF_IN") END FUNCTION SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_IN,KF_LEG) @@ -84,14 +84,14 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD, JPIB USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK - USE TPM_DIM, ONLY: R_NDGNH, R_NDGL - USE TPM_GEOMETRY, ONLY: G_NDGLU - USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 + USE TPM_DIM, ONLY: R + USE TPM_GEOMETRY, ONLY: G + USE TPM_DISTR, ONLY: D USE LEINV_MOD, ONLY: LEINV_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -107,16 +107,21 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I ! LOCAL REAL(KIND=JPRBT) :: ZAOA, ZSOA - INTEGER(KIND=JPIM) :: KMLOC, KM, ISL, JGL, JK, IGLS, OFFSET1, OFFSET2 - INTEGER(KIND=JPIM) :: IOUT_STRIDES0, IOUT_SIZE + INTEGER(KIND=JPIM) :: KMLOC, KM, ISL, JGL, JK, IGLS + INTEGER(KIND=JPIB) :: OFFSET1, OFFSET2 + INTEGER(KIND=JPIM) :: IOUT_STRIDES0 + INTEGER(KIND=JPIB) :: IOUT_SIZE INTEGER(KIND=JPIM) :: IOUT0_STRIDES0, IOUT0_SIZE REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + ASSOCIATE(D_NUMP=>D%NUMP, R_NDGNH=>R%NDGNH, R_NDGL=>R%NDGL, G_NDGLU=>G%NDGLU, & + & D_MYMS=>D%MYMS, D_NPNTGTB1=>D%NPNTGTB1, D_OFFSETS_GEMM1=>D%OFFSETS_GEMM1) + IF (LHOOK) CALL DR_HOOK('TRMTOL_PACK',0,ZHOOK_HANDLE) CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HTRMTOL_PACK%HFOUBUF_IN),& - & 1_C_SIZE_T, int(D%NLENGT1B*2*KF_LEG*SIZEOF(FOUBUF_IN(1)),kind=c_size_t)) + & 1_JPIB, 2_JPIB*D%NLENGT1B*KF_LEG*C_SIZEOF(FOUBUF_IN(1))) CALL LEINV_STRIDES(KF_LEG,IOUT_STRIDES0=IOUT_STRIDES0,IOUT_SIZE=IOUT_SIZE,& IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE) @@ -124,7 +129,7 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(D_MYMS,D_NPNTGTB1,D_NUMP,G_NDGLU,R_NDGNH,R_NDGL) & + !$ACC DATA PRESENT(D,D_MYMS,D_NPNTGTB1,D_NUMP,G,G_NDGLU,R,R_NDGNH,R_NDGL) & !$ACC& PRESENT(ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_IN,D_OFFSETS_GEMM1) #endif @@ -132,7 +137,12 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I #endif #ifdef ACCGPU !$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(KM,ISL,IGLS,OFFSET1,OFFSET2,ZAOA,ZSOA) & - !$ACC& FIRSTPRIVATE(KF_LEG,IOUT_STRIDES0,IOUT0_STRIDES0) ASYNC(1) + !$ACC& FIRSTPRIVATE(KF_LEG,IOUT_STRIDES0,IOUT0_STRIDES0) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JGL=1,R_NDGNH @@ -142,8 +152,8 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I IF (JGL >= ISL) THEN !(DO JGL=ISL,R_NDGNH) IGLS = R_NDGL+1-JGL - OFFSET1 = D_NPNTGTB1(KMLOC,JGL )*2*KF_LEG - OFFSET2 = D_NPNTGTB1(KMLOC,IGLS)*2*KF_LEG + OFFSET1 = 2_JPIB*D_NPNTGTB1(KMLOC,JGL )*KF_LEG + OFFSET2 = 2_JPIB*D_NPNTGTB1(KMLOC,IGLS)*KF_LEG IF(KM /= 0) THEN ZSOA = ZOUTS(JK+(JGL-ISL)*IOUT_STRIDES0+D_OFFSETS_GEMM1(KMLOC)*IOUT_STRIDES0) @@ -174,13 +184,14 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I IF (LHOOK) CALL DR_HOOK('TRMTOL_PACK',1,ZHOOK_HANDLE) + END ASSOCIATE END SUBROUTINE TRMTOL_PACK FUNCTION PREPARE_TRMTOL_UNPACK(ALLOCATOR,KF_FS) RESULT(HTRMTOL_UNPACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -191,7 +202,7 @@ FUNCTION PREPARE_TRMTOL_UNPACK(ALLOCATOR,KF_FS) RESULT(HTRMTOL_UNPACK) REAL(KIND=JPRBT) :: DUMMY - HTRMTOL_UNPACK%HREEL = RESERVE(ALLOCATOR, int(D%NLENGTF*KF_FS*SIZEOF(DUMMY),kind=c_size_t)) + HTRMTOL_UNPACK%HREEL = RESERVE(ALLOCATOR, 1_JPIB*D%NLENGTF*KF_FS*C_SIZEOF(DUMMY), "HTRMTOL_UNPACK%HREEL") END FUNCTION PREPARE_TRMTOL_UNPACK SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURRENT,KF_TOTAL) @@ -223,11 +234,11 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT -USE TPM_DISTR, ONLY: D, MYSETW, D_NSTAGTF, D_NPNTGTB0, D_NPTRLS, D_NDGL_FS -USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN, G_NLOEN_MAX +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPIB +USE TPM_DISTR, ONLY: D, MYSETW +USE TPM_GEOMETRY, ONLY: G USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION -USE ISO_C_BINDING, ONLY: C_SIZE_T +USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF ! IMPLICIT NONE @@ -238,29 +249,38 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(TRMTOL_UNPACK_HANDLE), INTENT(IN) :: HTRMTOL_UNPACK -INTEGER(KIND=JPIM) :: JM,JF,IGLG,ISTA,OFFSET_VAR,IOFF_LAT,KGL +INTEGER(KIND=JPIM) :: JM,JF,IGLG,OFFSET_VAR,KGL,ILOEN_MAX +INTEGER(KIND=JPIB) :: IOFF_LAT, ISTA REAL(KIND=JPRBT) :: RET_REAL, RET_COMPLEX +ASSOCIATE(D_NDGL_FS=>D%NDGL_FS, D_NSTAGTF=>D%NSTAGTF, D_NPNTGTB0=>D%NPNTGTB0, D_NPTRLS=>D%NPTRLS, & + & G_NLOEN=>G%NLOEN, G_NMEN=>G%NMEN) + CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HTRMTOL_UNPACK%HREEL),& - & 1_C_SIZE_T, int(KF_TOTAL*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),kind=c_size_t)) + & 1_JPIB, 1_JPIB*KF_TOTAL*D%NLENGTF*C_SIZEOF(PREEL_COMPLEX(1))) #ifdef OMPGPU #endif #ifdef ACCGPU -!$ACC DATA PRESENT(G_NLOEN,G_NMEN,D_NPNTGTB0,FOUBUF,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS) ASYNC(1) +!$ACC DATA PRESENT(G,G_NLOEN,G_NMEN,D,D_NPNTGTB0,FOUBUF,PREEL_COMPLEX,D_NSTAGTF,D_NDGL_FS) ASYNC(1) #endif OFFSET_VAR=D_NPTRLS(MYSETW) +ILOEN_MAX=MAXVAL(G_NLOEN) #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC PARALLEL LOOP PRIVATE(IGLG,IOFF_LAT,ISTA,RET_REAL,RET_COMPLEX) FIRSTPRIVATE(KF_CURRENT,& -!$ACC& KF_TOTAL,OFFSET_VAR,G_NLOEN_MAX) DEFAULT(NONE) & -!$ACC& ASYNC(1) TILE(32,16,1) +!$ACC& KF_TOTAL,OFFSET_VAR,ILOEN_MAX) DEFAULT(NONE) TILE(32,16,1) & +#ifndef _CRAYFTN +!$ACC& ASYNC(1) +#else +!$ACC& +#endif #endif DO KGL=1,D_NDGL_FS DO JF=1,KF_CURRENT - DO JM=0,G_NLOEN_MAX/2 + DO JM=0,ILOEN_MAX/2 IGLG = OFFSET_VAR+KGL-1 ! FFT transforms NLON real values to floor(NLON/2)+1 complex numbers. Hence we have @@ -270,12 +290,12 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN RET_REAL = 0.0_JPRBT RET_COMPLEX = 0.0_JPRBT IF (JM <= G_NMEN(IGLG)) THEN - ISTA = D_NPNTGTB0(JM,KGL)*KF_CURRENT*2 + ISTA = 2_JPIB*D_NPNTGTB0(JM,KGL)*KF_CURRENT RET_REAL = FOUBUF(ISTA+2*JF-1) RET_COMPLEX = FOUBUF(ISTA+2*JF ) ENDIF - IOFF_LAT = KF_TOTAL*D_NSTAGTF(KGL)+(JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) + IOFF_LAT = 1_JPIB*KF_TOTAL*D_NSTAGTF(KGL)+(JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) PREEL_COMPLEX(IOFF_LAT+2*JM+1) = RET_REAL PREEL_COMPLEX(IOFF_LAT+2*JM+2) = RET_COMPLEX ENDIF @@ -290,6 +310,8 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN !$ACC WAIT(1) #endif +END ASSOCIATE + END SUBROUTINE TRMTOL_UNPACK END MODULE TRMTOL_PACK_UNPACK diff --git a/src/trans/gpu/internal/updspb_mod.F90 b/src/trans/gpu/internal/updspb_mod.F90 index 0f59694ed..32a12660b 100755 --- a/src/trans/gpu/internal/updspb_mod.F90 +++ b/src/trans/gpu/internal/updspb_mod.F90 @@ -57,8 +57,10 @@ SUBROUTINE UPDSPB(KFIELD,POA,PSPEC,KFLDPTR) ! ------------------------------------------------------------------ USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT - USE TPM_DIM, ONLY: R_NTMAX - USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_NASM0 + USE TPM_DIM, ONLY: R + USE TPM_DISTR, ONLY: D + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS + ! IMPLICIT NONE @@ -85,28 +87,35 @@ SUBROUTINE UPDSPB(KFIELD,POA,PSPEC,KFLDPTR) ! and nn=NTMAX+2-n from NTMAX+2-m to NTMAX+2-NSMAX. ! NLTN(m)=NTMAX+2-m : n=NLTN(nn),nn=NLTN(n) ! nn is the loop index. - - IF(PRESENT(KFLDPTR)) THEN - stop 'Error: code path not (yet) supported in GPU version' - ENDIF + ASSOCIATE(D_NUMP=>D%NUMP, D_MYMS=>D%MYMS, D_NASM0=>D%NASM0, R_NTMAX=>R%NTMAX) + + IF(PRESENT(KFLDPTR)) THEN + CALL ABORT_TRANS('UPDSPB: Code path not (yet) supported in GPU version') + ENDIF !* 1. UPDATE SPECTRAL FIELDS. ! ----------------------- - !loop over wavenumber +#ifdef OMPGPU + !$OMP TARGET DATA MAP(TO:KFIELD) +#endif #ifdef ACCGPU - !$ACC DATA PRESENT(PSPEC,POA,R_NTMAX,D_NUMP,D_MYMS,D_NASM0) ASYNC(1) + !$ACC DATA PRESENT(PSPEC,POA,R,R_NTMAX,D,D_NUMP,D_MYMS,D_NASM0) ASYNC(1) #endif + +! Directive incomplete -> putting more variables in SHARED() triggers internal compiler error +! ftn-7991: INTERNAL COMPILER ERROR: "Too few arguments on the stack" #ifdef OMPGPU -!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! - !$OMP TARGET DATA MAP(ALLOC:PSPEC,POA) & - !$OMP& MAP(TO:R_NTMAX,D_NUMP,D_MYMS,D_NASM0) - !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(KM,IASM0,INM,IR,II) DEFAULT(NONE) & - !$OMP& SHARED(R_NTMAX,D_NUMP,D_MYMS,D_NASM0,PSPEC,KFIELD,POA) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(KM,IASM0,INM) & + !$OMP& SHARED(KFIELD) #endif #ifdef ACCGPU - !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,IASM0,INM) DEFAULT(NONE) & - !$ACC& FIRSTPRIVATE(KFIELD) ASYNC(1) + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(KM,IASM0,INM) DEFAULT(NONE) COPYIN(KFIELD) & +#ifndef _CRAYFTN + !$ACC& ASYNC(1) +#else + !$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JN=3,R_NTMAX+3 @@ -128,13 +137,15 @@ SUBROUTINE UPDSPB(KFIELD,POA,PSPEC,KFLDPTR) ENDDO ENDDO ENDDO -#ifdef OMPGPU - !$OMP END TARGET DATA -#endif + #ifdef ACCGPU !$ACC END DATA #endif +#ifdef OMPGPU + !$OMP END TARGET DATA +#endif + END ASSOCIATE ! ------------------------------------------------------------------ END SUBROUTINE UPDSPB diff --git a/src/trans/gpu/internal/uvtvd_mod.F90 b/src/trans/gpu/internal/uvtvd_mod.F90 index 36646f8e9..739592c49 100755 --- a/src/trans/gpu/internal/uvtvd_mod.F90 +++ b/src/trans/gpu/internal/uvtvd_mod.F90 @@ -59,9 +59,9 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) ! ------------------------------------------------------------------ USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT -USE TPM_DIM, ONLY: R, R_NTMAX -USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS -USE TPM_FIELDS_FLAT, ONLY: ZEPSNM +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D +USE TPM_FIELDS_GPU, ONLY: FG ! IMPLICIT NONE @@ -79,30 +79,30 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) REAL(KIND=JPRBT) :: ZKM,ZJN ! ------------------------------------------------------------------ +ASSOCIATE(D_NUMP=>D%NUMP, R_NTMAX=>R%NTMAX, D_MYMS=>D%MYMS, ZEPSNM=>FG%ZEPSNM) !* 1. COMPUTE U V FROM VORTICITY AND DIVERGENCE. ! ------------------------------------------ #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT(D_MYMS,D_NUMP,R_NTMAX) & -!$ACC& PRESENT(ZEPSNM,PU,PV,PVOR,PDIV) ASYNC(1) -#endif -#ifdef OMPGPU -!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler! -!$OMP TARGET DATA& -!$OMP& MAP(TO:D_MYMS,D_NUMP,R_NTMAX) & -!$OMP& MAP(ALLOC:ZEPSNM,PU,PV,PVOR,PDIV) +!$ACC& PRESENT(D,D_MYMS,D_NUMP,R,R_NTMAX) & +!$ACC& PRESENT(FG,ZEPSNM,PU,PV,PVOR,PDIV) ASYNC(1) #endif !* 1.1 SET N=KM-1 COMPONENT TO 0 FOR U AND V #ifdef OMPGPU -!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(KM) SHARED(D,KF_UV,R,PU,PV) & +!$OMP& MAP(TO:KF_UV) DEFAULT(NONE) #endif #ifdef ACCGPU -!$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM) DEFAULT(NONE) & -!$ACC& FIRSTPRIVATE(KF_UV) ASYNC(1) +!$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM) FIRSTPRIVATE(KF_UV) DEFAULT(NONE) & +#ifndef _CRAYFTN +!$ACC& ASYNC(1) +#else +!$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO J=1,2*KF_UV @@ -115,12 +115,16 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) !* 1.2 COMPUTE VORTICITY AND DIVERGENCE. #ifdef OMPGPU -!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM,JN,ZJN) DEFAULT(NONE) & -!$OMP& SHARED(D_NUMP,R_NTMAX,KF_UV,D_MYMS,PVOR,PV,PU,PDIV,ZEPSNM) +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM,ZJN) & +!$OMP& SHARED(D,R,KF_UV,FG,PVOR,PV,PU,PDIV) DEFAULT(NONE) #endif #ifdef ACCGPU -!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM,JN,ZJN) DEFAULT(NONE) & -!$ACC& FIRSTPRIVATE(KF_UV) ASYNC(1) +!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM,ZJN) FIRSTPRIVATE(KF_UV) DEFAULT(NONE) & +#ifndef _CRAYFTN +!$ACC& ASYNC(1) +#else +!$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JN=0,R_NTMAX @@ -163,13 +167,12 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) ENDDO ENDDO ENDDO -#ifdef OMPGPU -!$OMP END TARGET DATA -#endif + #ifdef ACCGPU !$ACC END DATA #endif ! ------------------------------------------------------------------ +END ASSOCIATE END SUBROUTINE UVTVD END MODULE UVTVD_MOD diff --git a/src/trans/gpu/internal/vd2uv_mod.F90 b/src/trans/gpu/internal/vd2uv_mod.F90 index 984083312..489684f95 100755 --- a/src/trans/gpu/internal/vd2uv_mod.F90 +++ b/src/trans/gpu/internal/vd2uv_mod.F90 @@ -20,6 +20,7 @@ SUBROUTINE VD2UV(KM,KMLOC,KF_UV,KLEI2,PSPVOR,PSPDIV,PU,PV) USE PREPSNM_MOD, ONLY: PREPSNM USE PRFI1B_MOD, ONLY: PRFI1B USE VDTUV_MOD, ONLY: VDTUV +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS !**** *VD2UV* - U and V from Vor/div @@ -101,7 +102,8 @@ SUBROUTINE VD2UV(KM,KMLOC,KF_UV,KLEI2,PSPVOR,PSPDIV,PU,PV) !* 1. PREPARE ZEPSNM. ! --------------- -stop 'Error: code path not (yet) supported in GPU version' +CALL ABORT_TRANS('VD2UV: Code path not (yet) supported in GPU version') + !CALL PREPSNM(KM,KMLOC,ZEPSNM) ! ------------------------------------------------------------------ @@ -122,7 +124,6 @@ SUBROUTINE VD2UV(KM,KMLOC,KF_UV,KLEI2,PSPVOR,PSPDIV,PU,PV) IUU = 6*KF_UV IVL = 6*KF_UV+1 IVU = 8*KF_UV - stop 'Error: code path not (yet) supported in GPU version' !CALL PRFI1B(KM,ZIA(:,IVORL:IVORU),PSPVOR,KF_UV) !CALL PRFI1B(KM,ZIA(:,IDIVL:IDIVU),PSPDIV,KF_UV) diff --git a/src/trans/gpu/internal/vdtuv_mod.F90 b/src/trans/gpu/internal/vdtuv_mod.F90 index 68e8c47c8..e548abe96 100755 --- a/src/trans/gpu/internal/vdtuv_mod.F90 +++ b/src/trans/gpu/internal/vdtuv_mod.F90 @@ -14,10 +14,9 @@ MODULE VDTUV_MOD SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT -USE TPM_DIM, ONLY: R, R_NTMAX +USE TPM_DIM, ONLY: R USE TPM_FIELDS, ONLY: F -USE TPM_FIELDS_FLAT, ONLY: F_RLAPIN -USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS +USE TPM_DISTR, ONLY: D !**** *VDTUV* - Compute U,V in spectral space @@ -86,18 +85,16 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) ! LOCAL REAL SCALARS REAL(KIND=JPRBT) :: ZKM +ASSOCIATE(D_NUMP=>D%NUMP, D_MYMS=>D%MYMS, R_NTMAX=>R%NTMAX, F_RLAPIN=>F%RLAPIN) + #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT(R_NTMAX,D_MYMS,D_NUMP,F_RLAPIN) & +!$ACC& PRESENT(R,R_NTMAX,D,D_MYMS,D_NUMP,F,F_RLAPIN) & !$ACC& PRESENT(PEPSNM, PVOR, PDIV) & !$ACC& PRESENT(PU, PV) #endif #ifdef OMPGPU -!$OMP TARGET DATA & -!$OMP& MAP (PRESENT,ALLOC:ZEPSNM, ZN, ZLAPIN) & -!$OMP& MAP (TO:R_NSMAX, D_MYMS,D_NUMP,F_RLAPIN) & -!$OMP& MAP(PRESENT,ALLOC:ZEPSNM, PVOR, PDIV) & -!$OMP& MAP(PRESENT,ALLOC:PU, PV) +!$OMP TARGET DATA MAP(PRESENT,ALLOC:R_NTMAX,D_MYMS,D_NUMP,F_RLAPIN,PEPSNM,PVOR,PDIV,PU,PV) #endif ! ------------------------------------------------------------------ @@ -106,10 +103,16 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) ! ------------------------------------------ #ifdef OMPGPU +!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IR,II,KM,ZKM,JI) & +!$OMP& FIRSTPRIVATE(KFIELD,KMLOC) #endif #ifdef ACCGPU -!$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IR,II,KM,ZKM,JI) & -!$ACC& FIRSTPRIVATE(KFIELD,KMLOC) ASYNC(1) +!$ACC PARALLEL LOOP COLLAPSE(3) DEFAULT(NONE) PRIVATE(IR,II,KM,ZKM,JI) FIRSTPRIVATE(KFIELD,KMLOC) & +#ifndef _CRAYFTN +!$ACC& ASYNC(1) +#else +!$ACC& +#endif #endif DO KMLOC=1,D_NUMP DO JN=0,R_NTMAX+1 @@ -157,6 +160,7 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) !$ACC END DATA #endif ! ------------------------------------------------------------------ +END ASSOCIATE END SUBROUTINE VDTUV END MODULE VDTUV_MOD diff --git a/src/trans/include/ectrans/setup_trans.h b/src/trans/include/ectrans/setup_trans.h index 72e6b5a6d..ad4353c75 100644 --- a/src/trans/include/ectrans/setup_trans.h +++ b/src/trans/include/ectrans/setup_trans.h @@ -11,7 +11,7 @@ INTERFACE SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& &KTMAX,KRESOL,PWEIGHT,LDGRIDONLY,LDUSERPNM,LDKEEPRPNM,LDUSEFLT,& -&LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,& +&LDSPSETUPONLY,LDPNMONLY,LDUSEFFTW,LD_ALL_FFTW,& &LDLL,LDSHIFTLL,CDIO_LEGPOL,CDLEGPOLFNAME,KLEGPOLPTR,KLEGPOLPTR_LEN) !**** *SETUP_TRANS* - Setup transform package for specific resolution @@ -52,6 +52,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! FLT, otherwise always kept) ! LDPNMONLY - Compute the Legendre polynomialsonly, not the FFTs. ! LDUSEFFTW - Use FFTW for FFTs (option deprecated - FFTW is now mandatory) +! LD_ALL_FFTW : T to transform all fields in one call, F to transforms fields one after another ! LDLL - Setup second set of input/output latitudes ! the number of input/output latitudes to transform is equal KDGL ! or KDGL+2 in the case that includes poles + equator @@ -97,6 +98,7 @@ REAL(KIND=JPRD) ,OPTIONAL,INTENT(IN) :: PWEIGHT(:) REAL(KIND=JPRD) ,OPTIONAL,INTENT(IN) :: PSTRET LOGICAL ,OPTIONAL,INTENT(IN):: LDGRIDONLY LOGICAL ,OPTIONAL,INTENT(IN):: LDUSEFLT +LOGICAL ,OPTIONAL,INTENT(IN):: LD_ALL_FFTW LOGICAL ,OPTIONAL,INTENT(IN):: LDUSERPNM LOGICAL ,OPTIONAL,INTENT(IN):: LDKEEPRPNM LOGICAL ,OPTIONAL,INTENT(IN):: LDPNMONLY diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index 49cb51484..e999ecfca 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -110,7 +110,7 @@ s/\ SULEG_MOD/\ SULEG_MOD_VARIANTDESIGNATOR/g s/SUTRLE_MOD/SUTRLE_MOD_VARIANTDESIGNATOR/g s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g s/TPM_FFT/TPM_FFT_VARIANTDESIGNATOR/g -s/TPM_FIELDS_FLAT/TPM_FIELDS_FLAT_VARIANTDESIGNATOR/g +s/TPM_FIELDS_GPU/TPM_FIELDS_GPU_VARIANTDESIGNATOR/g s/TPM_FLT/TPM_FLT_VARIANTDESIGNATOR/g s/TPM_TRANS/TPM_TRANS_VARIANTDESIGNATOR/g s/trans_end( *($|\(| |\*|\.h))/trans_end_VARIANTDESIGNATOR\1/g diff --git a/src/transi/CMakeLists.txt b/src/transi/CMakeLists.txt index 43e79dfa0..ae97cf5ff 100644 --- a/src/transi/CMakeLists.txt +++ b/src/transi/CMakeLists.txt @@ -27,6 +27,33 @@ ecbuild_add_library( TARGET transi_dp ectrans_target_fortran_module_directory( TARGET transi_dp MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/trans_dp ) + +if( HAVE_GPU ) + ecbuild_add_library( TARGET transi_gpu_dp + SOURCES transi_module.F90 + transi.h + transi.c + version.h + ${CMAKE_CURRENT_BINARY_DIR}/version.c + HEADER_DESTINATION include/ectrans + PUBLIC_INCLUDES $ + $ + PRIVATE_LIBS trans_gpu_dp + PRIVATE_DEFINITIONS ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} ECTRANS_GPU_VERSION + ) + if( HAVE_ACC AND CMAKE_Fortran_COMPILER_ID MATCHES NVHPC ) + # Propagate flags as link options for downstream targets. Only required for NVHPC + target_link_options( transi_gpu_dp INTERFACE + $<$:SHELL:${OpenACC_Fortran_FLAGS}> + $<$:SHELL:${OpenACC_Fortran_FLAGS}> + $<$:SHELL:${OpenACC_Fortran_FLAGS}> ) + endif() + + ectrans_target_fortran_module_directory( TARGET transi_gpu_dp + MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/trans_gpu_dp + ) +endif() + set( transi_includes transi.h version.h ) install( FILES ${transi_includes} diff --git a/src/transi/transi.c b/src/transi/transi.c index 2fb0ce956..569e3b974 100644 --- a/src/transi/transi.c +++ b/src/transi/transi.c @@ -44,7 +44,7 @@ const char* trans_error_msg(int errcode) case TRANS_ERROR: return "Trans: Error"; case TRANS_NOTIMPL: - return "Trans: Not (yet) implemented"; + return "Trans: Not implemented"; case TRANS_MISSING_ARG: return "Trans: Required member of the argument structure is missing or not allocated"; case TRANS_UNRECOGNIZED_ARG: @@ -58,6 +58,7 @@ const char* trans_error_msg(int errcode) int trans_new( struct Trans_t* trans ) { + trans->handle = 0; // not initialized trans->llatlon = 0; trans->lsplit = true; trans->flt = -1; @@ -283,3 +284,7 @@ struct SpecNorm_t new_specnorm(struct Trans_t* trans) specnorm.count = 0; return specnorm; } + +void transi_disable_DR_HOOK_ASSERT_MPI_INITIALIZED() { + setenv("DR_HOOK_ASSERT_MPI_INITIALIZED","0",1); +} \ No newline at end of file diff --git a/src/transi/transi_module.F90 b/src/transi/transi_module.F90 index d5e13dd01..4976f3125 100644 --- a/src/transi/transi_module.F90 +++ b/src/transi/transi_module.F90 @@ -410,6 +410,8 @@ subroutine transi_free(ptr) bind(C,name="transi_free") use, intrinsic :: iso_c_binding, only: c_ptr type(c_ptr), intent(in) :: ptr end subroutine transi_free + subroutine transi_disable_DR_HOOK_ASSERT_MPI_INITIALIZED() bind(C,name="transi_disable_DR_HOOK_ASSERT_MPI_INITIALIZED") + end subroutine end interface @@ -551,6 +553,7 @@ function trans_init() bind(C,name="trans_init") result(iret) NPRGPNS = MPL_NPROC() NPRTRW = MPL_NPROC()/NPRTRV; else + call transi_disable_DR_HOOK_ASSERT_MPI_INITIALIZED() allocate( I_REGIONS(1) ) NPRGPNS = 1 NPRTRW = 1; @@ -597,6 +600,15 @@ function trans_setup(trans) bind(C,name="trans_setup") result(iret) if( trans%llatlon /= 0 ) llatlon = .True. if( trans%llatlon == 2 ) llatlonshift = .True. +#ifdef ECTRANS_GPU_VERSION + if (llatlon) then + call transi_error("trans_setup: lonlat grid input not (yet) implemented for GPU") + trans%handle = 0 ! Not created! + iret = TRANS_NOTIMPL + return + endif +#endif + if ( .not. is_init ) then err = trans_init() endif @@ -1251,6 +1263,10 @@ function trans_delete(trans) bind(C,name="trans_delete") use, intrinsic :: iso_c_binding integer(c_int) :: trans_delete type(Trans_t), intent(inout) :: trans + trans_delete = TRANS_SUCCESS + if (trans%handle == 0) then + return + endif call free_ptr( trans%nloen ) call free_ptr( trans%readfp ) call free_ptr( trans%writefp ) @@ -1282,7 +1298,6 @@ function trans_delete(trans) bind(C,name="trans_delete") call free_ptr( trans%rlapin ) call free_ptr( trans%ndglu ) call trans_release( trans%handle ) - trans_delete = TRANS_SUCCESS end function trans_delete function trans_finalize() bind(C,name="trans_finalize") @@ -1615,6 +1630,12 @@ function trans_dirtrans_adj(args) bind(C,name="trans_dirtrans_adj") result(iret) RGPM => RGP endif +#ifdef ECTRANS_GPU_VERSION + call transi_error("trans_dirtrans_adj: ERROR: Not implemented for GPU") + iret = TRANS_NOTIMPL + return +#endif + if( args%nvordiv > 0 .and. args%nscalar > 0 ) then call DIR_TRANSAD( KRESOL=trans%handle, & & KPROMA=args%nproma, & @@ -1855,6 +1876,13 @@ function trans_invtrans_adj(args) bind(C,name="trans_invtrans_adj") result(iret) RGPM => RGP endif +#ifdef ECTRANS_GPU_VERSION + call transi_error("trans_invtrans_adj: ERROR: Not implemented for GPU") + iret = TRANS_NOTIMPL + return +#endif + + ! Note that llatlon is not an option in INV_TRANSAD unlile INV_TRANS and DIR_TRANS if( args%nvordiv > 0 .and. args%nscalar > 0 ) then call INV_TRANSAD( KRESOL=trans%handle, & @@ -2287,6 +2315,11 @@ function trans_vordiv_to_UV(args) bind(C,name="trans_vordiv_to_UV") result(iret) endif call C_F_POINTER( args%rspv, RSPV, (/args%nfld,args%ncoeff/) ) +#ifdef ECTRANS_GPU_VERSION + call transi_error("trans_vordiv_to_UV: ERROR: Not implemented for GPU") + iret = TRANS_NOTIMPL + return +#endif if ( .not. is_init ) then err = trans_init() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ba9cfe51..58e1a55a3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,37 +6,64 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -### trans_test_install +find_package( MPI ) -if( HAVE_TESTS ) +# -------------------------------------------------------------------------------------------------- +# Add a test for installation of ecTrans +# -------------------------------------------------------------------------------------------------- - find_package( MPI ) - configure_file( test-install.sh.in ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh @ONLY ) +configure_file( test-install.sh.in ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh @ONLY ) - unset( _test_args ) - if( CMAKE_TOOLCHAIN_FILE ) - list( APPEND _test_args "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" ) - endif() - foreach( lang C CXX Fortran ) - if( CMAKE_${lang}_COMPILER ) - list( APPEND _test_args "-DCMAKE_${lang}_COMPILER=${CMAKE_${lang}_COMPILER}" ) - endif() - endforeach() - foreach( lang C CXX Fortran ) - if( CMAKE_${lang}_FLAGS ) - list( APPEND _test_args "-DCMAKE_${lang}_FLAGS=${CMAKE_${lang}_FLAGS}" ) - endif() - endforeach() - if( CMAKE_EXE_LINKER_FLAGS ) - list( APPEND _test_args "-DCMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS}" ) +unset( _test_args ) +if( CMAKE_TOOLCHAIN_FILE ) + list( APPEND _test_args "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" ) +endif() +foreach( lang C CXX Fortran ) + if( CMAKE_${lang}_COMPILER ) + list( APPEND _test_args "-DCMAKE_${lang}_COMPILER=${CMAKE_${lang}_COMPILER}" ) endif() - if( NOT HAVE_DOUBLE_PRECISION ) - list( APPEND _test_args "-DCOMPONENTS=single" ) +endforeach() +foreach( lang C CXX Fortran ) + if( CMAKE_${lang}_FLAGS ) + list( APPEND _test_args "-DCMAKE_${lang}_FLAGS=${CMAKE_${lang}_FLAGS}" ) endif() +endforeach() +if( CMAKE_EXE_LINKER_FLAGS ) + list( APPEND _test_args "-DCMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS}" ) +endif() +if( NOT HAVE_DOUBLE_PRECISION ) + list( APPEND _test_args "-DCOMPONENTS=single" ) +endif() + +add_test( NAME ectrans_test_install + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh ${_test_args} ) + +# -------------------------------------------------------------------------------------------------- +# Add a test for SETUP_TRANS0 +# -------------------------------------------------------------------------------------------------- - add_test( NAME ectrans_test_install - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh ${_test_args} ) +ecbuild_add_executable( + TARGET ectrans_test_setup_trans0 + SOURCES trans/test_setup_trans0.F90 + LIBS ectrans_common + LINKER_LANGUAGE Fortran + NOINSTALL) +set( ntasks 0 ) +if( HAVE_MPI ) + list( APPEND ntasks 1 2 ) +endif() +foreach( mpi ${ntasks} ) + ecbuild_add_test( TARGET ectrans_test_setup_trans0_mpi${mpi} + COMMAND ectrans_test_setup_trans0 + MPI ${mpi} + ) +endforeach() + +# -------------------------------------------------------------------------------------------------- +# Add a test for tangent-linear/adjoint correspondence (CPU version only) +# -------------------------------------------------------------------------------------------------- +if( HAVE_CPU ) if( HAVE_DOUBLE_PRECISION ) set( trans trans_dp ) set( parkind parkind_dp ) @@ -45,35 +72,160 @@ if( HAVE_TESTS ) set( parkind parkind_sp ) endif() - ecbuild_add_executable( - TARGET ectrans_test_setup_trans0 - SOURCES trans/test_setup_trans0.F90 - LIBS ectrans_common + ecbuild_add_test(TARGET ectrans_test_adjoint + SOURCES trans/test_adjoint.F90 + LIBS ${trans} ${parkind} LINKER_LANGUAGE Fortran - NOINSTALL) + ) + if( TEST ectrans_test_adjoint AND HAVE_OMP ) + target_link_libraries( ectrans_test_adjoint OpenMP::OpenMP_Fortran ) + endif() +endif() + +# -------------------------------------------------------------------------------------------------- +# Add tests for common call patterns of ecTrans, using the benchmark program +# This tests CPU and/or GPU versions, depending on which are enabled +# -------------------------------------------------------------------------------------------------- + +macro(ectrans_set_test_properties target) + if( "${target}" MATCHES "gpu" ) + set_tests_properties(${target} PROPERTIES LABELS "gpu;Fortran") + endif() +endmacro() + +# Determine which benchmarks are available +set( benchmarks "" ) +if( TARGET ectrans-benchmark-cpu-dp ) + list( APPEND benchmarks ectrans-benchmark-cpu-dp ) +endif() +if( TARGET ectrans-benchmark-cpu-sp ) + list( APPEND benchmarks ectrans-benchmark-cpu-sp ) +endif() +if( TARGET ectrans-benchmark-gpu-dp ) + list( APPEND benchmarks ectrans-benchmark-gpu-dp ) +endif() +if( TARGET ectrans-benchmark-gpu-sp ) + list( APPEND benchmarks ectrans-benchmark-gpu-sp ) +endif() + +foreach( benchmark ${benchmarks} ) + # Establish which task/thread parameters to test set( ntasks 0 ) + set( nthreads 1 ) if( HAVE_MPI ) list( APPEND ntasks 1 2 ) endif() + if( ${benchmark} MATCHES "cpu" ) + if( HAVE_OMP ) + list( APPEND nthreads 4 8 ) + endif() + endif() + + # Add test for each parameter combination foreach( mpi ${ntasks} ) - ecbuild_add_test( TARGET ectrans_test_setup_trans0_mpi${mpi} - COMMAND ectrans_test_setup_trans0 + foreach( omp ${nthreads} ) + # TCO47 truncation + set( t 47 ) + set( grid O48 ) + + # Base arguments -> 2 iterations, memory consumption/pinning information, spectral norms, and + # verbose output + set( base_args "--niter 2 --meminfo --norms -v" ) + + set (base_title "${benchmark}_T${t}_${grid}_mpi${mpi}_omp${omp}") + + # Check it works with 0 3D scalar fields + # This test doesn't work on GPU -> should we delete it? + if( NOT "${benchmark}" MATCHES "-gpu-" ) + ecbuild_add_test( TARGET ${base_title}_nfld0 + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 0 --check 100 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld0 ) + endif() + + # Check it works with 10 3D scalar fields + ecbuild_add_test( TARGET ${base_title}_nfld10 + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --check 100 ${baseargs} MPI ${mpi} + OMP ${omp} ) + ectrans_set_test_properties( ${base_title}_nfld10 ) + + # Check it works with 10 3D scalar fields and 20 levels + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20 + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --nlev 20 --check 100 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld10_nlev20 ) + + # Check it works with 10 3D scalar fields, 20 levels, and scalar derivatives + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_scders + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --nlev 20 --scders --check 100 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld10_nlev20_scders ) + + # Check it works with 10 3D scalar fields, 20 levels, and wind transforms + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_vordiv + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --nlev 20 --vordiv --check 200 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld10_nlev20_vordiv ) + + # Check it works with 10 3D scalar fields, 20 levels, wind transforms, and wind derivatives + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_vordiv_uvders + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --nlev 20 --vordiv --uvders --check 200 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld10_nlev20_vordiv_uvders ) + + # Check it works with 10 3D scalar fields, 20 levels, and NPROMA=16 + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_nproma16 + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --nlev 20 --nproma 16 --check 100 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld10_nlev20_nproma16 ) + + if( ${benchmark} MATCHES "cpu" ) + # Check it works with 10 3D scalar fields, 20 levels, and the fast Legendre tranform (CPU only) + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_flt + COMMAND ${benchmark} + ARGS --truncation ${t} --grid ${grid} --nfld 10 --nlev 20 --flt --check 4000 ${baseargs} + MPI ${mpi} + OMP ${omp} + ) + ectrans_set_test_properties( ${base_title}_nfld10_nlev20_flt ) + endif() + endforeach() endforeach() +endforeach() - ecbuild_add_test(TARGET ectrans_test_adjoint - SOURCES trans/test_adjoint.F90 - LIBS ${trans} ${parkind} - LINKER_LANGUAGE Fortran - ) - if( TEST ectrans_test_adjoint AND HAVE_OMP ) - target_link_libraries( ectrans_test_adjoint OpenMP::OpenMP_Fortran ) - endif() +# -------------------------------------------------------------------------------------------------- +# Add tests for common call patterns of ecTrans LAM benchmark (i.e. etrans), using the benchmark +# program +# -------------------------------------------------------------------------------------------------- +if( HAVE_ETRANS ) + # Set resolution + set( nlon 48 ) + set( nlat 40 ) foreach( prec dp sp ) - if( TARGET ectrans-benchmark-cpu-${prec} ) + if( TARGET ectrans-lam-benchmark-cpu-${prec} ) set( ntasks 0 ) set( nthreads 1 ) if( HAVE_MPI ) @@ -82,47 +234,47 @@ if( HAVE_TESTS ) if( HAVE_OMP ) list( APPEND nthreads 4 8 ) endif() + + # Base arguments -> nlat x nlon, 2 iterations, memory consumption/pinning information, + # spectral norms, and verbose output + set( base_args "--nlon ${nlon} --nlat ${nlat} --niter 2 --meminfo --norms -v" ) + foreach( mpi ${ntasks} ) foreach( omp ${nthreads} ) - set( t 47 ) - set( grid O48 ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld0 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 0 --meminfo --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --meminfo --check 100 --norms -v + set( base_title "ectrans_lam_test_benchmark_${prec}_${nlon}x${nlat}_mpi${mpi}_omp${omp}" ) + + ecbuild_add_test( TARGET ${base_title}_nfld0 + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 0 MPI ${mpi} OMP ${omp} ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --check 100 --norms -v + ecbuild_add_test( TARGET ${base_title}_nfld10 + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 10 MPI ${mpi} OMP ${omp} ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_scders - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --scders --check 100 --norms -v + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20 + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 10 --nlev 20 MPI ${mpi} OMP ${omp} ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --check 100 --norms -v + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_scders + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 10 --nlev 20 --scders MPI ${mpi} OMP ${omp} ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_uvders - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --uvders --check 100 --norms -v + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_vordiv + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 10 --nlev 20 --vordiv MPI ${mpi} OMP ${omp} ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 2000 --norms -v + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_vordiv_uvders + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 10 --nlev 20 --vordiv --uvders MPI ${mpi} OMP ${omp} ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_nproma16 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --nproma 16 --check 100 --norms -v + ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_nproma16 + COMMAND ectrans-lam-benchmark-cpu-${prec} ARGS ${base_args} --nfld 10 --nlev 20 --nproma 16 MPI ${mpi} OMP ${omp} ) @@ -133,12 +285,14 @@ if( HAVE_TESTS ) endif() -if( HAVE_TRANSI ) +# -------------------------------------------------------------------------------------------------- +# Add tests for transi +# -------------------------------------------------------------------------------------------------- +if( HAVE_TRANSI ) check_include_files( malloc.h EC_HAVE_MALLOC_H ) ecbuild_debug_var( EC_HAVE_MALLOC_H ) - if( EC_HAVE_MALLOC_H ) list( APPEND TEST_DEFINITIONS TRANSI_HAVE_MEMORY @@ -154,12 +308,31 @@ if( HAVE_TRANSI ) ) target_compile_definitions( ectrans_test PUBLIC ${TEST_DEFINITIONS} ) + if( HAVE_GPU ) + ecbuild_add_library( TARGET ectrans_test_gpu + SOURCES transi/transi_test.h transi/transi_test.c + PUBLIC_LIBS transi_gpu_dp + NOINSTALL + ) + target_compile_definitions( ectrans_test PUBLIC ${TEST_DEFINITIONS} ) + endif() + ecbuild_add_test( TARGET ectrans_test_transi_program SOURCES transi/transi_test_program.c LIBS ectrans_test LINKER_LANGUAGE C ENVIRONMENT TRANS_USE_MPI=0 ) + if( HAVE_GPU ) + ecbuild_add_test( TARGET ectrans_test_transi_program_gpu + SOURCES transi/transi_test_program.c + LIBS ectrans_test_gpu + LINKER_LANGUAGE C + DEFINITIONS GPU_VERSION + ENVIRONMENT TRANS_USE_MPI=0 ) + set_tests_properties(ectrans_test_transi_program_gpu PROPERTIES LABELS gpu) + endif() + ecbuild_add_test( TARGET ectrans_test_transi_timings SOURCES transi/transi_test_timings.c LIBS ectrans_test @@ -216,11 +389,8 @@ if( HAVE_TRANSI ) LINKER_LANGUAGE C ENVIRONMENT TRANS_USE_MPI=0 ) - if( HAVE_TESTS ) - ecbuild_add_option( FEATURE MEMORY_TESTS DEFAULT ON DESCRIPTION "Enable memory tests" ) - if( NOT HAVE_MEMORY_TESTS ) - set_tests_properties( ectrans_test_transi_memory ectrans_test_transi_memory_lonlat PROPERTIES DISABLED ON ) - endif() + ecbuild_add_option( FEATURE MEMORY_TESTS DEFAULT ON DESCRIPTION "Enable memory tests" ) + if( NOT HAVE_MEMORY_TESTS ) + set_tests_properties( ectrans_test_transi_memory ectrans_test_transi_memory_lonlat PROPERTIES DISABLED ON ) endif() - endif() diff --git a/tests/test_ectrans4py/__init__.py b/tests/test_ectrans4py/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_ectrans4py/data/__init__.py b/tests/test_ectrans4py/data/__init__.py new file mode 100644 index 000000000..22793e116 --- /dev/null +++ b/tests/test_ectrans4py/data/__init__.py @@ -0,0 +1,15 @@ +import numpy +import os +_here = os.path.abspath(os.path.dirname(__file__)) + +lon_number_by_lat = numpy.load(os.path.join(_here, 'lon_number_by_lat.npy')) +zonal_wavenumbers = numpy.load(os.path.join(_here, 'zonal_wavenumbers.npy')) + +antwrp1300 = { + 'sp' : numpy.load(os.path.join(_here, 'antwrp1300-s1t@sp.npy')), + 'sp2gp' : numpy.load(os.path.join(_here, 'antwrp1300-s1t@sp2gp.npy')), + } +tl149_c24 = { + 'sp' : numpy.load(os.path.join(_here, 'tl149-c24-s1t@sp.npy')), + 'sp2gp' : numpy.load(os.path.join(_here, 'tl149-c24-s1t@sp2gp.npy')), + } diff --git a/tests/test_ectrans4py/data/antwrp1300-s1t@sp.npy b/tests/test_ectrans4py/data/antwrp1300-s1t@sp.npy new file mode 100644 index 000000000..6095a7527 Binary files /dev/null and b/tests/test_ectrans4py/data/antwrp1300-s1t@sp.npy differ diff --git a/tests/test_ectrans4py/data/antwrp1300-s1t@sp2gp.npy b/tests/test_ectrans4py/data/antwrp1300-s1t@sp2gp.npy new file mode 100644 index 000000000..d5d0f3e1e Binary files /dev/null and b/tests/test_ectrans4py/data/antwrp1300-s1t@sp2gp.npy differ diff --git a/tests/test_ectrans4py/data/lon_number_by_lat.npy b/tests/test_ectrans4py/data/lon_number_by_lat.npy new file mode 100644 index 000000000..317b2bd3c Binary files /dev/null and b/tests/test_ectrans4py/data/lon_number_by_lat.npy differ diff --git a/tests/test_ectrans4py/data/tl149-c24-s1t@sp.npy b/tests/test_ectrans4py/data/tl149-c24-s1t@sp.npy new file mode 100644 index 000000000..702d566ec Binary files /dev/null and b/tests/test_ectrans4py/data/tl149-c24-s1t@sp.npy differ diff --git a/tests/test_ectrans4py/data/tl149-c24-s1t@sp2gp.npy b/tests/test_ectrans4py/data/tl149-c24-s1t@sp2gp.npy new file mode 100644 index 000000000..d836cdc37 Binary files /dev/null and b/tests/test_ectrans4py/data/tl149-c24-s1t@sp2gp.npy differ diff --git a/tests/test_ectrans4py/data/zonal_wavenumbers.npy b/tests/test_ectrans4py/data/zonal_wavenumbers.npy new file mode 100644 index 000000000..43dec377d Binary files /dev/null and b/tests/test_ectrans4py/data/zonal_wavenumbers.npy differ diff --git a/tests/test_ectrans4py/test_ectrans4py.py b/tests/test_ectrans4py/test_ectrans4py.py new file mode 100644 index 000000000..f96c3f7dc --- /dev/null +++ b/tests/test_ectrans4py/test_ectrans4py.py @@ -0,0 +1,130 @@ +from unittest import main, TestCase +import numpy +from . import data +import ectrans4py + +ectrans4py.init_env() + +KNUMMAXRESOL = 10 +EPSILON = 1e-13 + + +class ArraysAlmostEqual(object): + + def assert_arrays_diff_under_epsilon(self, x, y): + diff = x - y + diffmax = abs(diff.max()) + diffmin = abs(diff.min()) + self.assertTrue(diffmax < EPSILON, "diffmax is {}".format(diffmax)) + self.assertTrue(diffmin < EPSILON, "diffmin is {}".format(diffmin)) + + +class TestLAM(TestCase, ArraysAlmostEqual): + + gpdims = {'X':54, + 'Y':48, + 'X_CIzone':43, + 'Y_CIzone':37, + 'X_resolution':1300.0, + 'Y_resolution':1300.0} + truncation = {'in_X':26, + 'in_Y':23} + spectral_data_sizes = (2592, 1968) + spdata = data.antwrp1300['sp'] + gpdata = data.antwrp1300['sp2gp'] + + def test_etrans_inq(self): + spectral_data_sizes = ectrans4py.etrans_inq4py( + self.gpdims['X'], + self.gpdims['Y'], + self.gpdims['X_CIzone'], + self.gpdims['Y_CIzone'], + self.truncation['in_X'], + self.truncation['in_Y'], + KNUMMAXRESOL, + self.gpdims['X_resolution'], + self.gpdims['Y_resolution']) + self.assertEqual(spectral_data_sizes, self.spectral_data_sizes) + + def test_sp2gp(self): + gpdata = ectrans4py.sp2gp_lam4py( + self.gpdims['X'], + self.gpdims['Y'], + self.gpdims['X_CIzone'], + self.gpdims['Y_CIzone'], + self.truncation['in_X'], + self.truncation['in_Y'], + KNUMMAXRESOL, + len(self.spdata.flatten()), + False, # no derivatives + False, # spectral_coeff_order != 'model', + self.gpdims['X_resolution'], + self.gpdims['Y_resolution'], + self.spdata.flatten())[0] + self.assert_arrays_diff_under_epsilon(gpdata, gpdata.flatten()) + + def test_gp2sp(self): + spdata = ectrans4py.gp2sp_lam4py( + self.spectral_data_sizes[1], + self.gpdims['X'], + self.gpdims['Y'], + self.gpdims['X_CIzone'], + self.gpdims['Y_CIzone'], + self.truncation['in_X'], + self.truncation['in_Y'], + KNUMMAXRESOL, + self.gpdims['X_resolution'], + self.gpdims['Y_resolution'], + False, # spectral_coeff_order != 'model', + self.gpdata.flatten()) + self.assert_arrays_diff_under_epsilon(spdata, spdata.flatten()) + +class TestGlobal(TestCase, ArraysAlmostEqual): + + gpdims = {'lat_number':150, + 'lon_number_by_lat':data.lon_number_by_lat} + truncation = {'max':148} + spectral_data_sizes = ( + 33052, + 11175, + data.zonal_wavenumbers) + spdata = data.tl149_c24['sp'] + gpdata = data.tl149_c24['sp2gp'] + + def test_trans_inq4py(self): + spectral_data_sizes = ectrans4py.trans_inq4py( + self.gpdims['lat_number'], + self.truncation['max'], + len(self.gpdims['lon_number_by_lat']), + self.gpdims['lon_number_by_lat'], + KNUMMAXRESOL) + self.assertEqual(spectral_data_sizes[0:2], self.spectral_data_sizes[0:2]) # dimensions + numpy.testing.assert_array_equal(spectral_data_sizes[2], self.spectral_data_sizes[2]) # zonal_wavenumbers + + def test_sp2gp(self): + gpdata = ectrans4py.sp2gp_gauss4py( + self.gpdims['lat_number'], + self.truncation['max'], + KNUMMAXRESOL, + sum(self.gpdims['lon_number_by_lat']), + len(self.gpdims['lon_number_by_lat']), + self.gpdims['lon_number_by_lat'], + len(self.spdata.flatten()), + False, # no derivatives + False, # spectral_coeff_order != 'model', + self.spdata.flatten())[0] + self.assert_arrays_diff_under_epsilon(gpdata, gpdata.flatten()) + + def test_gp2sp(self): + spdata = ectrans4py.gp2sp_gauss4py( + self.spectral_data_sizes[1] * 2, # *2 for complex coefficients + self.gpdims['lat_number'], + self.truncation['max'], + KNUMMAXRESOL, + len(self.gpdims['lon_number_by_lat']), + self.gpdims['lon_number_by_lat'], + len(self.gpdata.flatten()), + False, # spectral_coeff_order != 'model', + self.gpdata.flatten()) + self.assert_arrays_diff_under_epsilon(spdata, spdata.flatten()) + diff --git a/tests/transi/transi_test_program.c b/tests/transi/transi_test_program.c index 929cd8b88..096586c01 100644 --- a/tests/transi/transi_test_program.c +++ b/tests/transi/transi_test_program.c @@ -19,22 +19,35 @@ void read_grid(struct Trans_t*); int main ( int arc, char **argv ) { - trans_use_mpi( test_use_mpi() ); +#ifdef GPU_VERSION + fprintf(stderr, "transi_test_program GPU VERSION\n"); +#else + fprintf(stderr, "transi_test_program CPU VERSION\n"); +#endif + + fprintf(stderr,"start\n"); + fprintf(stderr,"ectrans version int = %d\n",ectrans_version_int()); + fprintf(stderr,"ectrans version = %s\n",ectrans_version()); + fprintf(stderr,"ectrans version str = %s\n",ectrans_version_str()); + fprintf(stderr,"ectrans git sha1 [0:7] = %s\n",ectrans_git_sha1_abbrev(7)); + fprintf(stderr,"ectrans git sha1 [0:12] = %s\n",ectrans_git_sha1_abbrev(12)); + fprintf(stderr,"ectrans git sha1 = %s\n",ectrans_git_sha1()); + + fprintf(stderr,"Using MPI: %d\n", test_use_mpi()); - printf("ectrans version int = %d\n",ectrans_version_int()); - printf("ectrans version = %s\n",ectrans_version()); - printf("ectrans version str = %s\n",ectrans_version_str()); - printf("ectrans git sha1 [0:7] = %s\n",ectrans_git_sha1_abbrev(7)); - printf("ectrans git sha1 [0:12] = %s\n",ectrans_git_sha1_abbrev(12)); - printf("ectrans git sha1 = %s\n",ectrans_git_sha1()); + trans_use_mpi( test_use_mpi() ); - //printf("transi started\n"); + fprintf(stderr,"trans_new\n"); int nout = 3; struct Trans_t trans; trans_new(&trans); + fprintf(stderr,"trans_new done\n"); read_grid(&trans); + fprintf(stderr,"trans_setup\n"); trans_setup(&trans); + fprintf(stderr,"trans_setup done\n"); + trans_inquire(&trans,"numpp,ngptotl,nmyms,nasm0,npossp,nptrms,nallms,ndim0g,nvalue"); trans_inquire(&trans,"nfrstlat,nlstlat,nptrlat,nptrfrstlat,nptrlstlat,nsta,nonl,ldsplitlat"); trans_inquire(&trans,"nultpp,nptrls,nnmeng"); @@ -43,7 +56,7 @@ int main ( int arc, char **argv ) //Check values of numpp if( trans.myproc == 1 ) { - printf("nprtrw = %d\n",trans.nprtrw); + fprintf(stderr,"nprtrw = %d\n",trans.nprtrw); int i; for( i=0; i 1.e-5) - printf("rgp[%d][%d] : %f\n",j,i,rgp[j*trans.ngptot+i]); + fprintf(stderr,"rgp[%d][%d] : %f\n",j,i,rgp[j*trans.ngptot+i]); } } @@ -118,6 +133,7 @@ int main ( int arc, char **argv ) double* rspdiv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); // Direct Transform + fprintf(stderr,"dirtrans\n"); struct DirTrans_t dirtrans = new_dirtrans(&trans); dirtrans.nscalar = nscalar; dirtrans.nvordiv = nvordiv; @@ -126,6 +142,7 @@ int main ( int arc, char **argv ) dirtrans.rspvor = rspvor; dirtrans.rspdiv = rspdiv; trans_dirtrans(&dirtrans); + fprintf(stderr,"dirtrans done\n"); if( trans.myproc == 1 ) { @@ -133,21 +150,22 @@ int main ( int arc, char **argv ) for( j=0; j 1.e-5) - printf("rspscalar[%d][%d] : %f\n",j,i,rspscalar[i*nscalar+j]); + fprintf(stderr,"rspscalar[%d][%d] : %f\n",j,i,rspscalar[i*nscalar+j]); } } } +#ifndef GPU_VERSION // Allocate fields for u*cos(theta) and v*cos(theta) double* rspu = malloc( sizeof(double) * nvordiv*trans.nspec2 ); double* rspv = malloc( sizeof(double) * nvordiv*trans.nspec2 ); // Convert vorticity & divergence to u*cos(theta) & v*cos(theta) - printf("Converting spectral vorticity-divergence to u*cos(lat)-v*cos(lat)...\n"); + fprintf(stderr,"Converting spectral vorticity-divergence to u*cos(lat)-v*cos(lat)...\n"); struct VorDivToUV_t vordiv_to_UV = new_vordiv_to_UV(); vordiv_to_UV.rspvor = rspvor; vordiv_to_UV.rspdiv = rspdiv; @@ -157,8 +175,8 @@ int main ( int arc, char **argv ) vordiv_to_UV.ncoeff = trans.nspec2; vordiv_to_UV.nsmax = trans.nsmax; trans_vordiv_to_UV(&vordiv_to_UV); - printf("Converting spectral vorticity-divergence to u*cos(lat)-v*cos(lat)...done\n"); - + fprintf(stderr,"Converting spectral vorticity-divergence to u*cos(lat)-v*cos(lat)...done\n"); +#endif // Gather spectral field (for fun) int* nto = malloc( sizeof(int) * nscalar ); @@ -182,11 +200,11 @@ int main ( int arc, char **argv ) for( j=0; j 1.e-5 && i > 0) - printf("rspscalarg[%d][%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); + fprintf(stderr,"rspscalarg[%d][%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); } } } @@ -197,11 +215,11 @@ int main ( int arc, char **argv ) for( j=0; j 1.e-5 && i > 0) - printf("rspscalarg[%d][%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); + fprintf(stderr,"rspscalarg[%d][%d] : %f\n",j,i,rspscalarg[i*nscalar+j]); } } } @@ -224,12 +242,13 @@ int main ( int arc, char **argv ) gathspec.nto = nto; trans_gathspec(&gathspec); +#ifndef GPU_VERSION // Allocate fields for u*cos(theta) and v*cos(theta) double* rspug = malloc( sizeof(double) * nvordiv*trans.nspec2g ); double* rspvg = malloc( sizeof(double) * nvordiv*trans.nspec2g ); // Convert vorticity & divergence to u*cos(theta) & v*cos(theta) - printf("Converting spectral vorticity-divergence to U-V globally...\n"); + fprintf(stderr,"Converting spectral vorticity-divergence to U-V globally...\n"); struct VorDivToUV_t vordiv_to_UV_g = new_vordiv_to_UV(); vordiv_to_UV_g.rspvor = rspvorg; vordiv_to_UV_g.rspdiv = rspdivg; @@ -239,8 +258,8 @@ int main ( int arc, char **argv ) vordiv_to_UV_g.ncoeff = trans.nspec2g; vordiv_to_UV_g.nsmax = trans.nsmax; trans_vordiv_to_UV(&vordiv_to_UV_g); - printf("Converting spectral vorticity-divergence to U-V globally...done\n"); - + fprintf(stderr,"Converting spectral vorticity-divergence to U-V globally...done\n"); +#endif // Distribute spectral field (for fun) struct DistSpec_t distspec = new_distspec(&trans); @@ -267,7 +286,7 @@ int main ( int arc, char **argv ) { for( i=0; i