|
| 1 | +# (C) Copyright 2022- ECMWF. |
| 2 | +# (C) Copyright 2022- Meteo-France. |
| 3 | +# |
| 4 | +# This software is licensed under the terms of the Apache Licence Version 2.0 |
| 5 | +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. |
| 6 | +# In applying this licence, ECMWF does not waive the privileges and immunities |
| 7 | +# granted to it by virtue of its status as an intergovernmental organisation |
| 8 | +# nor does it submit to any jurisdiction. |
| 9 | + |
| 10 | + |
| 11 | +__all__ = ['NVHPCOpenACCCUDA'] |
| 12 | + |
| 13 | +from offload_backends.nvhpc import NVHPCOpenACC |
| 14 | + |
class NVHPCOpenACCCUDA(NVHPCOpenACC):
    """
    Macro definitions for GPU offload that pair Nvidia's OpenACC
    implementation with the CUDA runtime API.

    Every method is a classmethod returning Fortran text: either a single
    string or a list of strings.
    """

    @classmethod
    def runtime_api_import(cls):
        """
        Import statements for the runtime API.

        Returns a two-element list: whatever the parent class'
        ``runtime_api_import`` returns, followed by ``"USE CUDAFOR"``.
        """

        return [super().runtime_api_import(), "USE CUDAFOR"]

    @classmethod
    def stream_handle_kind(cls):
        """
        Name of the INTEGER kind specifier used for a stream handle.
        """

        kind_name = "CUDA_STREAM_KIND"
        return kind_name

    @classmethod
    def dev_malloc_intf(cls):
        """
        ISO_C interface for allocating device memory.

        Declares ``CUDA_MALLOC`` bound to the C symbol ``cudaMalloc`` and
        returns the declaration split on newlines. The first and last
        entries of the list carry no Fortran code, because the text opens
        and closes with a line break.
        """

        interface_block = """
          INTEGER FUNCTION CUDA_MALLOC (PTR,SIZ) BIND (C, NAME='cudaMalloc')
            IMPORT :: C_PTR, C_SIZE_T
            INTEGER (C_SIZE_T), VALUE, INTENT(IN) :: SIZ
            TYPE (C_PTR), INTENT(OUT) :: PTR
          END FUNCTION CUDA_MALLOC
        """

        return interface_block.split('\n')

    @classmethod
    def dev_free_intf(cls):
        """
        ISO_C interface for releasing device memory.

        Declares ``CUDA_FREE`` bound to the C symbol ``cudaFree`` and returns
        the declaration split on newlines.
        """

        interface_block = """
          INTEGER FUNCTION CUDA_FREE (PTR) BIND (C, NAME='cudaFree')
            IMPORT :: C_PTR
            TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
          END FUNCTION CUDA_FREE
        """

        return interface_block.split('\n')

    @classmethod
    def runtime_error_return_type(cls, symbols):
        """
        Declaration of the variable(s) holding the runtime API error status.

        ``symbols`` is an iterable of variable names (strings); they are
        joined with commas into one ``INTEGER ::`` declaration.
        """

        names = ','.join(symbols)
        return f"INTEGER :: {names}"

    @classmethod
    def dev_malloc(cls, ptr, size, return_val="ISTAT"):
        """
        Statement allocating memory on the device.

        Emits a call to ``CUDA_MALLOC`` with ``ptr`` and ``size`` as its
        arguments and assigns the result to ``return_val``.
        """

        statement = f"{return_val} = CUDA_MALLOC({ptr}, {size})"
        return statement

    @classmethod
    def dev_free(cls, ptr, return_val="ISTAT"):
        """
        Statement freeing device memory.

        Emits a call to ``CUDA_FREE`` on ``ptr`` and assigns the result to
        ``return_val``.
        """

        statement = f"{return_val} = CUDA_FREE({ptr})"
        return statement

    @classmethod
    def register_host(cls, ptr, size, flags, return_val="ISTAT"):
        """
        Statement page-locking host memory.

        Emits a call to ``CUDA_HOST_REGISTER`` with ``ptr``, ``size`` and
        ``flags`` and assigns the result to ``return_val``.
        """

        statement = f"{return_val} = CUDA_HOST_REGISTER({ptr}, {size}, {flags})"
        return statement

    @classmethod
    def register_host_set_flags(cls, flag_var, val):
        """
        Statement setting the flags that control page-locking of host memory.

        Assigns ``val`` to ``flag_var``. The trailing Fortran comment always
        names cudaHostRegisterMapped, whatever ``val`` is.
        """

        assignment = f"{flag_var} = {val} !... Corresponds to cudaHostRegisterMapped"
        return assignment

    @classmethod
    def register_host_decl_flags(cls, flag_var):
        """
        Declaration of the variable storing the host page-locking flags.

        ``flag_var`` is declared as ``INTEGER(C_INT)``.
        """

        declaration = f"INTEGER(C_INT) :: {flag_var}"
        return declaration

    @classmethod
    def unregister_host(cls, ptr, return_val="ISTAT"):
        """
        Statement un-pinning host memory, i.e. undoing page-locking.

        Emits a call to ``CUDA_HOST_UNREGISTER`` on ``ptr`` and assigns the
        result to ``return_val``.
        """

        statement = f"{return_val} = CUDA_HOST_UNREGISTER({ptr})"
        return statement

    @classmethod
    def host_register_intf(cls):
        """
        ISO_C interface for page-locking host memory.

        Declares ``CUDA_HOST_REGISTER`` bound to the C symbol
        ``cudaHostRegister`` and returns the declaration split on newlines.
        """

        interface_block = """
          INTEGER FUNCTION CUDA_HOST_REGISTER (PTR, SIZ, FLAGS) BIND (C, NAME='cudaHostRegister')
            IMPORT :: C_PTR, C_SIZE_T, C_INT
            TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
            INTEGER (C_SIZE_T), VALUE, INTENT(IN) :: SIZ
            INTEGER (C_INT), VALUE, INTENT(IN) :: FLAGS
          END FUNCTION CUDA_HOST_REGISTER
        """

        return interface_block.split('\n')

    @classmethod
    def host_unregister_intf(cls):
        """
        ISO_C interface for un-pinning host memory, i.e. undoing page-locking.

        Declares ``CUDA_HOST_UNREGISTER`` bound to the C symbol
        ``cudaHostUnregister`` and returns the declaration split on newlines.
        """

        interface_block = """
          INTEGER FUNCTION CUDA_HOST_UNREGISTER (PTR) BIND (C, NAME='cudaHostUnregister')
            IMPORT :: C_PTR
            TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
          END FUNCTION CUDA_HOST_UNREGISTER
        """

        return interface_block.split('\n')

    @classmethod
    def set_async_stream(cls, id, stream):
        """
        Statement setting an asynchronous stream.

        Emits ``CALL ACC_SET_CUDA_STREAM`` with ``id`` and ``stream`` as its
        arguments.
        """

        statement = f"CALL ACC_SET_CUDA_STREAM({id}, {stream})"
        return statement

    @classmethod
    def copy_2D(cls, src, src_pitch, dst, dst_pitch, width, height, return_val="ISTAT"):
        """
        Statement copying a strided memory region from ``src`` to ``dst``.

        The emitted ``CUDAMEMCPY2D`` call lists the destination and its pitch
        before the source and its pitch; the result is assigned to
        ``return_val``.
        """

        statement = f"{return_val} = CUDAMEMCPY2D({dst}, {dst_pitch}, {src}, {src_pitch}, {width}, {height})"
        return statement

    @classmethod
    def copy_2D_async(cls, src, src_pitch, dst, dst_pitch, width, height, stream, return_val="ISTAT"):
        """
        Statement asynchronously copying a strided memory region from ``src``
        to ``dst``.

        Same argument layout as ``copy_2D``, with ``stream`` passed through
        the ``STREAM=`` keyword of ``CUDAMEMCPY2DASYNC``.
        """

        statement = f"{return_val} = CUDAMEMCPY2DASYNC({dst}, {dst_pitch}, {src}, {src_pitch}, {width}, {height}, STREAM={stream})"
        return statement
0 commit comments