Skip to content

Commit 04188f5

Browse files
committed
Adapt CUDA backend to use python macros
1 parent 3062bd8 commit 04188f5

File tree

7 files changed

+439
-88
lines changed

7 files changed

+439
-88
lines changed

python_utils/offload_backends/nvhpc/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
# nor does it submit to any jurisdiction.
99

1010
from offload_backends.nvhpc.openacc import *
11+
from offload_backends.nvhpc.openacc_cuda import *

python_utils/offload_backends/nvhpc/openacc.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,26 @@ def update_host(cls, data):
250250
"""
251251

252252
return f"!$acc update self ({','.join(data)})"
253+
254+
@classmethod
def data_start(cls, **kwargs):
    """
    Build the pragma that opens a `data` region.

    Each name listed in `cls._data_attributes` that is supplied as a
    non-empty keyword argument contributes one `name(a,b,...)` clause; the
    clauses appear in the order the attributes are listed on the class.
    Keyword arguments that are absent or empty are skipped.
    """

    # Every clause carries its own trailing blank, exactly as the pragma
    # text is assembled clause by clause.
    clauses = (
        f"{attr}({','.join(kwargs[attr])}) "
        for attr in cls._data_attributes
        if kwargs.get(attr, None)
    )

    return "!$acc data " + "".join(clauses)
267+
268+
@classmethod
def data_end(cls):
    """
    Build the pragma that closes a `data` region.
    """

    pragma = "!$acc end data"
    return pragma
python_utils/offload_backends/nvhpc/openacc_cuda.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# (C) Copyright 2022- ECMWF.
2+
# (C) Copyright 2022- Meteo-France.
3+
#
4+
# This software is licensed under the terms of the Apache Licence Version 2.0
5+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6+
# In applying this licence, ECMWF does not waive the privileges and immunities
7+
# granted to it by virtue of its status as an intergovernmental organisation
8+
# nor does it submit to any jurisdiction.
9+
10+
11+
__all__ = ['NVHPCOpenACCCUDA']
12+
13+
from offload_backends.nvhpc import NVHPCOpenACC
14+
15+
class NVHPCOpenACCCUDA(NVHPCOpenACC):
    """
    Macro definitions for GPU offload that combine Nvidia's OpenACC
    implementation with the CUDA runtime API.

    Every method is a classmethod that returns generated source text,
    either as a single string or as a list of lines.
    """

    @classmethod
    def runtime_api_import(cls):
        """
        Return the runtime API imports: whatever the parent class provides,
        followed by the `USE CUDAFOR` statement.
        """

        return [super().runtime_api_import(), "USE CUDAFOR"]

    @classmethod
    def stream_handle_kind(cls):
        """
        Return the INTEGER kind specifier used for a stream handle.
        """

        kind = "CUDA_STREAM_KIND"
        return kind

    @classmethod
    def dev_malloc_intf(cls):
        """
        Return, as a list of lines, the ISO_C interface binding `CUDA_MALLOC`
        to `cudaMalloc` for device memory allocation.
        """

        text = """
        INTEGER FUNCTION CUDA_MALLOC (PTR,SIZ) BIND (C, NAME='cudaMalloc')
          IMPORT :: C_PTR, C_SIZE_T
          INTEGER (C_SIZE_T), VALUE, INTENT(IN) :: SIZ
          TYPE (C_PTR), INTENT(OUT) :: PTR
        END FUNCTION CUDA_MALLOC
        """

        lines = text.split('\n')
        return lines

    @classmethod
    def dev_free_intf(cls):
        """
        Return, as a list of lines, the ISO_C interface binding `CUDA_FREE`
        to `cudaFree` for releasing device memory.
        """

        text = """
        INTEGER FUNCTION CUDA_FREE (PTR) BIND (C, NAME='cudaFree')
          IMPORT :: C_PTR
          TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
        END FUNCTION CUDA_FREE
        """

        lines = text.split('\n')
        return lines

    @classmethod
    def runtime_error_return_type(cls, symbols):
        """
        Return the INTEGER declaration for the variables in `symbols`, which
        hold the runtime API error status.
        """

        names = ','.join(symbols)
        return f"INTEGER :: {names}"

    @classmethod
    def dev_malloc(cls, ptr, size, return_val="ISTAT"):
        """
        Return the statement that allocates `size` on the device for `ptr`
        and stores the status in `return_val`.
        """

        call = f"CUDA_MALLOC({ptr}, {size})"
        return f"{return_val} = {call}"

    @classmethod
    def dev_free(cls, ptr, return_val="ISTAT"):
        """
        Return the statement that frees the device memory at `ptr` and
        stores the status in `return_val`.
        """

        call = f"CUDA_FREE({ptr})"
        return f"{return_val} = {call}"

    @classmethod
    def register_host(cls, ptr, size, flags, return_val="ISTAT"):
        """
        Return the statement that page-locks host memory and stores the
        status in `return_val`.
        """

        call = f"CUDA_HOST_REGISTER({ptr}, {size}, {flags})"
        return f"{return_val} = {call}"

    @classmethod
    def register_host_set_flags(cls, flag_var, val):
        """
        Return the assignment that sets the flag variable controlling
        page-locking of host memory.
        """

        assignment = f"{flag_var} = {val}"
        return f"{assignment} !... Corresponds to cudaHostRegisterMapped"

    @classmethod
    def register_host_decl_flags(cls, flag_var):
        """
        Return the declaration of the variable that stores the flags
        controlling page-locking of host memory.
        """

        declaration = f"INTEGER(C_INT) :: {flag_var}"
        return declaration

    @classmethod
    def unregister_host(cls, ptr, return_val="ISTAT"):
        """
        Return the statement that unpins (i.e. undoes page-locking of) host
        memory and stores the status in `return_val`.
        """

        call = f"CUDA_HOST_UNREGISTER({ptr})"
        return f"{return_val} = {call}"

    @classmethod
    def host_register_intf(cls):
        """
        Return, as a list of lines, the ISO_C interface binding
        `CUDA_HOST_REGISTER` to `cudaHostRegister` for page-locking host
        memory.
        """

        text = """
        INTEGER FUNCTION CUDA_HOST_REGISTER (PTR, SIZ, FLAGS) BIND (C, NAME='cudaHostRegister')
          IMPORT :: C_PTR, C_SIZE_T, C_INT
          TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
          INTEGER (C_SIZE_T), VALUE, INTENT(IN) :: SIZ
          INTEGER (C_INT), VALUE, INTENT(IN) :: FLAGS
        END FUNCTION CUDA_HOST_REGISTER
        """

        lines = text.split('\n')
        return lines

    @classmethod
    def host_unregister_intf(cls):
        """
        Return, as a list of lines, the ISO_C interface binding
        `CUDA_HOST_UNREGISTER` to `cudaHostUnregister` for un-pinning host
        memory.
        """

        text = """
        INTEGER FUNCTION CUDA_HOST_UNREGISTER (PTR) BIND (C, NAME='cudaHostUnregister')
          IMPORT :: C_PTR
          TYPE (C_PTR), VALUE, INTENT(IN) :: PTR
        END FUNCTION CUDA_HOST_UNREGISTER
        """

        lines = text.split('\n')
        return lines

    @classmethod
    def set_async_stream(cls, id, stream):
        """
        Return the call that sets the asynchronous stream for queue `id`.
        """

        arguments = f"{id}, {stream}"
        return f"CALL ACC_SET_CUDA_STREAM({arguments})"

    @classmethod
    def copy_2D(cls, src, src_pitch, dst, dst_pitch, width, height, return_val="ISTAT"):
        """
        Return the statement that copies a strided memory region from source
        (src) to destination (dst).

        Note that the generated call lists the destination and its pitch
        before the source and its pitch.
        """

        destination = f"{dst}, {dst_pitch}"
        source = f"{src}, {src_pitch}"
        extent = f"{width}, {height}"

        return f"{return_val} = CUDAMEMCPY2D({destination}, {source}, {extent})"

    @classmethod
    def copy_2D_async(cls, src, src_pitch, dst, dst_pitch, width, height, stream, return_val="ISTAT"):
        """
        Return the statement that asynchronously copies a strided memory
        region from source (src) to destination (dst) on `stream`.

        Note that the generated call lists the destination and its pitch
        before the source and its pitch.
        """

        destination = f"{dst}, {dst_pitch}"
        source = f"{src}, {src_pitch}"
        extent = f"{width}, {height}"

        return f"{return_val} = CUDAMEMCPY2DASYNC({destination}, {source}, {extent}, STREAM={stream})"

0 commit comments

Comments
 (0)