Skip to content

Commit 395aed4

Browse files
authored
feat(python): Implement CUDA build in Python bindings (#547)
This PR updates the build system so that nanoarrow's CUDA capability can be exposed in Python (it doesn't expose any of the functionality yet). I'm not sure this is the best long-term strategy: I think we might be able to get CUDA plugged in at runtime because we don't use much of the API, but I think this would at least enable distributing a CUDA-enabled build on conda-forge.

To build, one has to do something like:

```shell
export NANOARROW_PYTHON_CUDA_HOME=/usr/local/cuda
pip install .
```

On Windows, this would be something like:

```powershell
$Env:NANOARROW_PYTHON_CUDA_HOME = 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.5'
python -m pip install .
```

Then, you should be able to resolve a CUDA device:

```python
from nanoarrow import device

device.resolve(device.DeviceType.CUDA.value, 0)
#> <nanoarrow.device.Device>
#> - device_type: CUDA <2>
#> - device_id: 0
```
1 parent 721972a commit 395aed4

File tree

8 files changed

+142
-145
lines changed

8 files changed

+142
-145
lines changed

.github/workflows/build-and-test-device.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,15 @@ jobs:
132132
cd build
133133
ctest -T memcheck .
134134
135+
- name: Check Python bindings with CUDA
136+
if: matrix.config.label == 'with-cuda'
137+
env:
138+
NANOARROW_PYTHON_CUDA_HOME: "/usr/local/cuda"
139+
run: |
140+
cd python
141+
python3 -m pip install ".[test]" -vv
142+
python3 -m pytest -vv
143+
135144
- name: Upload memcheck results
136145
if: failure() && matrix.config.label == 'default-build'
137146
uses: actions/upload-artifact@main

CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,12 +284,12 @@ if(NANOARROW_DEVICE)
284284

285285
# If the bundler didn't already assign the source files for the library, do so here
286286
if(NOT NANOARROW_BUNDLE)
287-
set(NANOARROW_DEVICE_BUILD_SOURCES src/nanoarrow/device/device.c)
287+
set(NANOARROW_DEVICE_BUILD_SOURCES src/nanoarrow/device/device.c
288+
${NANOARROW_DEVICE_SOURCES_CUDA})
288289
endif()
289290

290-
add_library(nanoarrow_device
291-
${NANOARROW_DEVICE_BUILD_SOURCES} ${NANOARROW_DEVICE_SOURCES_METAL}
292-
${NANOARROW_DEVICE_SOURCES_CUDA})
291+
add_library(nanoarrow_device ${NANOARROW_DEVICE_BUILD_SOURCES}
292+
${NANOARROW_DEVICE_SOURCES_METAL})
293293

294294
target_include_directories(nanoarrow_device
295295
PUBLIC $<BUILD_INTERFACE:${NANOARROW_BUILD_INCLUDE_DIR}>

ci/scripts/bundle.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,11 @@ def bundle_nanoarrow_device(
171171
yield f"{output_include_dir}/{filename}", content
172172

173173
# Generate sources
174-
for filename in ["device.c"]:
175-
content = read_content(src_dir / "device" / filename)
176-
content = namespace_nanoarrow_includes(content, header_namespace)
177-
yield f"{output_source_dir}/nanoarrow_{filename}", content
174+
content = concatenate_content(
175+
[src_dir / "device" / "device.c", src_dir / "device" / "cuda.c"]
176+
)
177+
content = namespace_nanoarrow_includes(content, header_namespace)
178+
yield f"{output_source_dir}/nanoarrow_device.c", content
178179

179180

180181
def bundle_nanoarrow_ipc(

python/bootstrap.py

Lines changed: 69 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@
2222

2323

2424
# Generate the nanoarrow_c.pxd file used by the Cython extensions
25-
class NanoarrowPxdGenerator:
25+
class PxdGenerator:
2626
def __init__(self):
2727
self._define_regexes()
2828

29-
def generate_nanoarrow_pxd(self, file_in, file_out):
29+
def generate_pxd(self, file_in, file_out):
3030
file_in_name = pathlib.Path(file_in).name
3131

32-
# Read the nanoarrow.h header
32+
# Read the header
3333
content = None
3434
with open(file_in, "r") as input:
3535
content = input.read()
@@ -38,7 +38,7 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
3838
content = self.re_comment.sub("", content)
3939

4040
# Apply generator-specific preprocessing (e.g., replacing NANOARROW_MAX_FIXED_BUFFERS with its value)
41-
content = self.re_max_buffers.sub("3", content)
41+
content = self._preprocess_content(content)
4242

4343
# Find typedefs, types, and function definitions
4444
typedefs = self._find_typedefs(content)
@@ -62,13 +62,7 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
6262
)
6363

6464
# A few things we add in manually
65-
output.write(b"\n")
66-
output.write(b" cdef int NANOARROW_OK\n")
67-
output.write(b" cdef int NANOARROW_MAX_FIXED_BUFFERS\n")
68-
output.write(b" cdef int ARROW_FLAG_DICTIONARY_ORDERED\n")
69-
output.write(b" cdef int ARROW_FLAG_NULLABLE\n")
70-
output.write(b" cdef int ARROW_FLAG_MAP_KEYS_SORTED\n")
71-
output.write(b"\n")
65+
self._write_defs(output)
7266

7367
for type in types_cython:
7468
output.write(type.encode("UTF-8"))
@@ -77,15 +71,21 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
7771
for typedef in typedefs_cython:
7872
output.write(typedef.encode("UTF-8"))
7973
output.write(b"\n")
74+
8075
output.write(b"\n")
8176

8277
for func_def in func_defs_cython:
8378
output.write(func_def.encode("UTF-8"))
8479
output.write(b"\n")
8580

81+
def _preprocess_content(self, content):
82+
return content
83+
84+
def _write_defs(self, output):
85+
pass
86+
8687
def _define_regexes(self):
8788
self.re_comment = re.compile(r"\s*//[^\n]*")
88-
self.re_max_buffers = re.compile(r"NANOARROW_MAX_FIXED_BUFFERS")
8989
self.re_typedef = re.compile(r"typedef(?P<typedef>[^;]+)")
9090
self.re_type = re.compile(
9191
r"(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}"
@@ -167,11 +167,58 @@ def _pxd_header(self):
167167
168168
# cython: language_level = 3
169169
170-
from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t
171-
from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
172170
"""
173171

174172

173+
class NanoarrowPxdGenerator(PxdGenerator):
174+
def _preprocess_content(self, content):
175+
return re.sub(r"NANOARROW_MAX_FIXED_BUFFERS", "3", content)
176+
177+
def _pxd_header(self):
178+
return (
179+
super()._pxd_header()
180+
+ """
181+
from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t
182+
from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
183+
"""
184+
)
185+
186+
def _write_defs(self, output):
187+
output.write(b"\n")
188+
output.write(b" cdef int NANOARROW_OK\n")
189+
output.write(b" cdef int NANOARROW_MAX_FIXED_BUFFERS\n")
190+
output.write(b" cdef int ARROW_FLAG_DICTIONARY_ORDERED\n")
191+
output.write(b" cdef int ARROW_FLAG_NULLABLE\n")
192+
output.write(b" cdef int ARROW_FLAG_MAP_KEYS_SORTED\n")
193+
output.write(b"\n")
194+
195+
196+
class NanoarrowDevicePxdGenerator(PxdGenerator):
197+
def _preprocess_content(self, content):
198+
self.device_names = re.findall("#define (ARROW_DEVICE_[A-Z0-9_]+)", content)
199+
return super()._preprocess_content(content)
200+
201+
def _find_typedefs(self, content):
202+
return []
203+
204+
def _pxd_header(self):
205+
return (
206+
super()._pxd_header()
207+
+ """
208+
from libc.stdint cimport int32_t, int64_t
209+
from nanoarrow_c cimport *
210+
"""
211+
)
212+
213+
def _write_defs(self, output):
214+
output.write(b"\n")
215+
output.write(b" ctypedef int32_t ArrowDeviceType\n")
216+
output.write(b"\n")
217+
for name in self.device_names:
218+
output.write(f" cdef ArrowDeviceType {name}\n".encode())
219+
output.write(b"\n")
220+
221+
175222
# Runs cmake -DNANOARROW_BUNDLE=ON if cmake exists or copies nanoarrow.c/h
176223
# from ../dist if it does not. Running cmake is safer because it will sync
177224
# any changes from nanoarrow C library sources in the checkout but is not
@@ -228,16 +275,18 @@ def copy_or_generate_nanoarrow_c():
228275

229276

230277
# Runs the pxd generator with some information about the file name
231-
def generate_nanoarrow_pxd():
278+
def generate_nanoarrow_pxds():
232279
this_dir = pathlib.Path(__file__).parent.resolve()
233-
maybe_nanoarrow_h = this_dir / "vendor/nanoarrow.h"
234-
maybe_nanoarrow_pxd = this_dir / "vendor/nanoarrow_c.pxd"
235280

236-
NanoarrowPxdGenerator().generate_nanoarrow_pxd(
237-
maybe_nanoarrow_h, maybe_nanoarrow_pxd
281+
NanoarrowPxdGenerator().generate_pxd(
282+
this_dir / "vendor" / "nanoarrow.h", this_dir / "vendor" / "nanoarrow_c.pxd"
283+
)
284+
NanoarrowDevicePxdGenerator().generate_pxd(
285+
this_dir / "vendor" / "nanoarrow_device.h",
286+
this_dir / "vendor" / "nanoarrow_device_c.pxd",
238287
)
239288

240289

241290
if __name__ == "__main__":
242291
copy_or_generate_nanoarrow_c()
243-
generate_nanoarrow_pxd()
292+
generate_nanoarrow_pxds()

python/setup.py

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import os
2121
import subprocess
2222
import sys
23+
from pathlib import Path
2324

2425
from setuptools import Extension, setup
2526

@@ -52,18 +53,45 @@ def get_version(pkg_path):
5253

5354

5455
# Set some extra flags for compiling with coverage support
56+
extra_include_dirs = []
57+
extra_compile_args = []
58+
extra_link_args = []
59+
extra_define_macros = []
60+
library_dirs = []
61+
libraries = []
62+
5563
if os.getenv("NANOARROW_PYTHON_COVERAGE") == "1":
56-
extra_compile_args = ["--coverage"]
57-
extra_link_args = ["--coverage"]
58-
extra_define_macros = [("CYTHON_TRACE", 1)]
59-
elif os.getenv("NANOARROW_DEBUG_EXTENSION") == "1":
60-
extra_compile_args = ["-g", "-O0"]
61-
extra_link_args = []
62-
extra_define_macros = []
63-
else:
64-
extra_compile_args = []
65-
extra_link_args = []
66-
extra_define_macros = []
64+
extra_compile_args.append("--coverage")
65+
extra_link_args.append("--coverage")
66+
extra_define_macros.append(("CYTHON_TRACE", 1))
67+
68+
if os.getenv("NANOARROW_DEBUG_EXTENSION") == "1":
69+
extra_compile_args.extend(["-g", "-O0"])
70+
71+
cuda_toolkit_root = os.getenv("NANOARROW_PYTHON_CUDA_HOME")
72+
if cuda_toolkit_root:
73+
cuda_lib = "cuda.lib" if os.name == "nt" else "libcuda.so"
74+
include_dir = Path(cuda_toolkit_root) / "include"
75+
possible_libs = [
76+
Path(cuda_toolkit_root) / "lib" / cuda_lib,
77+
Path(cuda_toolkit_root) / "lib64" / cuda_lib,
78+
Path(cuda_toolkit_root) / "lib" / "x64" / cuda_lib,
79+
Path("/usr/lib/wsl/lib") / cuda_lib,
80+
]
81+
82+
if not include_dir.is_dir():
83+
raise ValueError(f"CUDA include directory does not exist: '{include_dir}'")
84+
85+
lib_dirs = [d for d in possible_libs if d.exists()]
86+
if not lib_dirs:
87+
# Quote each candidate path on both sides so the error message reads 'a', 'b', ...
lib_dirs_err = ", ".join(f"'{d}'" for d in possible_libs)
88+
raise ValueError(f"Can't find CUDA library directory. Checked {lib_dirs_err}")
89+
90+
extra_include_dirs.append(str(include_dir))
91+
library_dirs.append(str(lib_dirs[0].parent))
92+
libraries.append("cuda")
93+
extra_define_macros.append(("NANOARROW_DEVICE_WITH_CUDA", 1))
94+
6795

6896
setup(
6997
ext_modules=[
@@ -80,7 +108,7 @@ def get_version(pkg_path):
80108
),
81109
Extension(
82110
name="nanoarrow._utils",
83-
include_dirs=["src/nanoarrow", "vendor"],
111+
include_dirs=extra_include_dirs + ["src/nanoarrow", "vendor"],
84112
language="c",
85113
sources=[
86114
"src/nanoarrow/_utils.pyx",
@@ -92,7 +120,7 @@ def get_version(pkg_path):
92120
),
93121
Extension(
94122
name="nanoarrow._lib",
95-
include_dirs=["src/nanoarrow", "vendor"],
123+
include_dirs=extra_include_dirs + ["src/nanoarrow", "vendor"],
96124
language="c",
97125
sources=[
98126
"src/nanoarrow/_lib.pyx",
@@ -102,10 +130,12 @@ def get_version(pkg_path):
102130
extra_compile_args=extra_compile_args,
103131
extra_link_args=extra_link_args,
104132
define_macros=extra_define_macros,
133+
library_dirs=library_dirs,
134+
libraries=libraries,
105135
),
106136
Extension(
107137
name="nanoarrow._ipc_lib",
108-
include_dirs=["src/nanoarrow", "vendor"],
138+
include_dirs=extra_include_dirs + ["src/nanoarrow", "vendor"],
109139
language="c",
110140
sources=[
111141
"src/nanoarrow/_ipc_lib.pyx",

python/src/nanoarrow/_lib.pyx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,9 +248,13 @@ cdef class Device:
248248
def resolve(device_type, int64_t device_id):
249249
if int(device_type) == ARROW_DEVICE_CPU:
250250
return DEVICE_CPU
251-
else:
251+
252+
cdef ArrowDevice* c_device = ArrowDeviceResolve(device_type, device_id)
253+
if c_device == NULL:
252254
raise ValueError(f"Device not found for type {device_type}/{device_id}")
253255

256+
return Device(None, <uintptr_t>c_device)
257+
254258

255259
# Cache the CPU device
256260
# The CPU device is statically allocated (so base is None)

0 commit comments

Comments
 (0)