Skip to content

Commit 0cb5e3a

Browse files
committed
Added a function that scales the compute capability down to a valid NVRTC target, added tests for this function, and removed the explicit --gpu-architecture setting for CuPy, as CuPy already sets it internally
1 parent e106bae commit 0cb5e3a

File tree

4 files changed

+38
-21
lines changed

4 files changed

+38
-21
lines changed

kernel_tuner/backends/cupy.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
from kernel_tuner.backends.backend import GPUBackend
88
from kernel_tuner.observers.cupy import CupyRuntimeObserver
9-
from kernel_tuner.util import is_valid_nvrtc_gpu_arch_cc
109

1110
# embedded in try block to be able to generate documentation
1211
# and run tests without cupy installed
@@ -127,11 +126,7 @@ def compile(self, kernel_instance):
127126
compiler_options = self.compiler_options
128127
if not any(["-std=" in opt for opt in self.compiler_options]):
129128
compiler_options = ["--std=c++11"] + self.compiler_options
130-
if is_valid_nvrtc_gpu_arch_cc(self.cc):
131-
if not any(["--gpu-architecture=" in opt or "-arch" in opt for opt in compiler_options]):
132-
compiler_options.append(f"--gpu-architecture=compute_{self.cc}")
133-
else:
134-
warn(f"Could not add compiler option '--gpu-architecture=compute_{self.cc}' as {self.cc} is an invalid target")
129+
# CuPy already sets the --gpu-architecture by itself, as per https://github.com/cupy/cupy/blob/20ccd63c0acc40969c851b1917dedeb032209e8b/cupy/cuda/compiler.py#L145
135130

136131
options = tuple(compiler_options)
137132

kernel_tuner/backends/nvcuda.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from kernel_tuner.backends.backend import GPUBackend
77
from kernel_tuner.observers.nvcuda import CudaRuntimeObserver
8-
from kernel_tuner.util import SkippableFailure, cuda_error_check, is_valid_nvrtc_gpu_arch_cc
8+
from kernel_tuner.util import SkippableFailure, cuda_error_check, to_valid_nvrtc_gpu_arch_cc
99

1010
# embedded in try block to be able to generate documentation
1111
# and run tests without cuda-python installed
@@ -167,15 +167,12 @@ def compile(self, kernel_instance):
167167
compiler_options.append(b"--std=c++11")
168168
if not any(["--std=" in opt for opt in self.compiler_options]):
169169
self.compiler_options.append("--std=c++11")
170-
if is_valid_nvrtc_gpu_arch_cc(self.cc):
171-
if not any([b"--gpu-architecture=" in opt or b"-arch" in opt for opt in compiler_options]):
172-
compiler_options.append(
173-
f"--gpu-architecture=compute_{self.cc}".encode("UTF-8")
174-
)
175-
if not any(["--gpu-architecture=" in opt or "-arch" in opt for opt in self.compiler_options]):
176-
self.compiler_options.append(f"--gpu-architecture=compute_{self.cc}")
177-
else:
178-
warn(f"Could not add compiler option '--gpu-architecture=compute_{self.cc}' as {self.cc} is an invalid target")
170+
if not any([b"--gpu-architecture=" in opt or b"-arch" in opt for opt in compiler_options]):
171+
compiler_options.append(
172+
f"--gpu-architecture=compute_{to_valid_nvrtc_gpu_arch_cc(self.cc)}".encode("UTF-8")
173+
)
174+
if not any(["--gpu-architecture=" in opt or "-arch" in opt for opt in self.compiler_options]):
175+
self.compiler_options.append(f"--gpu-architecture=compute_{to_valid_nvrtc_gpu_arch_cc(self.cc)}")
179176

180177
err, program = nvrtc.nvrtcCreateProgram(
181178
str.encode(kernel_string), b"CUDAProgram", 0, [], []

kernel_tuner/util.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def check_block_size_names(block_size_names):
221221
if not isinstance(block_size_names, list):
222222
raise ValueError("block_size_names should be a list of strings!")
223223
if len(block_size_names) > 3:
224-
raise ValueError("block_size_names should not contain more than 3 names!")
224+
raise ValueError(f"block_size_names should not contain more than 3 names! ({block_size_names=})")
225225
if not all([isinstance(name, "".__class__) for name in block_size_names]):
226226
raise ValueError("block_size_names should contain only strings!")
227227

@@ -570,10 +570,22 @@ def get_total_timings(results, env, overhead_time):
570570
return env
571571

572572

573-
def is_valid_nvrtc_gpu_arch_cc(compute_capability: str) -> bool:
574-
"""Returns whether the Compute Capability is a valid argument for NVRTC `--gpu-architecture=`, as per https://docs.nvidia.com/cuda/nvrtc/index.html#group__options."""
575-
valid_cc = ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '87', '89', '90', '90a']
576-
return str(compute_capability) in valid_cc
573+
def to_valid_nvrtc_gpu_arch_cc(compute_capability: str) -> str:
    """Return a Compute Capability that is valid for NVRTC `--gpu-architecture=`.

    The list of valid targets follows
    https://docs.nvidia.com/cuda/nvrtc/index.html#group__options.

    A value that is already valid is returned unchanged. Otherwise the value is
    scaled down to the highest valid target that shares its first character
    (the major version) and whose second character (the minor version) is not
    larger. Only the first two characters are inspected for this, so a value
    whose first character matches no valid target falls back to '52'.

    Suffixed targets such as '90a' are only selected when the input itself
    carries a suffix (is longer than two characters): per the NVIDIA
    documentation these targets are architecture-specific, so a plain value
    like '91' scales down to '90' rather than '90a'.

    :param compute_capability: the compute capability without a dot, e.g. '86'.
        Non-string values are converted with `str()`.
    :returns: a compute capability string accepted by NVRTC.
    :raises ValueError: if the value is shorter than two characters, or if its
        minor version character is needed for scaling but is not a digit.
    """
    # must be in ascending order, when updating also update test_to_valid_nvrtc_gpu_arch_cc
    valid_cc = ('50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '87', '89', '90', '90a')
    default_cc = '52'

    compute_capability = str(compute_capability)
    if len(compute_capability) < 2:
        raise ValueError(f"Compute capability '{compute_capability}' must be at least of length 2, is {len(compute_capability)}")
    if compute_capability in valid_cc:
        return compute_capability

    # no exact match: restrict the search to targets with the same major version
    same_major = [cc for cc in valid_cc if cc[0] == compute_capability[0]]
    if not same_major:
        return default_cc

    try:
        minor = int(compute_capability[1])
    except ValueError:
        raise ValueError(
            f"Compute capability '{compute_capability}' must have a digit as its second character"
        ) from None

    # suffixed targets are only candidates when the request has a suffix as well
    has_suffix = len(compute_capability) > 2
    candidates = [
        cc for cc in same_major
        if int(cc[1]) <= minor and (has_suffix or len(cc) == 2)
    ]
    # valid_cc is ascending, so the last candidate is the highest one not exceeding the request
    return candidates[-1] if candidates else default_cc
577589

578590

579591
def print_config(config, tuning_options, runner):

test/test_util_functions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,19 @@ def test_get_thread_block_dimensions():
146146
assert threads[2] == 1
147147

148148

149+
def test_to_valid_nvrtc_gpu_arch_cc():
    """Check that compute capabilities are mapped to valid NVRTC targets."""
    # an already valid value is returned unchanged
    assert to_valid_nvrtc_gpu_arch_cc("89") == "89"
    # values in between valid targets are scaled down within the same major version
    assert to_valid_nvrtc_gpu_arch_cc("88") == "87"
    assert to_valid_nvrtc_gpu_arch_cc("86") == "80"
    # a major version without any valid target falls back to the default
    assert to_valid_nvrtc_gpu_arch_cc("40") == "52"
    assert to_valid_nvrtc_gpu_arch_cc("10123001") == "52"
    # suffixed values
    assert to_valid_nvrtc_gpu_arch_cc("90b") == "90a"
    assert to_valid_nvrtc_gpu_arch_cc("91c") == "90a"
    # each invalid value gets its own `pytest.raises` context: the first raising
    # call leaves the context, so a second call in the same block would never run
    for too_short in ("", "1"):
        with pytest.raises(ValueError):
            to_valid_nvrtc_gpu_arch_cc(too_short)
160+
161+
149162
def test_prepare_kernel_string():
150163
kernel = "this is a weird kernel"
151164
grid = (3, 7)

0 commit comments

Comments
 (0)