Skip to content

Commit c0a6c63

Browse files
authored
Only generate ptx for highest compute capability (#2695)
1 parent 18a1c97 commit c0a6c63

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

build_deps/toolchains/gpu/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl

+8-3
Original file line number | Diff line number | Diff line change
@@ -201,9 +201,14 @@ def InvokeNvcc(argv, log=False):
201201

202202
supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ]
203203
nvccopts = '-D_FORCE_INLINES '
204-
for capability in supported_cuda_compute_capabilities:
205-
capability = capability.replace('.', '')
206-
nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % (
204+
supported_cuda_compute_capabilities = sorted([
205+
x.replace(".", "") for x in supported_cuda_compute_capabilities])
206+
for capability in supported_cuda_compute_capabilities[:-1]:
207+
nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s\" ' % (
208+
capability, capability, capability)
209+
if supported_cuda_compute_capabilities:
210+
capability = supported_cuda_compute_capabilities[-1]
211+
nvccopts += r'-gencode=arch=compute_%s,code=\"sm_%s,compute_%s\" ' % (
207212
capability, capability, capability)
208213
nvccopts += ' ' + nvcc_compiler_options
209214
nvccopts += undefines

0 commit comments

Comments
 (0)