Skip to content

Commit 69ec5ac

Browse files
committed
Merge branch 'master' into directives
2 parents b207e06 + ac05da3 commit 69ec5ac

File tree

4 files changed

+33
-13
lines changed

4 files changed

+33
-13
lines changed

kernel_tuner/backends/compiler.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
get_temp_filename,
1919
delete_temp_file,
2020
write_file,
21+
SkippableFailure,
2122
)
2223

2324
try:
@@ -260,12 +261,23 @@ def compile(self, kernel_instance):
260261
if platform.system() == "Darwin":
261262
lib_extension = ".dylib"
262263

263-
subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
264-
subprocess.check_call(
264+
subprocess.run(
265+
[self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"],
266+
stdout=subprocess.PIPE,
267+
stderr=subprocess.PIPE,
268+
text=True,
269+
check=True
270+
)
271+
272+
subprocess.run(
265273
[self.compiler, filename + ".o"]
266274
+ compiler_options
267275
+ ["-shared", "-o", filename + lib_extension]
268-
+ lib_args
276+
+ lib_args,
277+
stdout=subprocess.PIPE,
278+
stderr=subprocess.PIPE,
279+
text=True,
280+
check=True
269281
)
270282

271283
self.lib = np.ctypeslib.load_library(filename, ".")
@@ -385,11 +397,17 @@ def refresh_memory(self, arguments, should_sync):
385397
self.memcpy_dtoh(arg, self.allocations[i])
386398

387399
def cleanup_lib(self):
388-
"""Unload the previously loaded shared library"""
400+
"""unload the previously loaded shared library"""
401+
if self.lib is None:
402+
return
403+
389404
if not self.using_openmp and not self.using_openacc:
390405
# this if statement is necessary because shared libraries that use
391406
# OpenMP will core dump when unloaded, this is a well-known issue with OpenMP
392407
logging.debug("unloading shared library")
393-
_ctypes.dlclose(self.lib._handle)
408+
try:
409+
_ctypes.dlclose(self.lib._handle)
410+
finally:
411+
self.lib = None
394412

395413
units = {}

kernel_tuner/core.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -619,9 +619,13 @@ def compile_kernel(self, instance, verbose):
619619
shared_mem_error_messages = [
620620
"uses too much shared data",
621621
"local memory limit exceeded",
622+
r"local memory \(\d+\) exceeds limit \(\d+\)",
622623
]
623-
if any(msg in str(e) for msg in shared_mem_error_messages):
624-
logging.debug("compile_kernel failed due to kernel using too much shared memory")
624+
error_message = str(e.stderr) if hasattr(e, "stderr") else str(e)
625+
if any(re.search(msg, error_message) for msg in shared_mem_error_messages):
626+
logging.debug(
627+
"compile_kernel failed due to kernel using too much shared memory"
628+
)
625629
if verbose:
626630
print(
627631
f"skipping config {util.get_instance_string(instance.params)} reason: too much shared memory used"
@@ -683,7 +687,7 @@ def create_kernel_instance(self, kernel_source, kernel_options, params, verbose)
683687
)
684688

685689
# check for templated kernel
686-
if kernel_source.lang in ["CUDA", "NVCUDA"] and "<" in name and ">" in name:
690+
if kernel_source.lang in ["CUDA", "NVCUDA", "HIP"] and "<" in name and ">" in name:
687691
kernel_string, name = wrap_templated_kernel(kernel_string, name)
688692

689693
# Preprocess GPU arguments. Require for handling `Tunable` arguments

kernel_tuner/observers/pmt.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -125,9 +125,7 @@ def after_finish(self):
125125

126126
def get_results(self):
127127
average_kernel_execution_time_ms = self.results["time"]
128-
129-
averages = {key: np.average(values) for key, values in self.results.items()}
130-
self.parent.initialize_results(self.parent.pm_names)
128+
averages = self.parent.get_results()
131129

132130
# correct energy measurement, because current _energy number is collected over the entire duration
133131
# we estimate energy as the average power over the continuous duration times the kernel execution time

test/test_compiler_functions.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -188,11 +188,11 @@ def test_compile_detects_device_code(npct, subprocess):
188188
cfunc = CompilerFunctions()
189189
cfunc.compile(kernel_instance)
190190

191-
print(subprocess.check_call.call_args_list)
191+
print(subprocess.run.call_args_list)
192192

193193
# assert the filename suffix used for source compilation is .cu
194194
dot_cu_used = False
195-
for call in subprocess.check_call.call_args_list:
195+
for call in subprocess.run.call_args_list:
196196
args, kwargs = call
197197
args = args[0]
198198
print(args)

0 commit comments

Comments
 (0)