Skip to content

Commit 123fba5

Browse files
committed
added greedy ils ensemble instead of default
1 parent 104205d commit 123fba5

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

test/test_ensemble_tuning.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
def env():
1818
kernel_string = """
1919
extern "C" __global__ void vector_add(float *c, float *a, float *b, int n) {
20-
int i = blockIdx.x * block_size_x + threadIdx.x;
21-
if (i<n) {
22-
c[i] = a[i] + b[i];
20+
int i = blockIdx.x * blockDim.x + threadIdx.x;
21+
int j = blockIdx.y * blockDim.y + threadIdx.y;
22+
int index = i + j * gridDim.x * blockDim.x;
23+
if (index < n) {
24+
c[index] = a[index] + b[index];
2325
}
2426
}
2527
"""
@@ -32,11 +34,16 @@ def env():
3234

3335
args = [c, a, b, n]
3436
tune_params = dict()
35-
tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
37+
38+
# Extend the range of block sizes for a bigger search space
39+
tune_params["block_size_x"] = [128 + 64 * i for i in range(30)]
40+
tune_params["block_size_y"] = [1 + i for i in range(1, 16)]
3641

3742
return ["vector_add", kernel_string, size, args, tune_params]
3843

3944
@skip_if_no_pycuda
4045
def test_parallel_tune_kernel(env):
41-
result, _ = tune_kernel(*env, lang="CUDA", verbose=True, strategy="ensemble", parallel_mode=True)
46+
strategy_options = {"ensemble": ["greedy_ils", "greedy_ils"]}
47+
result, _ = tune_kernel(*env, lang="CUDA", verbose=True, strategy="ensemble",
48+
parallel_mode=True, strategy_options=strategy_options)
4249
assert len(result) > 0

0 commit comments

Comments
 (0)