Skip to content

Commit 259892b

Browse files
committed
vendor agnostic
1 parent d96d556 commit 259892b

File tree

5 files changed

+118
-140
lines changed

5 files changed

+118
-140
lines changed

Diff for: Project.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@ authors = ["Ben Arthur <[email protected]>"]
44
version = "0.2.3"
55

66
[deps]
7-
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
7+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
8+
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
89

910
[compat]
10-
CUDA = "3, 4"
1111
julia = "1.6"

Diff for: bench/runbench.jl

+24-15
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,15 @@ macro belapsed_median(args...)
55
esc(:(time(median(@benchmark $(args...))) / 1e9))
66
end
77

8+
macro sync(ex)
9+
quote
10+
local ret = $(esc(ex))
11+
KernelAbstractions.synchronize(CUDABackend())
12+
ret
13+
14+
end
15+
end
16+
817

918
function doit(L,N)
1019
x2 = CuArray(rand(L,N));
@@ -14,8 +23,8 @@ function doit(L,N)
1423
o1 = CuArray(rand(N));
1524
o3 = CuArray(rand(1,1,N));
1625

17-
tbgemm = @belapsed_median CUDA.@sync batched_mul!($o3, batched_transpose($x3), $y3)
18-
tbdot = @belapsed_median CUDA.@sync batched_dot!($o1, $x2, $y2)
26+
tbgemm = @belapsed_median @sync batched_mul!($o3, batched_transpose($x3), $y3)
27+
tbdot = @belapsed_median @sync batched_dot!($o1, $x2, $y2)
1928

2029
CUDA.unsafe_free!.((x2, x3, y2, y3, o1, o3))
2130
CUDA.memory_status()
@@ -51,18 +60,18 @@ function doit(L,N)
5160
y2 = CuArray(rand(L,N));
5261
y3 = CuArray(rand(L,1,N));
5362

54-
tbgemm = @belapsed_median CUDA.@sync batched_mul!($y3, $A3, $x3)
63+
tbgemm = @belapsed_median @sync batched_mul!($y3, $A3, $x3)
5564

56-
tbgemvn = @belapsed_median CUDA.@sync batched_gemv!('N', 1.0, $A3, $x2, 0.0, $y2)
57-
tbgemvt = @belapsed_median CUDA.@sync batched_gemv!('T', 1.0, $A3, $x2, 0.0, $y2)
65+
tbgemvn = @belapsed_median @sync batched_gemv!('N', 1.0, $A3, $x2, 0.0, $y2)
66+
tbgemvt = @belapsed_median @sync batched_gemv!('T', 1.0, $A3, $x2, 0.0, $y2)
5867

59-
tbsymvu = @belapsed_median CUDA.@sync batched_symv!('U', 1.0, $A3, $x2, 0.0, $y2)
60-
tbsymvl = @belapsed_median CUDA.@sync batched_symv!('L', 1.0, $A3, $x2, 0.0, $y2)
68+
tbsymvu = @belapsed_median @sync batched_symv!('U', 1.0, $A3, $x2, 0.0, $y2)
69+
tbsymvl = @belapsed_median @sync batched_symv!('L', 1.0, $A3, $x2, 0.0, $y2)
6170

6271
AP = CuArray(hcat([SymmetricPacked(x, :U).tri for x in eachslice(_A, dims=3)]...));
63-
tbspmvu = @belapsed_median CUDA.@sync batched_spmv!('U', 1.0, $AP, $x2, 0.0, $y2)
72+
tbspmvu = @belapsed_median @sync batched_spmv!('U', 1.0, $AP, $x2, 0.0, $y2)
6473
AP = CuArray(hcat([SymmetricPacked(x, :L).tri for x in eachslice(_A, dims=3)]...));
65-
tbspmvl = @belapsed_median CUDA.@sync batched_spmv!('L', 1.0, $AP, $x2, 0.0, $y2)
74+
tbspmvl = @belapsed_median @sync batched_spmv!('L', 1.0, $AP, $x2, 0.0, $y2)
6675

6776
CUDA.unsafe_free!.((A3, AP, x2, x3, y2, y3))
6877
CUDA.memory_status()
@@ -108,17 +117,17 @@ function doit(L,N)
108117
y2 = CuArray(rand(L,N));
109118
y3 = CuArray(rand(L,1,N));
110119

111-
tbgemm = @belapsed_median CUDA.@sync batched_mul!($A3, $x3, batched_transpose($x3), -1.0, 1.0)
120+
tbgemm = @belapsed_median @sync batched_mul!($A3, $x3, batched_transpose($x3), -1.0, 1.0)
112121

113-
tbger = @belapsed_median CUDA.@sync batched_ger!(-1.0, $x2, $y2, $A3)
122+
tbger = @belapsed_median @sync batched_ger!(-1.0, $x2, $y2, $A3)
114123

115-
tbsyru = @belapsed_median CUDA.@sync batched_syr!('U', -1.0, $x2, $A3)
116-
tbsyrl = @belapsed_median CUDA.@sync batched_syr!('L', -1.0, $x2, $A3)
124+
tbsyru = @belapsed_median @sync batched_syr!('U', -1.0, $x2, $A3)
125+
tbsyrl = @belapsed_median @sync batched_syr!('L', -1.0, $x2, $A3)
117126

118127
AP = CuArray(hcat([SymmetricPacked(x, :U).tri for x in eachslice(_A, dims=3)]...));
119-
tbspru = @belapsed_median CUDA.@sync batched_spr!('U', -1.0, $x2, $AP)
128+
tbspru = @belapsed_median @sync batched_spr!('U', -1.0, $x2, $AP)
120129
AP = CuArray(hcat([SymmetricPacked(x, :L).tri for x in eachslice(_A, dims=3)]...));
121-
tbsprl = @belapsed_median CUDA.@sync batched_spr!('L', -1.0, $x2, $AP)
130+
tbsprl = @belapsed_median @sync batched_spr!('L', -1.0, $x2, $AP)
122131

123132
CUDA.unsafe_free!.((A3, AP, x2, x3, y2, y3))
124133
CUDA.memory_status()

0 commit comments

Comments
 (0)