@@ -5,6 +5,15 @@ macro belapsed_median(args...)
5
5
esc (:(time (median (@benchmark $ (args... ))) / 1e9 ))
6
6
end
7
7
8
# @sync ex
#
# Evaluate `ex` in the caller's scope, then call
# `KernelAbstractions.synchronize(CUDABackend())` before handing back the
# value `ex` produced. Used below to wrap the benchmarked calls so the
# synchronize call is part of each timed sample.
macro sync(ex)
    # Escape the user expression so its identifiers resolve at the call site
    # rather than in the module that defines this macro.
    wrapped = esc(ex)
    return quote
        # Hold on to the expression's value; the synchronize call must run
        # after it and must not become the value of the block.
        local value = $wrapped
        KernelAbstractions.synchronize(CUDABackend())
        value
    end
end
16
+
8
17
9
18
function doit (L,N)
10
19
x2 = CuArray (rand (L,N));
@@ -14,8 +23,8 @@ function doit(L,N)
14
23
o1 = CuArray (rand (N));
15
24
o3 = CuArray (rand (1 ,1 ,N));
16
25
17
- tbgemm = @belapsed_median CUDA . @sync batched_mul! ($ o3, batched_transpose ($ x3), $ y3)
18
- tbdot = @belapsed_median CUDA . @sync batched_dot! ($ o1, $ x2, $ y2)
26
+ tbgemm = @belapsed_median @sync batched_mul! ($ o3, batched_transpose ($ x3), $ y3)
27
+ tbdot = @belapsed_median @sync batched_dot! ($ o1, $ x2, $ y2)
19
28
20
29
CUDA. unsafe_free! .((x2, x3, y2, y3, o1, o3))
21
30
CUDA. memory_status ()
@@ -51,18 +60,18 @@ function doit(L,N)
51
60
y2 = CuArray (rand (L,N));
52
61
y3 = CuArray (rand (L,1 ,N));
53
62
54
- tbgemm = @belapsed_median CUDA . @sync batched_mul! ($ y3, $ A3, $ x3)
63
+ tbgemm = @belapsed_median @sync batched_mul! ($ y3, $ A3, $ x3)
55
64
56
- tbgemvn = @belapsed_median CUDA . @sync batched_gemv! (' N' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
57
- tbgemvt = @belapsed_median CUDA . @sync batched_gemv! (' T' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
65
+ tbgemvn = @belapsed_median @sync batched_gemv! (' N' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
66
+ tbgemvt = @belapsed_median @sync batched_gemv! (' T' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
58
67
59
- tbsymvu = @belapsed_median CUDA . @sync batched_symv! (' U' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
60
- tbsymvl = @belapsed_median CUDA . @sync batched_symv! (' L' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
68
+ tbsymvu = @belapsed_median @sync batched_symv! (' U' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
69
+ tbsymvl = @belapsed_median @sync batched_symv! (' L' , 1.0 , $ A3, $ x2, 0.0 , $ y2)
61
70
62
71
AP = CuArray (hcat ([SymmetricPacked (x, :U ). tri for x in eachslice (_A, dims= 3 )]. .. ));
63
- tbspmvu = @belapsed_median CUDA . @sync batched_spmv! (' U' , 1.0 , $ AP, $ x2, 0.0 , $ y2)
72
+ tbspmvu = @belapsed_median @sync batched_spmv! (' U' , 1.0 , $ AP, $ x2, 0.0 , $ y2)
64
73
AP = CuArray (hcat ([SymmetricPacked (x, :L ). tri for x in eachslice (_A, dims= 3 )]. .. ));
65
- tbspmvl = @belapsed_median CUDA . @sync batched_spmv! (' L' , 1.0 , $ AP, $ x2, 0.0 , $ y2)
74
+ tbspmvl = @belapsed_median @sync batched_spmv! (' L' , 1.0 , $ AP, $ x2, 0.0 , $ y2)
66
75
67
76
CUDA. unsafe_free! .((A3, AP, x2, x3, y2, y3))
68
77
CUDA. memory_status ()
@@ -108,17 +117,17 @@ function doit(L,N)
108
117
y2 = CuArray (rand (L,N));
109
118
y3 = CuArray (rand (L,1 ,N));
110
119
111
- tbgemm = @belapsed_median CUDA . @sync batched_mul! ($ A3, $ x3, batched_transpose ($ x3), - 1.0 , 1.0 )
120
+ tbgemm = @belapsed_median @sync batched_mul! ($ A3, $ x3, batched_transpose ($ x3), - 1.0 , 1.0 )
112
121
113
- tbger = @belapsed_median CUDA . @sync batched_ger! (- 1.0 , $ x2, $ y2, $ A3)
122
+ tbger = @belapsed_median @sync batched_ger! (- 1.0 , $ x2, $ y2, $ A3)
114
123
115
- tbsyru = @belapsed_median CUDA . @sync batched_syr! (' U' , - 1.0 , $ x2, $ A3)
116
- tbsyrl = @belapsed_median CUDA . @sync batched_syr! (' L' , - 1.0 , $ x2, $ A3)
124
+ tbsyru = @belapsed_median @sync batched_syr! (' U' , - 1.0 , $ x2, $ A3)
125
+ tbsyrl = @belapsed_median @sync batched_syr! (' L' , - 1.0 , $ x2, $ A3)
117
126
118
127
AP = CuArray (hcat ([SymmetricPacked (x, :U ). tri for x in eachslice (_A, dims= 3 )]. .. ));
119
- tbspru = @belapsed_median CUDA . @sync batched_spr! (' U' , - 1.0 , $ x2, $ AP)
128
+ tbspru = @belapsed_median @sync batched_spr! (' U' , - 1.0 , $ x2, $ AP)
120
129
AP = CuArray (hcat ([SymmetricPacked (x, :L ). tri for x in eachslice (_A, dims= 3 )]. .. ));
121
- tbsprl = @belapsed_median CUDA . @sync batched_spr! (' L' , - 1.0 , $ x2, $ AP)
130
+ tbsprl = @belapsed_median @sync batched_spr! (' L' , - 1.0 , $ x2, $ AP)
122
131
123
132
CUDA. unsafe_free! .((A3, AP, x2, x3, y2, y3))
124
133
CUDA. memory_status ()
0 commit comments