9
9
#define MAX_BLOCKS_PER_MP 4
10
10
#endif
11
11
12
+ // CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
13
+ // CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
14
+ // CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
15
+ // CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
16
+ // CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]
17
+
18
+ // CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
19
+ // CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
20
+ // CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}
21
+
22
+ // CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
23
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
24
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
25
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]
26
+
27
+ // CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
28
+ // CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
29
+ // CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}
30
+
12
31
// Test both max threads per block and Min cta per sm.
13
32
extern "C" {
14
33
__global__ void
@@ -19,7 +38,6 @@ Kernel1()
19
38
}
20
39
21
40
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
22
- // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
23
41
24
42
#ifdef USE_MAX_BLOCKS
25
43
// Test max threads per block and min/max cta per sm.
@@ -32,8 +50,6 @@ Kernel1_sm_90()
32
50
}
33
51
34
52
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
35
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
36
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
37
53
#endif // USE_MAX_BLOCKS
38
54
39
55
// Test only max threads per block. Min cta per sm defaults to 0, and
@@ -67,7 +83,6 @@ Kernel4()
67
83
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
68
84
69
85
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
70
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
71
86
72
87
#ifdef USE_MAX_BLOCKS
73
88
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -79,8 +94,6 @@ Kernel4_sm_90()
79
94
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
80
95
81
96
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
82
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
83
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
84
97
#endif // USE_MAX_BLOCKS
85
98
86
99
// Launch bound held in a const variable rather than a literal or macro;
// passed as the first __launch_bounds__ argument of Kernel8 and
// Kernel8_sm_90 further down (other uses may exist outside this excerpt).
const int constint = 100 ;
@@ -94,7 +107,6 @@ Kernel5()
94
107
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
95
108
96
109
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
97
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
98
110
99
111
#ifdef USE_MAX_BLOCKS
100
112
@@ -109,8 +121,6 @@ Kernel5_sm_90()
109
121
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
110
122
111
123
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
112
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
113
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
114
124
#endif // USE_MAX_BLOCKS
115
125
116
126
// Make sure we don't emit negative launch bounds values.
@@ -120,7 +130,6 @@ Kernel6()
120
130
{
121
131
}
122
132
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
123
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm",
124
133
125
134
__global__ void
126
135
__launch_bounds__ ( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
@@ -144,12 +153,9 @@ Kernel7_sm_90()
144
153
// Second __launch_bounds__ argument for Kernel8 and Kernel8_sm_90; note it is
// declared as char, not int.
const char constchar = 12 ;
145
154
// Empty kernel whose launch bounds come from the const variables constint and
// constchar instead of literals or macros.
__global__ void
__launch_bounds__( constint, constchar )
Kernel8()
{
}
146
155
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
147
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
148
156
149
157
#ifdef USE_MAX_BLOCKS
150
158
// Third __launch_bounds__ argument for Kernel8_sm_90; only compiled when
// USE_MAX_BLOCKS is defined.
const char constchar_2 = 14 ;
151
159
// Empty kernel using the three-argument launch-bounds form, with all three
// values taken from const variables (constint, constchar, constchar_2).
__global__ void
__launch_bounds__( constint, constchar, constchar_2 )
Kernel8_sm_90()
{
}
152
160
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
153
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
154
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
155
161
#endif // USE_MAX_BLOCKS
0 commit comments