Skip to content

Commit cb6ac07

Browse files
committed
[NVPTX] Convert scalar function nvvm.annotations to attributes
1 parent 3ef5348 commit cb6ac07

File tree

15 files changed

+160
-100
lines changed

15 files changed

+160
-100
lines changed

clang/lib/CodeGen/Targets/NVPTX.cpp

+5-10
Original file line numberDiff line numberDiff line change
@@ -375,11 +375,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
375375
if (MinBlocks > 0) {
376376
if (MinBlocksVal)
377377
*MinBlocksVal = MinBlocks.getExtValue();
378-
if (F) {
379-
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
380-
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
381-
MinBlocks.getExtValue());
382-
}
378+
if (F)
379+
F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
383380
}
384381
}
385382
if (Attr->getMaxBlocks()) {
@@ -388,11 +385,9 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
388385
if (MaxBlocks > 0) {
389386
if (MaxClusterRankVal)
390387
*MaxClusterRankVal = MaxBlocks.getExtValue();
391-
if (F) {
392-
// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
393-
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
394-
MaxBlocks.getExtValue());
395-
}
388+
if (F)
389+
F->addFnAttr("nvvm.maxclusterrank",
390+
llvm::utostr(MaxBlocks.getExtValue()));
396391
}
397392
}
398393
}

clang/test/CodeGenCUDA/launch-bounds.cu

+19-13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,25 @@
99
#define MAX_BLOCKS_PER_MP 4
1010
#endif
1111

12+
// CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
13+
// CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
14+
// CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
15+
// CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
16+
// CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]
17+
18+
// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
19+
// CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
20+
// CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}
21+
22+
// CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
23+
// CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
24+
// CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
25+
// CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]
26+
27+
// CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
28+
// CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
29+
// CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}
30+
1231
// Test both max threads per block and Min cta per sm.
1332
extern "C" {
1433
__global__ void
@@ -19,7 +38,6 @@ Kernel1()
1938
}
2039

2140
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
22-
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
2341

2442
#ifdef USE_MAX_BLOCKS
2543
// Test max threads per block and min/max cta per sm.
@@ -32,8 +50,6 @@ Kernel1_sm_90()
3250
}
3351

3452
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
35-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
36-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
3753
#endif // USE_MAX_BLOCKS
3854

3955
// Test only max threads per block. Min cta per sm defaults to 0, and
@@ -67,7 +83,6 @@ Kernel4()
6783
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
6884

6985
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
70-
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
7186

7287
#ifdef USE_MAX_BLOCKS
7388
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -79,8 +94,6 @@ Kernel4_sm_90()
7994
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
8095

8196
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
82-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
83-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
8497
#endif //USE_MAX_BLOCKS
8598

8699
const int constint = 100;
@@ -94,7 +107,6 @@ Kernel5()
94107
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
95108

96109
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
97-
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
98110

99111
#ifdef USE_MAX_BLOCKS
100112

@@ -109,8 +121,6 @@ Kernel5_sm_90()
109121
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
110122

111123
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
112-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
113-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
114124
#endif //USE_MAX_BLOCKS
115125

116126
// Make sure we don't emit negative launch bounds values.
@@ -120,7 +130,6 @@ Kernel6()
120130
{
121131
}
122132
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
123-
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm",
124133

125134
__global__ void
126135
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
@@ -144,12 +153,9 @@ Kernel7_sm_90()
144153
const char constchar = 12;
145154
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
146155
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
147-
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
148156

149157
#ifdef USE_MAX_BLOCKS
150158
const char constchar_2 = 14;
151159
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
152160
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
153-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
154-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
155161
#endif // USE_MAX_BLOCKS

llvm/docs/NVPTXUsage.rst

+23-14
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,12 @@ Marking Functions as Kernels
3333

3434
In PTX, there are two types of functions: *device functions*, which are only
3535
callable by device code, and *kernel functions*, which are callable by host
36-
code. By default, the back-end will emit device functions. Metadata is used to
37-
declare a function as a kernel function. This metadata is attached to the
38-
``nvvm.annotations`` named metadata object, and has the following format:
36+
code. By default, the back-end will emit device functions. The ``ptx_kernel``
37+
calling convention is used to declare a function as a kernel function.
3938

40-
.. code-block:: text
41-
42-
!0 = !{<function-ref>, metadata !"kernel", i32 1}
43-
44-
The first parameter is a reference to the kernel function. The following
45-
example shows a kernel function calling a device function in LLVM IR. The
46-
function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
39+
The following example shows a kernel function calling a device function in LLVM
40+
IR. The function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is
41+
not.
4742

4843
.. code-block:: llvm
4944
@@ -53,18 +48,32 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
5348
ret float %add
5449
}
5550
56-
define void @my_kernel(ptr %ptr) {
51+
define ptx_kernel void @my_kernel(ptr %ptr) {
5752
%val = load float, ptr %ptr
5853
%ret = call float @my_fmad(float %val, float %val, float %val)
5954
store float %ret, ptr %ptr
6055
ret void
6156
}
6257
63-
!nvvm.annotations = !{!1}
64-
!1 = !{ptr @my_kernel, !"kernel", i32 1}
65-
6658
When compiled, the PTX kernel functions are callable by host-side code.
6759

60+
.. _nvptx_fnattrs:
61+
62+
Function Attributes
63+
-------------------
64+
65+
``"nvvm.maxclusterrank"="<n>"``
66+
This attribute specifies the maximum number of blocks per cluster. Must be
67+
non-zero. Only supported for Hopper+.
68+
69+
``"nvvm.minctasm"="<n>"``
70+
This indicates a hint/directive to the compiler/driver, asking it to put at
71+
least these many CTAs on an SM.
72+
73+
``"nvvm.maxnreg"="<n>"``
74+
This attribute indicates the maximum number of registers to be used for the
75+
kernel function.
76+
6877

6978
.. _address_spaces:
7079

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -6458,7 +6458,7 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
64586458
int32_t LB, int32_t UB) {
64596459
if (T.isNVPTX())
64606460
if (UB > 0)
6461-
updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
6461+
Kernel.addFnAttr("nvvm.maxclusterrank", llvm::utostr(UB));
64626462
if (T.isAMDGPU())
64636463
Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
64646464

llvm/lib/IR/AutoUpgrade.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "llvm/IR/AutoUpgrade.h"
16+
#include "llvm/ADT/StringExtras.h"
1617
#include "llvm/ADT/StringRef.h"
1718
#include "llvm/ADT/StringSwitch.h"
1819
#include "llvm/BinaryFormat/Dwarf.h"
@@ -5043,6 +5044,21 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
50435044
Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
50445045
return true;
50455046
}
5047+
if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5048+
const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5049+
cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5050+
return true;
5051+
}
5052+
if (K == "minctasm") {
5053+
const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5054+
cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5055+
return true;
5056+
}
5057+
if (K == "maxnreg") {
5058+
const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5059+
cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5060+
return true;
5061+
}
50465062

50475063
return false;
50485064
}

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp

+2-7
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,13 @@ static void addKernelMetadata(Module &M, Function *F) {
7070
llvm::ConstantAsMetadata::get(
7171
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
7272

73-
llvm::Metadata *BlockMDVals[] = {
74-
llvm::ConstantAsMetadata::get(F),
75-
llvm::MDString::get(Ctx, "maxclusterrank"),
76-
llvm::ConstantAsMetadata::get(
77-
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
73+
F->addFnAttr("nvvm.maxclusterrank", "1");
74+
F->setCallingConv(CallingConv::PTX_Kernel);
7875

7976
// Append metadata to nvvm.annotations.
80-
F->setCallingConv(CallingConv::PTX_Kernel);
8177
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
8278
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
8379
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
84-
MD->addOperand(llvm::MDNode::get(Ctx, BlockMDVals));
8580
}
8681

8782
static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {

llvm/lib/Target/NVPTX/NVPTXUtilities.cpp

+10-3
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,13 @@ static bool argHasNVVMAnnotation(const Value &Val,
179179
return false;
180180
}
181181

182+
static std::optional<unsigned> getFnAttrParsedIntOrNull(const Function &F,
183+
StringRef Attr) {
184+
if (F.hasFnAttribute(Attr))
185+
return F.getFnAttributeAsParsedInteger(Attr);
186+
return std::nullopt;
187+
}
188+
182189
bool isParamGridConstant(const Value &V) {
183190
if (const Argument *Arg = dyn_cast<Argument>(&V)) {
184191
// "grid_constant" counts argument indices starting from 1
@@ -277,7 +284,7 @@ std::optional<unsigned> getClusterDimz(const Function &F) {
277284
}
278285

279286
std::optional<unsigned> getMaxClusterRank(const Function &F) {
280-
return findOneNVVMAnnotation(&F, "maxclusterrank");
287+
return getFnAttrParsedIntOrNull(F, "nvvm.maxclusterrank");
281288
}
282289

283290
std::optional<unsigned> getReqNTIDx(const Function &F) {
@@ -303,11 +310,11 @@ std::optional<unsigned> getReqNTID(const Function &F) {
303310
}
304311

305312
std::optional<unsigned> getMinCTASm(const Function &F) {
306-
return findOneNVVMAnnotation(&F, "minctasm");
313+
return getFnAttrParsedIntOrNull(F, "nvvm.minctasm");
307314
}
308315

309316
std::optional<unsigned> getMaxNReg(const Function &F) {
310-
return findOneNVVMAnnotation(&F, "maxnreg");
317+
return getFnAttrParsedIntOrNull(F, "nvvm.maxnreg");
311318
}
312319

313320
MaybeAlign getAlign(const Function &F, unsigned Index) {

llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@ entry:
2323
attributes #0 = {
2424
"omp_target_num_teams"="100"
2525
"omp_target_thread_limit"="101"
26+
"nvvm.maxclusterrank"="200"
2627
}
2728

2829
!llvm.module.flags = !{!0}
2930
!llvm.dbg.cu = !{!1}
30-
!nvvm.annotations = !{!6, !7, !8, !9, !10}
31+
!nvvm.annotations = !{!7, !8, !9, !10}
3132

3233
!0 = !{i32 2, !"Debug Info Version", i32 3}
3334
!1 = distinct !DICompileUnit(language: DW_LANG_C11, file: !2, producer: "clang version 19.0.0git", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
3435
!2 = !DIFile(filename: "test.c", directory: "/tmp")
3536
!3 = !{}
3637
!4 = !DISubroutineType(types: !3)
3738
!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !4, scopeLine: 10, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !1, retainedNodes: !3)
38-
!6 = !{ptr @test, !"maxclusterrank", i32 200}
3939
!7 = !{ptr @test, !"maxntidx", i32 210}
4040
!8 = !{ptr @test, !"maxntidy", i32 211}
4141
!9 = !{ptr @test, !"maxntidz", i32 212}

llvm/test/CodeGen/NVPTX/annotations.ll

+3-9
Original file line numberDiff line numberDiff line change
@@ -23,32 +23,26 @@ define void @kernel_func_reqntid(ptr %a) {
2323
}
2424

2525
; CHECK: .entry kernel_func_minctasm
26-
define void @kernel_func_minctasm(ptr %a) {
26+
define ptx_kernel void @kernel_func_minctasm(ptr %a) "nvvm.minctasm"="42" {
2727
; CHECK: .minnctapersm 42
2828
; CHECK: ret
2929
ret void
3030
}
3131

3232
; CHECK-LABEL: .entry kernel_func_maxnreg
33-
define void @kernel_func_maxnreg() {
33+
define ptx_kernel void @kernel_func_maxnreg() "nvvm.maxnreg"="1234" {
3434
; CHECK: .maxnreg 1234
3535
; CHECK: ret
3636
ret void
3737
}
3838

39-
!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
39+
!nvvm.annotations = !{!1, !2, !3, !4, !9, !10}
4040

4141
!1 = !{ptr @kernel_func_maxntid, !"kernel", i32 1}
4242
!2 = !{ptr @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
4343

4444
!3 = !{ptr @kernel_func_reqntid, !"kernel", i32 1}
4545
!4 = !{ptr @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
4646

47-
!5 = !{ptr @kernel_func_minctasm, !"kernel", i32 1}
48-
!6 = !{ptr @kernel_func_minctasm, !"minctasm", i32 42}
49-
50-
!7 = !{ptr @kernel_func_maxnreg, !"kernel", i32 1}
51-
!8 = !{ptr @kernel_func_maxnreg, !"maxnreg", i32 1234}
52-
5347
!9 = !{ptr addrspace(1) @texture, !"texture", i32 1}
5448
!10 = !{ptr addrspace(1) @surface, !"surface", i32 1}

llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

+9-7
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ define internal void @bar() {
4343
ret void
4444
}
4545

46-
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"() {
46+
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"
47+
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
4748
; CHECK-NEXT: entry:
4849
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8
4950
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8
@@ -60,7 +61,8 @@ define internal void @bar() {
6061
; CHECK-NEXT: ret void
6162
;
6263
;
63-
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"() {
64+
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"
65+
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
6466
; CHECK-NEXT: entry:
6567
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8
6668
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8
@@ -82,11 +84,11 @@ define internal void @bar() {
8284
; CHECK: while.end:
8385
; CHECK-NEXT: ret void
8486

87+
; CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="1" }
88+
8589
; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1}
8690
; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1}
8791
; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1}
88-
; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1}
89-
; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
90-
; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
91-
; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
92-
; CHECK: [[META9:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxclusterrank", i32 1}
92+
; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
93+
; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
94+
; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}

llvm/test/CodeGen/NVPTX/maxclusterrank.ll

+3-5
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,14 @@ target triple = "nvptx64-unknown-unknown"
1010
; CHECK_SM_80-NOT: .maxclusterrank 8
1111

1212
; Make sure that for SM version prior to 90 `.maxclusterrank` directive is
13-
; sielently ignored.
14-
define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() {
13+
; silently ignored.
14+
define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() "nvvm.minctasm"="2" "nvvm.maxclusterrank"="8" {
1515
entry:
1616
%a = alloca i32, align 4
1717
store volatile i32 1, ptr %a, align 4
1818
ret void
1919
}
2020

21-
!nvvm.annotations = !{!1, !2, !3}
21+
!nvvm.annotations = !{!1}
2222

2323
!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128}
24-
!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2}
25-
!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8}

0 commit comments

Comments
 (0)