Skip to content

[NVPTX] Convert scalar function nvvm.annotations to attributes #125908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
if (MinBlocks > 0) {
if (MinBlocksVal)
*MinBlocksVal = MinBlocks.getExtValue();
if (F) {
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
MinBlocks.getExtValue());
}
if (F)
F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should eventually create a list of strings for valid nvvm. function attributes and use them here instead of hard-coding strings. It would serve as a single-source-of-truth for the set of valid attributes. Not necessary for this PR, but something to consider for the future.

}
}
if (Attr->getMaxBlocks()) {
Expand All @@ -388,11 +385,9 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
if (MaxBlocks > 0) {
if (MaxClusterRankVal)
*MaxClusterRankVal = MaxBlocks.getExtValue();
if (F) {
// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
MaxBlocks.getExtValue());
}
if (F)
F->addFnAttr("nvvm.maxclusterrank",
llvm::utostr(MaxBlocks.getExtValue()));
}
}
}
Expand Down
32 changes: 19 additions & 13 deletions clang/test/CodeGenCUDA/launch-bounds.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,25 @@
#define MAX_BLOCKS_PER_MP 4
#endif

// CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
// CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
// CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
// CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
// CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]

// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
// CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
// CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}

// CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
// CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
// CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
// CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]

// CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
// CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
// CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}

// Test both max threads per block and Min cta per sm.
extern "C" {
__global__ void
Expand All @@ -19,7 +38,6 @@ Kernel1()
}

// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}

#ifdef USE_MAX_BLOCKS
// Test max threads per block and min/max cta per sm.
Expand All @@ -32,8 +50,6 @@ Kernel1_sm_90()
}

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
#endif // USE_MAX_BLOCKS

// Test only max threads per block. Min cta per sm defaults to 0, and
Expand Down Expand Up @@ -67,7 +83,6 @@ Kernel4()
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();

// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}

#ifdef USE_MAX_BLOCKS
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
Expand All @@ -79,8 +94,6 @@ Kernel4_sm_90()
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
#endif //USE_MAX_BLOCKS

const int constint = 100;
Expand All @@ -94,7 +107,6 @@ Kernel5()
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();

// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}

#ifdef USE_MAX_BLOCKS

Expand All @@ -109,8 +121,6 @@ Kernel5_sm_90()
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
#endif //USE_MAX_BLOCKS

// Make sure we don't emit negative launch bounds values.
Expand All @@ -120,7 +130,6 @@ Kernel6()
{
}
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm",

__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
Expand All @@ -144,12 +153,9 @@ Kernel7_sm_90()
const char constchar = 12;
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12

#ifdef USE_MAX_BLOCKS
const char constchar_2 = 14;
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
#endif // USE_MAX_BLOCKS
37 changes: 23 additions & 14 deletions llvm/docs/NVPTXUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,12 @@ Marking Functions as Kernels

In PTX, there are two types of functions: *device functions*, which are only
callable by device code, and *kernel functions*, which are callable by host
code. By default, the back-end will emit device functions. Metadata is used to
declare a function as a kernel function. This metadata is attached to the
``nvvm.annotations`` named metadata object, and has the following format:
code. By default, the back-end will emit device functions. The ``ptx_kernel``
calling convention is used to declare a function as a kernel function.

.. code-block:: text

!0 = !{<function-ref>, metadata !"kernel", i32 1}

The first parameter is a reference to the kernel function. The following
example shows a kernel function calling a device function in LLVM IR. The
function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
The following example shows a kernel function calling a device function in LLVM
IR. The function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is
not.

.. code-block:: llvm

Expand All @@ -53,18 +48,32 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
ret float %add
}

define void @my_kernel(ptr %ptr) {
define ptx_kernel void @my_kernel(ptr %ptr) {
%val = load float, ptr %ptr
%ret = call float @my_fmad(float %val, float %val, float %val)
store float %ret, ptr %ptr
ret void
}

!nvvm.annotations = !{!1}
!1 = !{ptr @my_kernel, !"kernel", i32 1}

When compiled, the PTX kernel functions are callable by host-side code.

.. _nvptx_fnattrs:

Function Attributes
-------------------

``"nvvm.maxclusterrank"="<n>"``
This attribute specifies the maximum number of blocks per cluster. Must be
non-zero. Only supported for Hopper+.

``"nvvm.minctasm"="<n>"``
This indicates a hint/directive to the compiler/driver, asking it to put at
least these many CTAs on an SM.

``"nvvm.maxnreg"="<n>"``
This attribute indicates the maximum number of registers to be used for the
kernel function.


.. _address_spaces:

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6458,7 +6458,7 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
int32_t LB, int32_t UB) {
if (T.isNVPTX())
if (UB > 0)
updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
Kernel.addFnAttr("nvvm.maxclusterrank", llvm::utostr(UB));
if (T.isAMDGPU())
Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");

Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
Expand Down Expand Up @@ -5043,6 +5044,21 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
return true;
}
if (K == "maxclusterrank" || K == "cluster_max_blocks") {
const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
return true;
}
if (K == "minctasm") {
const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
return true;
}
if (K == "maxnreg") {
const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
return true;
}

return false;
}
Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,13 @@ static void addKernelMetadata(Module &M, Function *F) {
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};

llvm::Metadata *BlockMDVals[] = {
llvm::ConstantAsMetadata::get(F),
llvm::MDString::get(Ctx, "maxclusterrank"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
F->addFnAttr("nvvm.maxclusterrank", "1");
F->setCallingConv(CallingConv::PTX_Kernel);

// Append metadata to nvvm.annotations.
F->setCallingConv(CallingConv::PTX_Kernel);
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, BlockMDVals));
}

static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ static bool argHasNVVMAnnotation(const Value &Val,
return false;
}

static std::optional<unsigned> getFnAttrParsedInt(const Function &F,
StringRef Attr) {
return F.hasFnAttribute(Attr)
? std::optional(F.getFnAttributeAsParsedInteger(Attr))
: std::nullopt;
Comment on lines +184 to +186
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh. {} can't be used in a ternary, and using std::nullopt forces explicit optional use in the other branch. :-/
That ended up being a wash. Sorry about the noise.

Just in case. Comments marked as "nit" are up to you. It includes ignoring them or pushing back when those suggestions don't make sense or turn out not being worth it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries! I agree it is basically a wash and will leave it as it currently is.

}

bool isParamGridConstant(const Value &V) {
if (const Argument *Arg = dyn_cast<Argument>(&V)) {
// "grid_constant" counts argument indices starting from 1
Expand Down Expand Up @@ -277,7 +284,7 @@ std::optional<unsigned> getClusterDimz(const Function &F) {
}

std::optional<unsigned> getMaxClusterRank(const Function &F) {
return findOneNVVMAnnotation(&F, "maxclusterrank");
return getFnAttrParsedInt(F, "nvvm.maxclusterrank");
}

std::optional<unsigned> getReqNTIDx(const Function &F) {
Expand All @@ -303,11 +310,11 @@ std::optional<unsigned> getReqNTID(const Function &F) {
}

std::optional<unsigned> getMinCTASm(const Function &F) {
return findOneNVVMAnnotation(&F, "minctasm");
return getFnAttrParsedInt(F, "nvvm.minctasm");
}

std::optional<unsigned> getMaxNReg(const Function &F) {
return findOneNVVMAnnotation(&F, "maxnreg");
return getFnAttrParsedInt(F, "nvvm.maxnreg");
}

MaybeAlign getAlign(const Function &F, unsigned Index) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ entry:
attributes #0 = {
"omp_target_num_teams"="100"
"omp_target_thread_limit"="101"
"nvvm.maxclusterrank"="200"
}

!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!nvvm.annotations = !{!6, !7, !8, !9, !10}
!nvvm.annotations = !{!7, !8, !9, !10}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C11, file: !2, producer: "clang version 19.0.0git", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!2 = !DIFile(filename: "test.c", directory: "/tmp")
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !4, scopeLine: 10, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !1, retainedNodes: !3)
!6 = !{ptr @test, !"maxclusterrank", i32 200}
!7 = !{ptr @test, !"maxntidx", i32 210}
!8 = !{ptr @test, !"maxntidy", i32 211}
!9 = !{ptr @test, !"maxntidz", i32 212}
Expand Down
12 changes: 3 additions & 9 deletions llvm/test/CodeGen/NVPTX/annotations.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,26 @@ define void @kernel_func_reqntid(ptr %a) {
}

; CHECK: .entry kernel_func_minctasm
define void @kernel_func_minctasm(ptr %a) {
define ptx_kernel void @kernel_func_minctasm(ptr %a) "nvvm.minctasm"="42" {
; CHECK: .minnctapersm 42
; CHECK: ret
ret void
}

; CHECK-LABEL: .entry kernel_func_maxnreg
define void @kernel_func_maxnreg() {
define ptx_kernel void @kernel_func_maxnreg() "nvvm.maxnreg"="1234" {
; CHECK: .maxnreg 1234
; CHECK: ret
ret void
}

!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
!nvvm.annotations = !{!1, !2, !3, !4, !9, !10}

!1 = !{ptr @kernel_func_maxntid, !"kernel", i32 1}
!2 = !{ptr @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}

!3 = !{ptr @kernel_func_reqntid, !"kernel", i32 1}
!4 = !{ptr @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}

!5 = !{ptr @kernel_func_minctasm, !"kernel", i32 1}
!6 = !{ptr @kernel_func_minctasm, !"minctasm", i32 42}

!7 = !{ptr @kernel_func_maxnreg, !"kernel", i32 1}
!8 = !{ptr @kernel_func_maxnreg, !"maxnreg", i32 1234}

!9 = !{ptr addrspace(1) @texture, !"texture", i32 1}
!10 = !{ptr addrspace(1) @surface, !"surface", i32 1}
16 changes: 9 additions & 7 deletions llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ define internal void @bar() {
ret void
}

; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"() {
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8
Expand All @@ -60,7 +61,8 @@ define internal void @bar() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"() {
; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8
Expand All @@ -82,11 +84,11 @@ define internal void @bar() {
; CHECK: while.end:
; CHECK-NEXT: ret void

; CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="1" }

; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1}
; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1}
; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
; CHECK: [[META9:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxclusterrank", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/NVPTX/maxclusterrank.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,14 @@ target triple = "nvptx64-unknown-unknown"
; CHECK_SM_80-NOT: .maxclusterrank 8

; Make sure that for SM version prior to 90 `.maxclusterrank` directive is
; sielently ignored.
define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() {
; silently ignored.
define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() "nvvm.minctasm"="2" "nvvm.maxclusterrank"="8" {
entry:
%a = alloca i32, align 4
store volatile i32 1, ptr %a, align 4
ret void
}

!nvvm.annotations = !{!1, !2, !3}
!nvvm.annotations = !{!1}

!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128}
!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2}
!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8}
Loading