diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index b82e4ddb9f3f2..f89d32d4e13fe 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -375,11 +375,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, if (MinBlocks > 0) { if (MinBlocksVal) *MinBlocksVal = MinBlocks.getExtValue(); - if (F) { - // Create !{, metadata !"minctasm", i32 } node - NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", - MinBlocks.getExtValue()); - } + if (F) + F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue())); } } if (Attr->getMaxBlocks()) { @@ -388,11 +385,9 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, if (MaxBlocks > 0) { if (MaxClusterRankVal) *MaxClusterRankVal = MaxBlocks.getExtValue(); - if (F) { - // Create !{, metadata !"maxclusterrank", i32 } node - NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", - MaxBlocks.getExtValue()); - } + if (F) + F->addFnAttr("nvvm.maxclusterrank", + llvm::utostr(MaxBlocks.getExtValue())); } } } diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu index 31ca9216b413e..72f7857264f8c 100644 --- a/clang/test/CodeGenCUDA/launch-bounds.cu +++ b/clang/test/CodeGenCUDA/launch-bounds.cu @@ -9,6 +9,25 @@ #define MAX_BLOCKS_PER_MP 4 #endif +// CHECK: @Kernel1() #[[ATTR0:[0-9]+]] +// CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]] +// CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]] +// CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]] +// CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]] + +// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}} +// CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}} +// CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}} + +// CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]] +// CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]] +// CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]] +// CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]] + +// CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}} +// CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}} +// CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}} + // Test both max threads per block and Min cta per sm. extern "C" { __global__ void @@ -19,7 +38,6 @@ Kernel1() } // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256} -// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2} #ifdef USE_MAX_BLOCKS // Test max threads per block and min/max cta per sm. @@ -32,8 +50,6 @@ Kernel1_sm_90() } // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4} #endif // USE_MAX_BLOCKS // Test only max threads per block. Min cta per sm defaults to 0, and @@ -67,7 +83,6 @@ Kernel4() template __global__ void Kernel4(); // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} -// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} #ifdef USE_MAX_BLOCKS template @@ -79,8 +94,6 @@ Kernel4_sm_90() template __global__ void Kernel4_sm_90(); // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4} #endif //USE_MAX_BLOCKS const int constint = 100; @@ -94,7 +107,6 @@ Kernel5() template __global__ void Kernel5(); // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} -// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} #ifdef USE_MAX_BLOCKS @@ -109,8 +121,6 @@ Kernel5_sm_90() template __global__ void Kernel5_sm_90(); // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260} #endif //USE_MAX_BLOCKS // Make sure we don't emit negative launch bounds values. @@ -120,7 +130,6 @@ Kernel6() { } // CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx", -// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm", __global__ void __launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP ) @@ -144,12 +153,9 @@ Kernel7_sm_90() const char constchar = 12; __global__ void __launch_bounds__(constint, constchar) Kernel8() {} // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100 -// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12 #ifdef USE_MAX_BLOCKS const char constchar_2 = 14; __global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {} // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100 -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12 -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14 #endif // USE_MAX_BLOCKS diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 1680b11433537..7eacc58549c7d 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -33,17 +33,12 @@ Marking Functions as Kernels In PTX, there are two types of functions: *device functions*, which are only callable by device code, and *kernel functions*, which are callable by host -code. By default, the back-end will emit device functions. Metadata is used to -declare a function as a kernel function. This metadata is attached to the -``nvvm.annotations`` named metadata object, and has the following format: +code. By default, the back-end will emit device functions. The ``ptx_kernel`` +calling convention is used to declare a function as a kernel function. -.. code-block:: text - - !0 = !{, metadata !"kernel", i32 1} - -The first parameter is a reference to the kernel function. The following -example shows a kernel function calling a device function in LLVM IR. The -function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not. +The following example shows a kernel function calling a device function in LLVM +IR. The function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is +not. .. code-block:: llvm @@ -53,18 +48,32 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not. ret float %add } - define void @my_kernel(ptr %ptr) { + define ptx_kernel void @my_kernel(ptr %ptr) { %val = load float, ptr %ptr %ret = call float @my_fmad(float %val, float %val, float %val) store float %ret, ptr %ptr ret void } - !nvvm.annotations = !{!1} - !1 = !{ptr @my_kernel, !"kernel", i32 1} - When compiled, the PTX kernel functions are callable by host-side code. +.. _nvptx_fnattrs: + +Function Attributes +------------------- + +``"nvvm.maxclusterrank"=""`` + This attribute specifies the maximum number of blocks per cluster. Must be + non-zero. Only supported for Hopper+. + +``"nvvm.minctasm"=""`` + This indicates a hint/directive to the compiler/driver, asking it to put at + least these many CTAs on an SM. + +``"nvvm.maxnreg"=""`` + This attribute indicates the maximum number of registers to be used for the + kernel function. + .. _address_spaces: diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ea8fab94a2256..04acab1e5765e 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6458,7 +6458,7 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB) { if (T.isNVPTX()) if (UB > 0) - updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true); + Kernel.addFnAttr("nvvm.maxclusterrank", llvm::utostr(UB)); if (T.isAMDGPU()) Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1"); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e886a6012b219..57072715366c9 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/AutoUpgrade.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -5043,6 +5044,21 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign)); return true; } + if (K == "maxclusterrank" || K == "cluster_max_blocks") { + const auto CV = mdconst::extract(V)->getZExtValue(); + cast(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV)); + return true; + } + if (K == "minctasm") { + const auto CV = mdconst::extract(V)->getZExtValue(); + cast(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV)); + return true; + } + if (K == "maxnreg") { + const auto CV = mdconst::extract(V)->getZExtValue(); + cast(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV)); + return true; + } return false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp index c03ef8d33220c..ae5922cba4ce3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp @@ -70,18 +70,13 @@ static void addKernelMetadata(Module &M, Function *F) { llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; - llvm::Metadata *BlockMDVals[] = { - llvm::ConstantAsMetadata::get(F), - llvm::MDString::get(Ctx, "maxclusterrank"), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; + F->addFnAttr("nvvm.maxclusterrank", "1"); + F->setCallingConv(CallingConv::PTX_Kernel); // Append metadata to nvvm.annotations. - F->setCallingConv(CallingConv::PTX_Kernel); MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals)); MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals)); MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals)); - MD->addOperand(llvm::MDNode::get(Ctx, BlockMDVals)); } static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index a41943880807c..430502d85dfb4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -179,6 +179,13 @@ static bool argHasNVVMAnnotation(const Value &Val, return false; } +static std::optional getFnAttrParsedInt(const Function &F, + StringRef Attr) { + return F.hasFnAttribute(Attr) + ? std::optional(F.getFnAttributeAsParsedInteger(Attr)) + : std::nullopt; +} + bool isParamGridConstant(const Value &V) { if (const Argument *Arg = dyn_cast(&V)) { // "grid_constant" counts argument indices starting from 1 @@ -277,7 +284,7 @@ std::optional getClusterDimz(const Function &F) { } std::optional getMaxClusterRank(const Function &F) { - return findOneNVVMAnnotation(&F, "maxclusterrank"); + return getFnAttrParsedInt(F, "nvvm.maxclusterrank"); } std::optional getReqNTIDx(const Function &F) { @@ -303,11 +310,11 @@ std::optional getReqNTID(const Function &F) { } std::optional getMinCTASm(const Function &F) { - return findOneNVVMAnnotation(&F, "minctasm"); + return getFnAttrParsedInt(F, "nvvm.minctasm"); } std::optional getMaxNReg(const Function &F) { - return findOneNVVMAnnotation(&F, "maxnreg"); + return getFnAttrParsedInt(F, "nvvm.maxnreg"); } MaybeAlign getAlign(const Function &F, unsigned Index) { diff --git a/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll b/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll index 7a055c7152ec8..a0c06083c270b 100644 --- a/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll +++ b/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll @@ -23,11 +23,12 @@ entry: attributes #0 = { "omp_target_num_teams"="100" "omp_target_thread_limit"="101" + "nvvm.maxclusterrank"="200" } !llvm.module.flags = !{!0} !llvm.dbg.cu = !{!1} -!nvvm.annotations = !{!6, !7, !8, !9, !10} +!nvvm.annotations = !{!7, !8, !9, !10} !0 = !{i32 2, !"Debug Info Version", i32 3} !1 = distinct !DICompileUnit(language: DW_LANG_C11, file: !2, producer: "clang version 19.0.0git", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) @@ -35,7 +36,6 @@ attributes #0 = { !3 = !{} !4 = !DISubroutineType(types: !3) !5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !4, scopeLine: 10, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !1, retainedNodes: !3) -!6 = !{ptr @test, !"maxclusterrank", i32 200} !7 = !{ptr @test, !"maxntidx", i32 210} !8 = !{ptr @test, !"maxntidy", i32 211} !9 = !{ptr @test, !"maxntidz", i32 212} diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll index 3bd534bb0cf5d..1f888d7fb21f1 100644 --- a/llvm/test/CodeGen/NVPTX/annotations.ll +++ b/llvm/test/CodeGen/NVPTX/annotations.ll @@ -23,20 +23,20 @@ define void @kernel_func_reqntid(ptr %a) { } ; CHECK: .entry kernel_func_minctasm -define void @kernel_func_minctasm(ptr %a) { +define ptx_kernel void @kernel_func_minctasm(ptr %a) "nvvm.minctasm"="42" { ; CHECK: .minnctapersm 42 ; CHECK: ret ret void } ; CHECK-LABEL: .entry kernel_func_maxnreg -define void @kernel_func_maxnreg() { +define ptx_kernel void @kernel_func_maxnreg() "nvvm.maxnreg"="1234" { ; CHECK: .maxnreg 1234 ; CHECK: ret ret void } -!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10} +!nvvm.annotations = !{!1, !2, !3, !4, !9, !10} !1 = !{ptr @kernel_func_maxntid, !"kernel", i32 1} !2 = !{ptr @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30} @@ -44,11 +44,5 @@ define void @kernel_func_maxnreg() { !3 = !{ptr @kernel_func_reqntid, !"kernel", i32 1} !4 = !{ptr @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33} -!5 = !{ptr @kernel_func_minctasm, !"kernel", i32 1} -!6 = !{ptr @kernel_func_minctasm, !"minctasm", i32 42} - -!7 = !{ptr @kernel_func_maxnreg, !"kernel", i32 1} -!8 = !{ptr @kernel_func_maxnreg, !"maxnreg", i32 1234} - !9 = !{ptr addrspace(1) @texture, !"texture", i32 1} !10 = !{ptr addrspace(1) @surface, !"surface", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll index 60b3d70840af5..3b73c36de4b89 100644 --- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll @@ -43,7 +43,8 @@ define internal void @bar() { ret void } -; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"() { +; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init" +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8 ; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8 @@ -60,7 +61,8 @@ define internal void @bar() { ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"() { +; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini" +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8 ; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8 @@ -82,11 +84,11 @@ define internal void @bar() { ; CHECK: while.end: ; CHECK-NEXT: ret void +; CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="1" } + ; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1} ; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1} ; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1} -; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1} -; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1} -; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1} -; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1} -; CHECK: [[META9:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxclusterrank", i32 1} +; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1} +; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1} +; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll index c445c34c1842a..51483296dd34f 100644 --- a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll +++ b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll @@ -10,16 +10,14 @@ target triple = "nvptx64-unknown-unknown" ; CHECK_SM_80-NOT: .maxclusterrank 8 ; Make sure that for SM version prior to 90 `.maxclusterrank` directive is -; sielently ignored. -define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() { +; silently ignored. +define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() "nvvm.minctasm"="2" "nvvm.maxclusterrank"="8" { entry: %a = alloca i32, align 4 store volatile i32 1, ptr %a, align 4 ret void } -!nvvm.annotations = !{!1, !2, !3} +!nvvm.annotations = !{!1} !1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128} -!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2} -!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8} diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll index a9f370a12a945..3a1f59454493c 100644 --- a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll +++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll @@ -1,28 +1,68 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 -; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s +; RUN: opt < %s -passes=verify -S | FileCheck %s -define i32 @foo(i32 %a, i32 %b) { -; CHECK-LABEL: define i32 @foo( +define i32 @test_align(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @test_align( ; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ret i32 0 } -define i32 @bar(i32 %a, i32 %b) { -; CHECK-LABEL: define ptx_kernel i32 @bar( -; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { -; CHECK-NEXT: ret i32 0 +define void @test_kernel() { +; CHECK-LABEL: define ptx_kernel void @test_kernel() { +; CHECK-NEXT: ret void ; - ret i32 0 + ret void +} + +define void @test_maxclusterrank() { +; CHECK-LABEL: define void @test_maxclusterrank( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void } -!nvvm.annotations = !{!0, !1, !2} +define void @test_cluster_max_blocks() { +; CHECK-LABEL: define void @test_cluster_max_blocks( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} -!0 = !{ptr @foo, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010} +define void @test_minctasm() { +; CHECK-LABEL: define void @test_minctasm( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define void @test_maxnreg() { +; CHECK-LABEL: define void @test_maxnreg( +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6} + +!0 = !{ptr @test_align, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010} !1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008} -!2 = !{ptr @bar, !"kernel", i32 1} +!2 = !{ptr @test_kernel, !"kernel", i32 1} +!3 = !{ptr @test_maxclusterrank, !"maxclusterrank", i32 2} +!4 = !{ptr @test_cluster_max_blocks, !"cluster_max_blocks", i32 3} +!5 = !{ptr @test_minctasm, !"minctasm", i32 4} +!6 = !{ptr @test_maxnreg, !"maxnreg", i32 5} ;. -; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8} +; CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="2" } +; CHECK: attributes #[[ATTR1]] = { "nvvm.maxclusterrank"="3" } +; CHECK: attributes #[[ATTR2]] = { "nvvm.minctasm"="4" } +; CHECK: attributes #[[ATTR3]] = { "nvvm.maxnreg"="5" } +;. +; CHECK: [[META0:![0-9]+]] = !{ptr @test_align, !"align", i32 8} ;. diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index 659ab1227f113..8b13735774663 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -17,6 +17,7 @@ #include "mlir/IR/Operation.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsNVPTX.h" @@ -227,14 +228,14 @@ class NVVMDialectLLVMIRTranslationInterface } else if (attribute.getName() == NVVM::NVVMDialect::getClusterMaxBlocksAttrName()) { auto value = dyn_cast(attribute.getValue()); - generateMetadata(value.getInt(), "cluster_max_blocks"); + llvmFunc->addFnAttr("nvvm.maxclusterrank", llvm::utostr(value.getInt())); } else if (attribute.getName() == NVVM::NVVMDialect::getMinctasmAttrName()) { auto value = dyn_cast(attribute.getValue()); - generateMetadata(value.getInt(), "minctasm"); + llvmFunc->addFnAttr("nvvm.minctasm", llvm::utostr(value.getInt())); } else if (attribute.getName() == NVVM::NVVMDialect::getMaxnregAttrName()) { auto value = dyn_cast(attribute.getValue()); - generateMetadata(value.getInt(), "maxnreg"); + llvmFunc->addFnAttr("nvvm.maxnreg", llvm::utostr(value.getInt())); } else if (attribute.getName() == NVVM::NVVMDialect::getKernelFuncAttrName()) { llvmFunc->setCallingConv(llvm::CallingConv::PTX_Kernel); diff --git a/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir b/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir index 6605f10f128e6..459859f5be47b 100644 --- a/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir +++ b/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir @@ -6,6 +6,6 @@ module { llvm.func external @f() attributes { nvvm.minctasm = 10 : i32 } - // CHECK: !nvvm.annotations = !{![[NVVM:[0-9]+]]} - // CHECK: ![[NVVM]] = !{ptr @f, !"minctasm", i32 10} + // CHECK: declare void @f() #[[ATTRS:[0-9]+]] + // CHECK: attributes #[[ATTRS]] = { "nvvm.minctasm"="10" } } diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 970cac707b058..5ab593452ab66 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -623,27 +623,25 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.cluster_max_blocks = 8} { llvm.return } -// CHECK: define ptx_kernel void @kernel_func -// CHECK: !nvvm.annotations = -// CHECK: {ptr @kernel_func, !"cluster_max_blocks", i32 8} +// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]] +// CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="8" } + // ----- llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.minctasm = 16} { llvm.return } -// CHECK: define ptx_kernel void @kernel_func -// CHECK: !nvvm.annotations = -// CHECK: {ptr @kernel_func, !"minctasm", i32 16} +// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]] +// CHECK: attributes #[[ATTR0]] = { "nvvm.minctasm"="16" } // ----- llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxnreg = 16} { llvm.return } -// CHECK: define ptx_kernel void @kernel_func -// CHECK: !nvvm.annotations = -// CHECK: {ptr @kernel_func, !"maxnreg", i32 16} +// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]] +// CHECK: attributes #[[ATTR0]] = { "nvvm.maxnreg"="16" } // ----- llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array, @@ -651,13 +649,12 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array