From 4e2504ad5c09e561650877d44bf69d3a931cfc50 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 13 Apr 2025 11:17:47 +0100 Subject: [PATCH] [AArch64] Add CostKind to getSpliceCost This likely does not change much yet, given how the costs are currently used. As with other cost functions, the CostKind should be passed into and through the function. --- .../AArch64/AArch64TargetTransformInfo.cpp | 6 ++-- .../AArch64/AArch64TargetTransformInfo.h | 3 +- .../CostModel/AArch64/sve-intrinsics.ll | 32 +++++++++---------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ca1a486901951..4a03af5d47481 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5207,7 +5207,8 @@ AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy, return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, VecTy, CostKind); } -InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { +InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index, + TTI::TargetCostKind CostKind) { static const CostTblEntry ShuffleTbl[] = { { TTI::SK_Splice, MVT::nxv16i8, 1 }, { TTI::SK_Splice, MVT::nxv8i16, 1 }, @@ -5233,7 +5234,6 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { std::pair LT = getTypeLegalizationCost(Tp); Type *LegalVTy = EVT(LT.second).getTypeForEVT(Tp->getContext()); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; EVT PromotedVT = LT.second.getScalarType() == MVT::i1 ? 
TLI->getPromotedVTForPredicate(EVT(LT.second)) : LT.second; @@ -5616,7 +5616,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost( } if (Kind == TTI::SK_Splice && isa(Tp)) - return getSpliceCost(Tp, Index); + return getSpliceCost(Tp, Index, CostKind); // Inserting a subvector can often be done with either a D, S or H register // move, so long as the inserted vector is "aligned". diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index ae0df6b895ec8..372ec22bd548f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -219,7 +219,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { VectorType *ValTy, TTI::TargetCostKind CostKind); - InstructionCost getSpliceCost(VectorType *Tp, int Index); + InstructionCost getSpliceCost(VectorType *Tp, int Index, + TTI::TargetCostKind CostKind); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 1483d476bef0d..1716f48f4b042 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -638,10 +638,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) -; 
CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv16i8_neg = call @llvm.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv32i8_neg = call @llvm.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i16_neg = call @llvm.vector.splice.nxv1i16( zeroinitializer, zeroinitializer, i32 -1) @@ -671,10 +671,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4bf16_neg = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8bf16_neg = call @llvm.vector.splice.nxv8bf16( zeroinitializer, 
zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16bf16_neg = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call @llvm.vector.splice.nxv1i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; @@ -702,10 +702,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, 
zeroinitializer, i32 1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv16i8_neg = call @llvm.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv32i8_neg = call @llvm.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost 
Model: Found costs of Invalid for: %splice_nxv1i16_neg = call @llvm.vector.splice.nxv1i16( zeroinitializer, zeroinitializer, i32 -1) @@ -735,10 +735,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4bf16_neg = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8bf16_neg = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16bf16_neg = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( 
zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call @llvm.vector.splice.nxv1i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ;