-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[X86][AVX512FP16] Decouple AVX512VL and AVX512DQ from AVX512FP16 #137450
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang — Author: Phoebe Wang (phoebewang). Changes: Fixes #136209. Patch is 158.75 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/137450.diff — 41 files affected:
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index e136aa14a194c..92df320b45006 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -553,7 +553,8 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
- return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
+ return (__m512h)_mm512_xor_epi32((__m512i)__A,
+ _mm512_set1_epi32(-2147483648));
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c
index 1a164ff57fda1..ffef29d17e542 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512fp16 -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512fp16 -target-feature +avx512vl -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index a766476ca92bd..d277d053147fd 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -689,24 +689,24 @@ __m512h test_mm512_abs_ph(__m512h a) {
__m512h test_mm512_conj_pch(__m512h __A) {
// CHECK-LABEL: @test_mm512_conj_pch
- // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
// CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
+ // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half>
return _mm512_conj_pch(__A);
}
__m512h test_mm512_mask_conj_pch(__m512h __W, __mmask32 __U, __m512h __A) {
// CHECK-LABEL: @test_mm512_mask_conj_pch
// CHECK: %{{.*}} = trunc i32 %{{.*}} to i16
- // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
// CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
+ // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half>
// CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -717,12 +717,12 @@ __m512h test_mm512_mask_conj_pch(__m512h __W, __mmask32 __U, __m512h __A) {
__m512h test_mm512_maskz_conj_pch(__mmask32 __U, __m512h __A) {
// CHECK-LABEL: @test_mm512_maskz_conj_pch
// CHECK: %{{.*}} = trunc i32 %{{.*}} to i16
- // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
// CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
+ // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half>
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
// CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 63222a882ff53..3edc92c75303a 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -596,31 +596,24 @@
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16 %s
// AVX512FP16: #define __AVX512BW__ 1
-// AVX512FP16: #define __AVX512DQ__ 1
// AVX512FP16: #define __AVX512FP16__ 1
-// AVX512FP16: #define __AVX512VL__ 1
-// AVX512FP16: #define __EVEX256__ 1
// AVX512FP16: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512VL %s
-// AVX512FP16NOAVX512VL-NOT: #define __AVX512FP16__ 1
-// AVX512FP16NOAVX512VL-NOT: #define __AVX512VL__ 1
-// AVX512FP16NOAVX512VL-NOT: #define __EVEX256__ 1
+// AVX512FP16NOAVX512VL: #define __AVX512FP16__ 1
// AVX512FP16NOAVX512VL: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512BW %s
// AVX512FP16NOAVX512BW-NOT: #define __AVX512BW__ 1
// AVX512FP16NOAVX512BW-NOT: #define __AVX512FP16__ 1
-// AVX512FP16NOAVX512BW: #define __EVEX256__ 1
// AVX512FP16NOAVX512BW: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512DQ %s
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
-// AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
-// AVX512FP16NOAVX512DQ: #define __EVEX256__ 1
+// AVX512FP16NOAVX512DQ: #define __AVX512FP16__ 1
// AVX512FP16NOAVX512DQ: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 577428cad6d61..eacf9e7a7fb62 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -173,7 +173,7 @@ def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
// currently.
def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
"Support 16-bit floating point",
- [FeatureBWI, FeatureVLX, FeatureDQI]>;
+ [FeatureBWI]>;
def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8",
"HasAVXVNNIINT8", "true",
"Enable AVX-VNNI-INT8",
@@ -338,7 +338,7 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
"Support AVX10.1 up to 256-bit instruction",
[FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
- FeatureFP16]>;
+ FeatureFP16, FeatureVLX, FeatureDQI]>;
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0fc50dc1a87b6..9a91caaddd1c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2024,13 +2024,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
- if (Subtarget.hasDQI()) {
+ if (Subtarget.hasDQI() || Subtarget.hasFP16())
for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
setOperationAction(Opc, MVT::v8i64, Custom);
+
+ if (Subtarget.hasDQI())
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
- }
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
@@ -19850,7 +19851,7 @@ static SDValue promoteXINT_TO_FP(SDValue Op, const SDLoc &dl,
DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd);
}
-static bool isLegalConversion(MVT VT, bool IsSigned,
+static bool isLegalConversion(MVT VT, MVT FloatVT, bool IsSigned,
const X86Subtarget &Subtarget) {
if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned)
return true;
@@ -19861,6 +19862,8 @@ static bool isLegalConversion(MVT VT, bool IsSigned,
if (Subtarget.useAVX512Regs()) {
if (VT == MVT::v16i32)
return true;
+ if (VT == MVT::v8i64 && FloatVT == MVT::v8f16 && Subtarget.hasFP16())
+ return true;
if (VT == MVT::v8i64 && Subtarget.hasDQI())
return true;
}
@@ -19882,7 +19885,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (isSoftF16(VT, Subtarget))
return promoteXINT_TO_FP(Op, dl, DAG);
- else if (isLegalConversion(SrcVT, true, Subtarget))
+ else if (isLegalConversion(SrcVT, VT, true, Subtarget))
return Op;
if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
@@ -20386,7 +20389,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (isSoftF16(DstVT, Subtarget))
return promoteXINT_TO_FP(Op, dl, DAG);
- else if (isLegalConversion(SrcVT, false, Subtarget))
+ else if (isLegalConversion(SrcVT, DstVT, false, Subtarget))
return Op;
if (DstVT.isVector())
@@ -21409,7 +21412,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
{NVT, MVT::Other}, {Chain, Src})});
return DAG.getNode(Op.getOpcode(), dl, VT,
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
- } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) {
+ } else if (isTypeLegal(SrcVT) &&
+ isLegalConversion(VT, SrcVT, IsSigned, Subtarget)) {
return Op;
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 2ae6dd6b3d1ef..21d05ee389e64 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -135,7 +135,7 @@ constexpr FeatureBitset FeaturesSapphireRapids =
FeatureAVX512BF16 | FeatureAVX512FP16 | FeatureAVXVNNI | FeatureCLDEMOTE |
FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
- FeatureWAITPKG;
+ FeatureWAITPKG | FeatureAVX512DQ | FeatureAVX512VL;
constexpr FeatureBitset FeaturesGraniteRapids =
FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesDiamondRapids =
@@ -624,8 +624,7 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX2;
-constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
- FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
+constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = FeatureAVX512BW;
// Key Locker Features
constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
@@ -637,7 +636,8 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA |
FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ |
- FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16;
+ FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 |
+ FeatureAVX512DQ | FeatureAVX512VL;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
FeatureAVX10_1 | FeatureEVEX512;
constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
index 54ccc23840f99..f02d11648362c 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=f16c,fma | FileCheck %s --check-prefix=F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 | FileCheck %s --check-prefix=FP16
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind {
; AVX2-LABEL: foo:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
index 7473ca9da9ff0..36b95e744ba14 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16 | FileCheck %s --check-prefixes=CHECK,NO-SZ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16 | FileCheck %s --check-prefixes=CHECK,HAS-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set.
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll
index 9afe46e9e7c63..a509503584649 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) {
; CHECK-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll
index 1d413ad0c1065..43f30da15b20d 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
; CHECK-LABEL: test1:
@@ -84,7 +84,7 @@ entry:
define dso_local <8 x half> @test6(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll
index d6fe8232b056b..7b142ea170c22 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
define dso_local <32 x half> @test1(<32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
; CHECK-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
index 1318f607ea931..c306bfdd0c614 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -761,7 +761,7 @@ define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
; CHECK-LABEL: test_u33tofp2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i33> %arg0 to <2 x half>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index 3040e58b37997..26abf51c76b23 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -82,7 +82,8 @@ define <8 x half> @f32to4f16_mask(<4 x float> %a, <8 x half> %b, i8 %mask) {
;
; X86-LABEL: f32to4f16_mask:
; X86: # %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-NEXT: retl
@@ -101,7 +102,8 @@ define <8 x half> @f32to8f16_mask(<8 x float> %a, <8 x half> %b, i8 %mask) {
;
; X86-LABEL: f32to8f16_mask:
; X86: # %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll
index be0ef7ac478a3..3d4fa9e2cc6fa 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll
@@ -469,16 +469,17 @@ define <8 x half>@test_int_x86_avx512_mask3_vfmadd_sh(<8 x half> %x0, <8 x half>
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sh:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%...
[truncated]
|
@llvm/pr-subscribers-backend-x86 — Author: Phoebe Wang (phoebewang). Changes: Fixes #136209. Patch is 158.75 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/137450.diff — 41 files affected:
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index e136aa14a194c..92df320b45006 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -553,7 +553,8 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
- return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
+ return (__m512h)_mm512_xor_epi32((__m512i)__A,
+ _mm512_set1_epi32(-2147483648));
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c
index 1a164ff57fda1..ffef29d17e542 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512fp16 -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512fp16 -target-feature +avx512vl -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index a766476ca92bd..d277d053147fd 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -689,24 +689,24 @@ __m512h test_mm512_abs_ph(__m512h a) {
__m512h test_mm512_conj_pch(__m512h __A) {
// CHECK-LABEL: @test_mm512_conj_pch
- // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
// CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
+ // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half>
return _mm512_conj_pch(__A);
}
__m512h test_mm512_mask_conj_pch(__m512h __W, __mmask32 __U, __m512h __A) {
// CHECK-LABEL: @test_mm512_mask_conj_pch
// CHECK: %{{.*}} = trunc i32 %{{.*}} to i16
- // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
// CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
+ // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half>
// CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -717,12 +717,12 @@ __m512h test_mm512_mask_conj_pch(__m512h __W, __mmask32 __U, __m512h __A) {
__m512h test_mm512_maskz_conj_pch(__mmask32 __U, __m512h __A) {
// CHECK-LABEL: @test_mm512_maskz_conj_pch
// CHECK: %{{.*}} = trunc i32 %{{.*}} to i16
- // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32>
// CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float>
- // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
+ // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half>
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
// CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half>
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 63222a882ff53..3edc92c75303a 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -596,31 +596,24 @@
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16 %s
// AVX512FP16: #define __AVX512BW__ 1
-// AVX512FP16: #define __AVX512DQ__ 1
// AVX512FP16: #define __AVX512FP16__ 1
-// AVX512FP16: #define __AVX512VL__ 1
-// AVX512FP16: #define __EVEX256__ 1
// AVX512FP16: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512VL %s
-// AVX512FP16NOAVX512VL-NOT: #define __AVX512FP16__ 1
-// AVX512FP16NOAVX512VL-NOT: #define __AVX512VL__ 1
-// AVX512FP16NOAVX512VL-NOT: #define __EVEX256__ 1
+// AVX512FP16NOAVX512VL: #define __AVX512FP16__ 1
// AVX512FP16NOAVX512VL: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512BW %s
// AVX512FP16NOAVX512BW-NOT: #define __AVX512BW__ 1
// AVX512FP16NOAVX512BW-NOT: #define __AVX512FP16__ 1
-// AVX512FP16NOAVX512BW: #define __EVEX256__ 1
// AVX512FP16NOAVX512BW: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512DQ %s
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
-// AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
-// AVX512FP16NOAVX512DQ: #define __EVEX256__ 1
+// AVX512FP16NOAVX512DQ: #define __AVX512FP16__ 1
// AVX512FP16NOAVX512DQ: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 577428cad6d61..eacf9e7a7fb62 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -173,7 +173,7 @@ def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
// currently.
def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
"Support 16-bit floating point",
- [FeatureBWI, FeatureVLX, FeatureDQI]>;
+ [FeatureBWI]>;
def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8",
"HasAVXVNNIINT8", "true",
"Enable AVX-VNNI-INT8",
@@ -338,7 +338,7 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
"Support AVX10.1 up to 256-bit instruction",
[FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
- FeatureFP16]>;
+ FeatureFP16, FeatureVLX, FeatureDQI]>;
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0fc50dc1a87b6..9a91caaddd1c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2024,13 +2024,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
- if (Subtarget.hasDQI()) {
+ if (Subtarget.hasDQI() || Subtarget.hasFP16())
for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
setOperationAction(Opc, MVT::v8i64, Custom);
+
+ if (Subtarget.hasDQI())
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
- }
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
@@ -19850,7 +19851,7 @@ static SDValue promoteXINT_TO_FP(SDValue Op, const SDLoc &dl,
DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd);
}
-static bool isLegalConversion(MVT VT, bool IsSigned,
+static bool isLegalConversion(MVT VT, MVT FloatVT, bool IsSigned,
const X86Subtarget &Subtarget) {
if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned)
return true;
@@ -19861,6 +19862,8 @@ static bool isLegalConversion(MVT VT, bool IsSigned,
if (Subtarget.useAVX512Regs()) {
if (VT == MVT::v16i32)
return true;
+ if (VT == MVT::v8i64 && FloatVT == MVT::v8f16 && Subtarget.hasFP16())
+ return true;
if (VT == MVT::v8i64 && Subtarget.hasDQI())
return true;
}
@@ -19882,7 +19885,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (isSoftF16(VT, Subtarget))
return promoteXINT_TO_FP(Op, dl, DAG);
- else if (isLegalConversion(SrcVT, true, Subtarget))
+ else if (isLegalConversion(SrcVT, VT, true, Subtarget))
return Op;
if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
@@ -20386,7 +20389,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (isSoftF16(DstVT, Subtarget))
return promoteXINT_TO_FP(Op, dl, DAG);
- else if (isLegalConversion(SrcVT, false, Subtarget))
+ else if (isLegalConversion(SrcVT, DstVT, false, Subtarget))
return Op;
if (DstVT.isVector())
@@ -21409,7 +21412,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
{NVT, MVT::Other}, {Chain, Src})});
return DAG.getNode(Op.getOpcode(), dl, VT,
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
- } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) {
+ } else if (isTypeLegal(SrcVT) &&
+ isLegalConversion(VT, SrcVT, IsSigned, Subtarget)) {
return Op;
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 2ae6dd6b3d1ef..21d05ee389e64 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -135,7 +135,7 @@ constexpr FeatureBitset FeaturesSapphireRapids =
FeatureAVX512BF16 | FeatureAVX512FP16 | FeatureAVXVNNI | FeatureCLDEMOTE |
FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
- FeatureWAITPKG;
+ FeatureWAITPKG | FeatureAVX512DQ | FeatureAVX512VL;
constexpr FeatureBitset FeaturesGraniteRapids =
FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesDiamondRapids =
@@ -624,8 +624,7 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX2;
-constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
- FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
+constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = FeatureAVX512BW;
// Key Locker Features
constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
@@ -637,7 +636,8 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA |
FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ |
- FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16;
+ FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 |
+ FeatureAVX512DQ | FeatureAVX512VL;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
FeatureAVX10_1 | FeatureEVEX512;
constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
index 54ccc23840f99..f02d11648362c 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=f16c,fma | FileCheck %s --check-prefix=F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 | FileCheck %s --check-prefix=FP16
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind {
; AVX2-LABEL: foo:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
index 7473ca9da9ff0..36b95e744ba14 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16 | FileCheck %s --check-prefixes=CHECK,NO-SZ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16 | FileCheck %s --check-prefixes=CHECK,HAS-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set.
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll
index 9afe46e9e7c63..a509503584649 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) {
; CHECK-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll
index 1d413ad0c1065..43f30da15b20d 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
; CHECK-LABEL: test1:
@@ -84,7 +84,7 @@ entry:
define dso_local <8 x half> @test6(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll
index d6fe8232b056b..7b142ea170c22 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s
define dso_local <32 x half> @test1(<32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
; CHECK-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
index 1318f607ea931..c306bfdd0c614 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -761,7 +761,7 @@ define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
; CHECK-LABEL: test_u33tofp2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i33> %arg0 to <2 x half>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index 3040e58b37997..26abf51c76b23 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -82,7 +82,8 @@ define <8 x half> @f32to4f16_mask(<4 x float> %a, <8 x half> %b, i8 %mask) {
;
; X86-LABEL: f32to4f16_mask:
; X86: # %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-NEXT: retl
@@ -101,7 +102,8 @@ define <8 x half> @f32to8f16_mask(<8 x float> %a, <8 x half> %b, i8 %mask) {
;
; X86-LABEL: f32to8f16_mask:
; X86: # %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll
index be0ef7ac478a3..3d4fa9e2cc6fa 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll
@@ -469,16 +469,17 @@ define <8 x half>@test_int_x86_avx512_mask3_vfmadd_sh(<8 x half> %x0, <8 x half>
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sh:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h,c,cpp -- clang/lib/Headers/avx512fp16intrin.h clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c clang/test/CodeGen/X86/avx512fp16-builtins.c clang/test/Preprocessor/x86_target_features.c llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/TargetParser/X86TargetParser.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1bf33b5ed..9c1787fad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2031,7 +2031,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Opc, MVT::v8i64, Custom);
if (Subtarget.hasDQI())
- setOperationAction(ISD::MUL, MVT::v8i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i64, Legal);
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/16729. Here is the relevant piece of the build log for reference:
|
Fixes: #136209