Skip to content
This repository was archived by the owner on Sep 2, 2018. It is now read-only.

Commit 14d8a84

Browse files
committed
[X86][SSE41] Combine insertion of zero scalars into vector blends with zero
Part 1 of 2. This patch attempts to replace the insertion of zero scalars with a vector blend with zero, avoiding the use of the integer insertion instructions (which are particularly slow on many targets). (Part 2 will add support for combining multiple blends-with-zero.) Differential Revision: http://reviews.llvm.org/D17483 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261743 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 8d04517 commit 14d8a84

File tree

3 files changed

+189
-98
lines changed

3 files changed

+189
-98
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12301,6 +12301,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1230112301
SelectionDAG &DAG) const {
1230212302
MVT VT = Op.getSimpleValueType();
1230312303
MVT EltVT = VT.getVectorElementType();
12304+
unsigned NumElts = VT.getVectorNumElements();
1230412305

1230512306
if (EltVT == MVT::i1)
1230612307
return InsertBitToMaskVector(Op, DAG);
@@ -12314,6 +12315,19 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1231412315
auto *N2C = cast<ConstantSDNode>(N2);
1231512316
unsigned IdxVal = N2C->getZExtValue();
1231612317

12318+
// If we are clearing out an element, we do this more efficiently with a
12319+
// blend shuffle than a costly integer insertion.
12320+
// TODO: would other rematerializable values (e.g. allbits) benefit as well?
12321+
// TODO: pre-SSE41 targets will tend to use bit masking - this could still
12322+
// be beneficial if we are inserting several zeros and can combine the masks.
12323+
if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) {
12324+
SmallVector<int, 8> ClearMask;
12325+
for (unsigned i = 0; i != NumElts; ++i)
12326+
ClearMask.push_back(i == IdxVal ? i + NumElts : i);
12327+
SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl);
12328+
return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask);
12329+
}
12330+
1231712331
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
1231812332
// into that, and then insert the subvector back into the result.
1231912333
if (VT.is256BitVector() || VT.is512BitVector()) {

0 commit comments

Comments
 (0)