[X86] getFauxShuffleMask - add support for vXi64/vXf64 concat_vectors decoding (#127630)

Similar to insert_subvector - limit this to vXi64 vector cases to make the most of cross lane shuffles (for now).
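In SelectionDAG terms, a concat_vectors node is already a shuffle in disguise: for example, concatenating two v4i64 values yields an 8-element vector whose lanes are elements 0-3 of the first operand followed by elements 0-3 of the second, so the node can be handed to the shuffle combiner like any other shuffle. Restricting the decode to 64-bit elements (vXi64/vXf64) keeps the resulting masks on boundaries where AVX-512 has single-instruction cross-lane shuffles such as vpermq and vshufi64x2; the test diffs below show one such win.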
RKSimon authored Feb 19, 2025
1 parent db59708 commit 0607f94
Showing 5 changed files with 150 additions and 102 deletions.
13 changes: 13 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6110,6 +6110,19 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
    Ops.push_back(N1);
    return true;
  }
+  case ISD::CONCAT_VECTORS: {
+    // Limit this to vXi64 vector cases to make the most of cross lane shuffles.
+    unsigned NumSubElts = N.getOperand(0).getValueType().getVectorNumElements();
+    if (NumBitsPerElt == 64) {
+      for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) {
+        for (unsigned M = 0; M != NumSubElts; ++M)
+          Mask.push_back((I * NumElts) + M);
+        Ops.push_back(N.getOperand(I));
+      }
+      return true;
+    }
+    return false;
+  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Src = N.getOperand(0);
    SDValue Sub = N.getOperand(1);
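To make the new mask construction concrete, here is a minimal standalone C++ sketch (not LLVM code; the v8i64 result and two-operand concat are illustrative assumptions) of the indices the CONCAT_VECTORS case pushes:

  // Sketch of the mask built above: each concat_vectors operand I contributes
  // NumSubElts indices starting at I * NumElts, mirroring
  // Mask.push_back((I * NumElts) + M) in the patch. NumElts is the element
  // count of the concatenated result, so every operand owns a NumElts-wide
  // slice of the index space even though only NumSubElts slots are used.
  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned NumElts = 8;                   // v8i64 result (assumed)
    const unsigned NumOps = 2;                    // two concat operands (assumed)
    const unsigned NumSubElts = NumElts / NumOps; // v4i64 per operand

    std::vector<int> Mask;
    for (unsigned I = 0; I != NumOps; ++I)
      for (unsigned M = 0; M != NumSubElts; ++M)
        Mask.push_back(I * NumElts + M);

    for (int Idx : Mask)
      std::printf("%d ", Idx); // prints: 0 1 2 3 8 9 10 11
    std::printf("\n");
  }

Operand 1's indices start at 8 rather than 4 because each faux-shuffle input is given a full NumElts-wide index range; the combiner later resolves those ranges back onto the actual operands.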
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
@@ -1215,10 +1215,10 @@ define void @store_i8_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zmm0[1],zero,zero,zero,zero,zmm0[18,26],zero,zero,zero,zero,zero,zmm0[19,27],zero,zero,zero,zero,zero,zero,zero,zmm0[36],zero,zero,zero,zero,zero,zero,zmm0[37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm1
-; AVX512BW-NEXT: vpshufb {{.*#+}} zmm2 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
-; AVX512BW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vporq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[2,3,0,1,2,3,0,1]
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm2 = zero,zero,zmm2[0,8],zero,zero,zero,zero,zero,zmm2[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm2[19,27],zero,zero,zero,zero,zero,zmm2[20,28],zero,zero,zero,zero,zero,zero,zero,zmm2[37,45],zero,zero,zero,zero,zero,zmm2[38,46],zero,zero,zero,zmm2[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vporq %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: movabsq $63546854584629360, %rcx # imm = 0xE1C3870E1C3870
; AVX512BW-NEXT: kmovq %rcx, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
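The net effect in this hunk: previously, vpermq had to wait on the vinserti64x4 splat of %ymm2 into %zmm1 before building the swapped concatenation; with concat_vectors decoded as a shuffle, the combiner emits a single vshufi64x2 that reads %zmm2 directly, shortening the dependency chain. The AVX512DQ-BW block below picks up the identical simplification.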
@@ -1294,10 +1294,10 @@ define void @store_i8_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp
; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zmm0[1],zero,zero,zero,zero,zmm0[18,26],zero,zero,zero,zero,zero,zmm0[19,27],zero,zero,zero,zero,zero,zero,zero,zmm0[36],zero,zero,zero,zero,zero,zero,zmm0[37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm1
-; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm2 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
-; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-BW-NEXT: vporq %zmm2, %zmm1, %zmm1
+; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-BW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[2,3,0,1,2,3,0,1]
+; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm2 = zero,zero,zmm2[0,8],zero,zero,zero,zero,zero,zmm2[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm2[19,27],zero,zero,zero,zero,zero,zmm2[20,28],zero,zero,zero,zero,zero,zero,zero,zmm2[37,45],zero,zero,zero,zero,zero,zmm2[38,46],zero,zero,zero,zmm2[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-BW-NEXT: vporq %zmm1, %zmm2, %zmm1
; AVX512DQ-BW-NEXT: movabsq $63546854584629360, %rcx # imm = 0xE1C3870E1C3870
; AVX512DQ-BW-NEXT: kmovq %rcx, %k1
; AVX512DQ-BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
(diffs for the remaining 3 changed files not shown)
