Skip to content

Commit 6adeda8

Browse files
authored
[X86] combinePTESTCC - fold PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X) (#123466)
Simplifies the hidden "all_of(X == 0)" pattern. Fixes #123456.
1 parent b5df0e7 commit 6adeda8

File tree

2 files changed

+21
-31
lines changed

2 files changed

+21
-31
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -48054,6 +48054,18 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
4805448054
DAG.getAllOnesConstant(DL, NotOp1.getValueType())));
4805548055
}
4805648056
}
48057+
// PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X)
48058+
if (EFLAGS.getOpcode() == X86ISD::PTEST &&
48059+
ISD::isBuildVectorAllOnes(Op1.getNode())) {
48060+
SDValue BC0 = peekThroughBitcasts(Op0);
48061+
if (BC0.getOpcode() == X86ISD::PCMPEQ &&
48062+
ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) {
48063+
SDLoc DL(EFLAGS);
48064+
CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE);
48065+
SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0));
48066+
return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X);
48067+
}
48068+
}
4805748069
}
4805848070

4805948071
if (CC == X86::COND_E || CC == X86::COND_NE) {

llvm/test/CodeGen/X86/combine-ptest.ll

+9-31
Original file line numberDiff line numberDiff line change
@@ -376,20 +376,14 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
376376
define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
377377
; SSE-LABEL: ptestc_v4i32_eq0:
378378
; SSE: # %bb.0:
379-
; SSE-NEXT: pxor %xmm1, %xmm1
380-
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
381-
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
382-
; SSE-NEXT: ptest %xmm0, %xmm1
383-
; SSE-NEXT: setb %al
379+
; SSE-NEXT: ptest %xmm0, %xmm0
380+
; SSE-NEXT: sete %al
384381
; SSE-NEXT: retq
385382
;
386383
; AVX-LABEL: ptestc_v4i32_eq0:
387384
; AVX: # %bb.0:
388-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
389-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
390-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
391-
; AVX-NEXT: vptest %xmm1, %xmm0
392-
; AVX-NEXT: setb %al
385+
; AVX-NEXT: vptest %xmm0, %xmm0
386+
; AVX-NEXT: sete %al
393387
; AVX-NEXT: retq
394388
%icmp = icmp eq <4 x i32> %a0, zeroinitializer
395389
%sext = sext <4 x i1> %icmp to <4 x i32>
@@ -403,22 +397,14 @@ define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
403397
define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
404398
; SSE-LABEL: ptestc_v4i32_and_eq0:
405399
; SSE: # %bb.0:
406-
; SSE-NEXT: pand %xmm1, %xmm0
407-
; SSE-NEXT: pxor %xmm1, %xmm1
408-
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
409-
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
410400
; SSE-NEXT: ptest %xmm0, %xmm1
411-
; SSE-NEXT: setb %al
401+
; SSE-NEXT: sete %al
412402
; SSE-NEXT: retq
413403
;
414404
; AVX-LABEL: ptestc_v4i32_and_eq0:
415405
; AVX: # %bb.0:
416-
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
417-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
418-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
419-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
420-
; AVX-NEXT: vptest %xmm1, %xmm0
421-
; AVX-NEXT: setb %al
406+
; AVX-NEXT: vptest %xmm0, %xmm1
407+
; AVX-NEXT: sete %al
422408
; AVX-NEXT: retq
423409
%and = and <4 x i32> %a1, %a0
424410
%icmp = icmp eq <4 x i32> %and, zeroinitializer
@@ -433,21 +419,13 @@ define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
433419
define i1 @ptestc_v4i32_andnot_eq0(<4 x i32> %a0, <4 x i32> %a1) {
434420
; SSE-LABEL: ptestc_v4i32_andnot_eq0:
435421
; SSE: # %bb.0:
436-
; SSE-NEXT: pandn %xmm0, %xmm1
437-
; SSE-NEXT: pxor %xmm0, %xmm0
438-
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
439-
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
440-
; SSE-NEXT: ptest %xmm1, %xmm0
422+
; SSE-NEXT: ptest %xmm0, %xmm1
441423
; SSE-NEXT: setae %al
442424
; SSE-NEXT: retq
443425
;
444426
; AVX-LABEL: ptestc_v4i32_andnot_eq0:
445427
; AVX: # %bb.0:
446-
; AVX-NEXT: vpandn %xmm0, %xmm1, %xmm0
447-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
448-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
449-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
450-
; AVX-NEXT: vptest %xmm1, %xmm0
428+
; AVX-NEXT: vptest %xmm0, %xmm1
451429
; AVX-NEXT: setae %al
452430
; AVX-NEXT: retq
453431
%not = xor <4 x i32> %a1, splat (i32 -1)

0 commit comments

Comments
 (0)