[DAGCombiner] fold select-of-constants based on sign-bit test
Examples:
  i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
  i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1

This is a small generalization of a fold requested in PR43650: https://bugs.llvm.org/show_bug.cgi?id=43650

The sign-bit of the condition operand can be used as a mask for the true operand: https://rise4fun.com/Alive/paT

Note that we already handle some of the patterns (isNegative + scalar) because there's an over-specialized, yet over-reaching fold for that in foldSelectCCToShiftAnd(). It doesn't use any TLI hooks, so I can't easily rip out that code even though we're duplicating part of it here.

This fold is guarded by TLI.convertSelectOfConstantsToMath(), so it should not cause problems for targets that prefer select over shift.

Also worth noting: I thought we could generalize this further to include the case where the true operand of the select is not constant, but Alive says that may allow poison to pass through where it does not in the original select form of the code.

Differential Revision: https://reviews.llvm.org/D68949

llvm-svn: 374902
This commit is contained in:
parent
b18170660e
commit
d545c9056e
|
@ -8173,6 +8173,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
|
|||
}
|
||||
}
|
||||
|
||||
/// If a (v)select has a condition value that is a sign-bit test, try to smear
|
||||
/// the condition operand sign-bit across the value width and use it as a mask.
|
||||
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
|
||||
SDValue Cond = N->getOperand(0);
|
||||
SDValue C1 = N->getOperand(1);
|
||||
SDValue C2 = N->getOperand(2);
|
||||
assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
|
||||
"Expected select-of-constants");
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
|
||||
VT != Cond.getOperand(0).getValueType())
|
||||
return SDValue();
|
||||
|
||||
// The inverted-condition + commuted-select variants of these patterns are
|
||||
// canonicalized to these forms in IR.
|
||||
SDValue X = Cond.getOperand(0);
|
||||
SDValue CondC = Cond.getOperand(1);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
||||
if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
|
||||
isAllOnesOrAllOnesSplat(C2)) {
|
||||
// i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
|
||||
SDLoc DL(N);
|
||||
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
|
||||
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
|
||||
return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
|
||||
}
|
||||
if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
|
||||
// i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
|
||||
SDLoc DL(N);
|
||||
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
|
||||
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
|
||||
return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
|
||||
SDValue Cond = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
|
@ -8248,6 +8285,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
|
|||
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
|
||||
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
|
||||
}
|
||||
|
||||
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
|
||||
return V;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -8623,6 +8663,9 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
|
|||
return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
|
||||
}
|
||||
|
||||
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
|
||||
return V;
|
||||
|
||||
// The general case for select-of-constants:
|
||||
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
|
||||
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
|
||||
|
|
|
@ -4,10 +4,9 @@
|
|||
define i8 @isnonneg_i8(i8 %x) {
|
||||
; CHECK-LABEL: isnonneg_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: testb %dil, %dil
|
||||
; CHECK-NEXT: movl $42, %ecx
|
||||
; CHECK-NEXT: movl $255, %eax
|
||||
; CHECK-NEXT: cmovnsl %ecx, %eax
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: sarb $7, %al
|
||||
; CHECK-NEXT: orb $42, %al
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt i8 %x, -1
|
||||
|
@ -18,10 +17,9 @@ define i8 @isnonneg_i8(i8 %x) {
|
|||
define i16 @isnonneg_i16(i16 %x) {
|
||||
; CHECK-LABEL: isnonneg_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: testw %di, %di
|
||||
; CHECK-NEXT: movl $542, %ecx # imm = 0x21E
|
||||
; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
|
||||
; CHECK-NEXT: cmovnsl %ecx, %eax
|
||||
; CHECK-NEXT: movswl %di, %eax
|
||||
; CHECK-NEXT: sarl $15, %eax
|
||||
; CHECK-NEXT: orl $542, %eax # imm = 0x21E
|
||||
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt i16 %x, -1
|
||||
|
@ -32,10 +30,9 @@ define i16 @isnonneg_i16(i16 %x) {
|
|||
define i32 @isnonneg_i32(i32 %x) {
|
||||
; CHECK-LABEL: isnonneg_i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: movl $-42, %ecx
|
||||
; CHECK-NEXT: movl $-1, %eax
|
||||
; CHECK-NEXT: cmovnsl %ecx, %eax
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: sarl $31, %eax
|
||||
; CHECK-NEXT: orl $-42, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt i32 %x, -1
|
||||
%r = select i1 %cond, i32 -42, i32 -1
|
||||
|
@ -45,10 +42,9 @@ define i32 @isnonneg_i32(i32 %x) {
|
|||
define i64 @isnonneg_i64(i64 %x) {
|
||||
; CHECK-LABEL: isnonneg_i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: testq %rdi, %rdi
|
||||
; CHECK-NEXT: movl $2342342, %ecx # imm = 0x23BDC6
|
||||
; CHECK-NEXT: movq $-1, %rax
|
||||
; CHECK-NEXT: cmovnsq %rcx, %rax
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: sarq $63, %rax
|
||||
; CHECK-NEXT: orq $2342342, %rax # imm = 0x23BDC6
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt i64 %x, -1
|
||||
%r = select i1 %cond, i64 2342342, i64 -1
|
||||
|
@ -58,10 +54,10 @@ define i64 @isnonneg_i64(i64 %x) {
|
|||
define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
|
||||
; CHECK-LABEL: isnonneg_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; CHECK-NEXT: pcmpgtb %xmm1, %xmm0
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm0
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm1
|
||||
; CHECK-NEXT: pcmpgtb %xmm0, %xmm1
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
%r = select <16 x i1> %cond, <16 x i8> <i8 12, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
|
@ -71,9 +67,7 @@ define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
|
|||
define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
|
||||
; CHECK-LABEL: isnonneg_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; CHECK-NEXT: pcmpgtw %xmm1, %xmm0
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm0
|
||||
; CHECK-NEXT: psraw $15, %xmm0
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
|
@ -84,9 +78,7 @@ define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
|
|||
define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: isnonneg_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrad $31, %xmm0
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
|
@ -97,18 +89,8 @@ define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
|
|||
define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: isnonneg_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967]
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm2
|
||||
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
|
||||
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrad $31, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; CHECK-NEXT: pand %xmm3, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
|
||||
; CHECK-NEXT: por %xmm0, %xmm1
|
||||
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm0
|
||||
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
|
||||
|
@ -182,10 +164,8 @@ define <16 x i8> @isneg_v16i8(<16 x i8> %x) {
|
|||
define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
|
||||
; CHECK-LABEL: isneg_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm1
|
||||
; CHECK-NEXT: pcmpgtw %xmm0, %xmm1
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psraw $15, %xmm0
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp slt <8 x i16> %x, zeroinitializer
|
||||
%r = select <8 x i1> %cond, <8 x i16> <i16 1, i16 542, i16 542, i16 542, i16 542, i16 542, i16 542, i16 1>, <8 x i16> zeroinitializer
|
||||
|
@ -195,10 +175,8 @@ define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
|
|||
define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: isneg_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm1
|
||||
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrad $31, %xmm0
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp slt <4 x i32> %x, zeroinitializer
|
||||
%r = select <4 x i1> %cond, <4 x i32> <i32 0, i32 42, i32 -42, i32 1>, <4 x i32> zeroinitializer
|
||||
|
@ -208,15 +186,8 @@ define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
|
|||
define <2 x i64> @isneg_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: isneg_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
|
||||
; CHECK-NEXT: pxor %xmm1, %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm2
|
||||
; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
|
||||
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-NEXT: pand %xmm2, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
|
||||
; CHECK-NEXT: por %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrad $31, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cond = icmp slt <2 x i64> %x, zeroinitializer
|
||||
|
|
Loading…
Reference in New Issue