[DAGCombiner] fold select-of-constants based on sign-bit test

Examples:
  i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
  i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1

This is a small generalization of a fold requested in PR43650:
https://bugs.llvm.org/show_bug.cgi?id=43650

The sign-bit of the condition operand can be used as a mask for the true operand:
https://rise4fun.com/Alive/paT

Note that we already handle some of the patterns (isNegative + scalar) because
there's an over-specialized, yet over-reaching fold for that in foldSelectCCToShiftAnd().
It doesn't use any TLI hooks, so I can't easily rip out that code even though we're
duplicating part of it here. This fold is guarded by TLI.convertSelectOfConstantsToMath(),
so it should not cause problems for targets that prefer select over shift.

Also worth noting: I thought we could generalize this further to include the case where
the true operand of the select is not constant, but Alive says that may allow poison to
pass through where it does not in the original select form of the code.

Differential Revision: https://reviews.llvm.org/D68949

llvm-svn: 374902
This commit is contained in:
Sanjay Patel 2019-10-15 15:23:57 +00:00
parent b18170660e
commit d545c9056e
2 changed files with 68 additions and 54 deletions

View File

@ -8173,6 +8173,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue C1 = N->getOperand(1);
SDValue C2 = N->getOperand(2);
assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
"Expected select-of-constants");
EVT VT = N->getValueType(0);
if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
VT != Cond.getOperand(0).getValueType())
return SDValue();
// The inverted-condition + commuted-select variants of these patterns are
// canonicalized to these forms in IR.
SDValue X = Cond.getOperand(0);
SDValue CondC = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
isAllOnesOrAllOnesSplat(C2)) {
// i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
}
if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
// i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
}
return SDValue();
}
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@ -8248,6 +8285,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
return V;
}
return SDValue();
@ -8623,6 +8663,9 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
}
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
return V;
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so

View File

@ -4,10 +4,9 @@
define i8 @isnonneg_i8(i8 %x) {
; CHECK-LABEL: isnonneg_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: testb %dil, %dil
; CHECK-NEXT: movl $42, %ecx
; CHECK-NEXT: movl $255, %eax
; CHECK-NEXT: cmovnsl %ecx, %eax
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: sarb $7, %al
; CHECK-NEXT: orb $42, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%cond = icmp sgt i8 %x, -1
@ -18,10 +17,9 @@ define i8 @isnonneg_i8(i8 %x) {
define i16 @isnonneg_i16(i16 %x) {
; CHECK-LABEL: isnonneg_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: testw %di, %di
; CHECK-NEXT: movl $542, %ecx # imm = 0x21E
; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
; CHECK-NEXT: cmovnsl %ecx, %eax
; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: sarl $15, %eax
; CHECK-NEXT: orl $542, %eax # imm = 0x21E
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cond = icmp sgt i16 %x, -1
@ -32,10 +30,9 @@ define i16 @isnonneg_i16(i16 %x) {
define i32 @isnonneg_i32(i32 %x) {
; CHECK-LABEL: isnonneg_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: movl $-42, %ecx
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovnsl %ecx, %eax
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: sarl $31, %eax
; CHECK-NEXT: orl $-42, %eax
; CHECK-NEXT: retq
%cond = icmp sgt i32 %x, -1
%r = select i1 %cond, i32 -42, i32 -1
@ -45,10 +42,9 @@ define i32 @isnonneg_i32(i32 %x) {
define i64 @isnonneg_i64(i64 %x) {
; CHECK-LABEL: isnonneg_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: movl $2342342, %ecx # imm = 0x23BDC6
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovnsq %rcx, %rax
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: sarq $63, %rax
; CHECK-NEXT: orq $2342342, %rax # imm = 0x23BDC6
; CHECK-NEXT: retq
%cond = icmp sgt i64 %x, -1
%r = select i1 %cond, i64 2342342, i64 -1
@ -58,10 +54,10 @@ define i64 @isnonneg_i64(i64 %x) {
define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
; CHECK-LABEL: isnonneg_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtb %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-NEXT: por {{.*}}(%rip), %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%r = select <16 x i1> %cond, <16 x i8> <i8 12, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@ -71,9 +67,7 @@ define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
; CHECK-LABEL: isnonneg_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtw %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: psraw $15, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@ -84,9 +78,7 @@ define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
; CHECK-LABEL: isnonneg_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@ -97,18 +89,8 @@ define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) {
; CHECK-LABEL: isnonneg_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
@ -182,10 +164,8 @@ define <16 x i8> @isneg_v16i8(<16 x i8> %x) {
define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
; CHECK-LABEL: isneg_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpgtw %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psraw $15, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp slt <8 x i16> %x, zeroinitializer
%r = select <8 x i1> %cond, <8 x i16> <i16 1, i16 542, i16 542, i16 542, i16 542, i16 542, i16 542, i16 1>, <8 x i16> zeroinitializer
@ -195,10 +175,8 @@ define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
; CHECK-LABEL: isneg_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp slt <4 x i32> %x, zeroinitializer
%r = select <4 x i1> %cond, <4 x i32> <i32 0, i32 42, i32 -42, i32 1>, <4 x i32> zeroinitializer
@ -208,15 +186,8 @@ define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
define <2 x i64> @isneg_v2i64(<2 x i64> %x) {
; CHECK-LABEL: isneg_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm2, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp slt <2 x i64> %x, zeroinitializer