[DAGCombiner] add and use TLI hook to convert and-of-seteq / or-of-setne to bitwise logic+setcc (PR32401)

This is a generic combine enabled via target hook to reduce icmp logic as discussed in:
https://bugs.llvm.org/show_bug.cgi?id=32401

It's likely that other targets will want to enable this hook for scalar transforms, 
and there are probably other patterns that can use bitwise logic to reduce comparisons.

Note that we are missing an IR canonicalization for these patterns, and we will probably
prefer the pair-of-compares form in IR (shorter, more likely to fold).

Differential Revision: https://reviews.llvm.org/D31483

llvm-svn: 299542
This commit is contained in:
Sanjay Patel 2017-04-05 14:09:39 +00:00
parent 6615f2b3d6
commit b2f1621bb1
9 changed files with 65 additions and 35 deletions

View File

@ -437,6 +437,15 @@ public:
return false;
}
/// Use bitwise logic to make pairs of compares more efficient. For example:
/// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
/// This should be true when it takes more than one instruction to lower
/// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
/// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
return false;
}
/// Return the preferred operand type if the target has a quick way to compare
/// integer values of the given size. Assume that any legal integer type can
/// be compared efficiently. Targets may override this to allow illegal wide

View File

@ -3255,6 +3255,21 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
}
// Try more general transforms if the predicates match and the only user of
// the compares is the 'and' or 'or'.
if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
N0.hasOneUse() && N1.hasOneUse()) {
// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
// or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, Or, Zero, CC1);
}
}
// Canonicalize equivalent operands to LL == RL.
if (LL == RR && LR == RL) {
CC1 = ISD::getSetCCSwappedOperands(CC1);

View File

@ -515,6 +515,10 @@ class InstrItineraryData;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
bool supportSwiftError() const override {
return true;
}

View File

@ -531,6 +531,10 @@ namespace llvm {
return true;
}
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
bool supportSplitCSR(MachineFunction *MF) const override {
return
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&

View File

@ -814,6 +814,10 @@ namespace llvm {
bool hasAndNotCompare(SDValue Y) const override;
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
MVT hasFastEqualityCompare(unsigned NumBits) const override;

View File

@ -20,13 +20,11 @@ define zeroext i1 @ne_neg1_and_ne_zero(i32 %x) nounwind {
define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: and_eq:
; CHECK: @ BB#0:
; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movweq r2, #1
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: movweq r12, #1
; CHECK-NEXT: and r0, r12, r2
; CHECK-NEXT: eor r2, r2, r3
; CHECK-NEXT: eor r0, r0, r1
; CHECK-NEXT: orrs r0, r0, r2
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movweq r0, #1
; CHECK-NEXT: bx lr
%cmp1 = icmp eq i32 %a, %b
%cmp2 = icmp eq i32 %c, %d
@ -37,13 +35,10 @@ define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: or_ne:
; CHECK: @ BB#0:
; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwne r2, #1
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: movwne r12, #1
; CHECK-NEXT: orr r0, r12, r2
; CHECK-NEXT: eor r2, r2, r3
; CHECK-NEXT: eor r0, r0, r1
; CHECK-NEXT: orrs r0, r0, r2
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d

View File

@ -433,11 +433,11 @@ define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: and_eq:
; CHECK: # BB#0:
; CHECK-NEXT: cmpw 0, 3, 4
; CHECK-NEXT: cmpw 1, 5, 6
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: crnand 20, 2, 6
; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%cmp1 = icmp eq i16 %a, %b
%cmp2 = icmp eq i16 %c, %d
@ -448,11 +448,12 @@ define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 z
define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: or_ne:
; CHECK: # BB#0:
; CHECK-NEXT: cmpw 0, 3, 4
; CHECK-NEXT: cmpw 1, 5, 6
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: crand 20, 6, 2
; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d

View File

@ -120,12 +120,12 @@ entry:
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
; ALL-LABEL: test8:
; ALL: ## BB#0:
; ALL-NEXT: notl %edi
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: testl %edx, %edx
; ALL-NEXT: movl $1, %eax
; ALL-NEXT: cmovel %eax, %edx
; ALL-NEXT: cmpl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: cmpl $-1, %edi
; ALL-NEXT: orl %edi, %esi
; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: retq
%tmp1 = icmp eq i32 %a1, -1

View File

@ -440,11 +440,10 @@ define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) nounwind {
define zeroext i1 @and_eq(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
; CHECK-LABEL: and_eq:
; CHECK: # BB#0:
; CHECK-NEXT: cmpb %sil, %dil
; CHECK-NEXT: sete %sil
; CHECK-NEXT: cmpb %cl, %dl
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: xorl %ecx, %edx
; CHECK-NEXT: orb %dl, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: andb %sil, %al
; CHECK-NEXT: retq
%cmp1 = icmp eq i8 %a, %b
%cmp2 = icmp eq i8 %c, %d
@ -455,11 +454,10 @@ define zeroext i1 @and_eq(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
define zeroext i1 @or_ne(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
; CHECK-LABEL: or_ne:
; CHECK: # BB#0:
; CHECK-NEXT: cmpb %sil, %dil
; CHECK-NEXT: setne %sil
; CHECK-NEXT: cmpb %cl, %dl
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: xorl %ecx, %edx
; CHECK-NEXT: orb %dl, %dil
; CHECK-NEXT: setne %al
; CHECK-NEXT: orb %sil, %al
; CHECK-NEXT: retq
%cmp1 = icmp ne i8 %a, %b
%cmp2 = icmp ne i8 %c, %d