Add some DAGCombines for (adde 0, 0, glue), which are useful to optimize legalized code for large integer arithmetic.
1. Inform users of ADDEs with two 0 operands that they never set carry.
2. Fold other ADDs or ADDCs into the ADDE if possible.
It would be neat if we could do the same thing for SETCC+ADD eventually, but we can't do that in target independent code.
llvm-svn: 126557
This commit is contained in:
parent
c5575cced8
commit
26691d9660
|
@ -1290,6 +1290,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
|
||||||
|
/// return 0 or 1 depending on the carry flag.
/// This is the case when both addends (operands 0 and 1) are the constant
/// zero, so the sum is just the incoming carry.
|
||||||
|
static bool isCarryMaterialization(SDValue V) {
|
||||||
|
// Anything other than an ADDE cannot be a carry materialization.
if (V.getOpcode() != ISD::ADDE)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// C is null when operand 0 is not a constant node.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
|
||||||
|
// Operand 0 must be a zero constant and operand 1 must be that same value,
// i.e. both addends are zero.
return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue DAGCombiner::visitADD(SDNode *N) {
|
SDValue DAGCombiner::visitADD(SDNode *N) {
|
||||||
SDValue N0 = N->getOperand(0);
|
SDValue N0 = N->getOperand(0);
|
||||||
SDValue N1 = N->getOperand(1);
|
SDValue N1 = N->getOperand(1);
|
||||||
|
@ -1453,6 +1463,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
|
||||||
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
|
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add (adde 0, 0, glue), X -> adde X, 0, glue
|
||||||
|
if (N0->hasOneUse() && isCarryMaterialization(N0))
|
||||||
|
return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
|
||||||
|
DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
|
||||||
|
N0.getOperand(2));
|
||||||
|
|
||||||
|
// add X, (adde 0, 0, glue) -> adde X, 0, glue
|
||||||
|
if (N1->hasOneUse() && isCarryMaterialization(N1))
|
||||||
|
return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
|
||||||
|
DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
|
||||||
|
N1.getOperand(2));
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1496,6 +1518,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
|
||||||
N->getDebugLoc(), MVT::Glue));
|
N->getDebugLoc(), MVT::Glue));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// addc (adde 0, 0, glue), X -> adde X, 0, glue
|
||||||
|
if (N0->hasOneUse() && isCarryMaterialization(N0))
|
||||||
|
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
|
||||||
|
DAG.getConstant(0, VT), N0.getOperand(2));
|
||||||
|
|
||||||
|
// addc X, (adde 0, 0, glue) -> adde X, 0, glue
|
||||||
|
if (N1->hasOneUse() && isCarryMaterialization(N1))
|
||||||
|
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
|
||||||
|
DAG.getConstant(0, VT), N1.getOperand(2));
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1506,6 +1538,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
|
||||||
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
|
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
|
||||||
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
|
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
|
||||||
|
|
||||||
|
// If both operands are null we know that carry out will always be false.
|
||||||
|
if (N0C && N0C->isNullValue() && N0 == N1)
|
||||||
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
|
||||||
|
N->getDebugLoc(),
|
||||||
|
MVT::Glue));
|
||||||
|
|
||||||
// canonicalize constant to RHS
|
// canonicalize constant to RHS
|
||||||
if (N0C && !N1C)
|
if (N0C && !N1C)
|
||||||
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
|
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
|
||||||
|
|
|
@ -1780,43 +1780,6 @@ case it choses instead to keep the max operation obvious.
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
Take the following testcase on x86-64 (similar testcases exist for all targets
|
|
||||||
with addc/adde):
|
|
||||||
|
|
||||||
define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
|
|
||||||
i64 %c) nounwind {
|
|
||||||
entry:
|
|
||||||
%0 = zext i64 %a to i128 ; <i128> [#uses=1]
|
|
||||||
%1 = zext i64 %b to i128 ; <i128> [#uses=1]
|
|
||||||
%2 = add i128 %1, %0 ; <i128> [#uses=2]
|
|
||||||
%3 = zext i64 %c to i128 ; <i128> [#uses=1]
|
|
||||||
%4 = shl i128 %3, 64 ; <i128> [#uses=1]
|
|
||||||
%5 = add i128 %4, %2 ; <i128> [#uses=1]
|
|
||||||
%6 = lshr i128 %5, 64 ; <i128> [#uses=1]
|
|
||||||
%7 = trunc i128 %6 to i64 ; <i64> [#uses=1]
|
|
||||||
store i64 %7, i64* %s, align 8
|
|
||||||
%8 = trunc i128 %2 to i64 ; <i64> [#uses=1]
|
|
||||||
store i64 %8, i64* %t, align 8
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
Generated code:
|
|
||||||
addq %rcx, %rdx
|
|
||||||
sbbq %rax, %rax
|
|
||||||
subq %rax, %r8
|
|
||||||
movq %r8, (%rdi)
|
|
||||||
movq %rdx, (%rsi)
|
|
||||||
ret
|
|
||||||
|
|
||||||
Expected code:
|
|
||||||
addq %rcx, %rdx
|
|
||||||
adcq $0, %r8
|
|
||||||
movq %r8, (%rdi)
|
|
||||||
movq %rdx, (%rsi)
|
|
||||||
ret
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
Switch lowering generates less than ideal code for the following switch:
|
Switch lowering generates less than ideal code for the following switch:
|
||||||
define void @a(i32 %x) nounwind {
|
define void @a(i32 %x) nounwind {
|
||||||
entry:
|
entry:
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64
|
||||||
|
; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32
|
||||||
|
|
||||||
|
; Adds the zero-extended %a and %b as i128, then adds %c shifted into the upper
; 64 bits and stores the upper half of that sum to %s and the low half of the
; first sum to %t. The CHECK lines below expect the carry from the low add to
; be consumed by an add-with-carry of immediate 0 (adcq $0 on x86-64, adcl $0
; on x86).
define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = zext i64 %a to i128
|
||||||
|
%1 = zext i64 %b to i128
|
||||||
|
%2 = add i128 %1, %0
|
||||||
|
%3 = zext i64 %c to i128
|
||||||
|
%4 = shl i128 %3, 64
|
||||||
|
%5 = add i128 %4, %2
|
||||||
|
%6 = lshr i128 %5, 64
|
||||||
|
%7 = trunc i128 %6 to i64
|
||||||
|
store i64 %7, i64* %s, align 8
|
||||||
|
%8 = trunc i128 %2 to i64
|
||||||
|
store i64 %8, i64* %t, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-32: addl
|
||||||
|
; CHECK-32: adcl
|
||||||
|
; CHECK-32: adcl $0
|
||||||
|
; CHECK-32: adcl $0
|
||||||
|
|
||||||
|
; CHECK-64: addq
|
||||||
|
; CHECK-64: adcq $0
|
||||||
|
}
|
Loading…
Reference in New Issue