Add some DAGCombines for (adde 0, 0, glue), which are useful to optimize legalized code for large integer arithmetic.

1. Inform users of an ADDE with two 0 operands that the node never sets carry. 2. Fold other ADDs or ADDCs into the ADDE if possible. It would be neat if we could do the same thing for SETCC+ADD eventually, but we can't do that in target-independent code. llvm-svn: 126557
2011-02-26 22:48:07 +00:00 · 2011-02-26 22:48:07 +00:00 · 26691d9660
parent c5575cced8
commit 26691d9660
3 changed files with 64 additions and 37 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -1290,6 +1290,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
  return SDValue();
 }
 /// isCarryMaterialization - Returns true when V is an ADDE whose first two
 /// operands are the same zero constant, i.e. a node whose value is just the
 /// incoming carry (0 or 1).
 static bool isCarryMaterialization(SDValue V) {
  if (V.getOpcode() == ISD::ADDE) {
    SDValue LHS = V.getOperand(0);
    // Only a constant-zero LHS qualifies; the RHS must be that very same value.
    if (ConstantSDNode *Zero = dyn_cast<ConstantSDNode>(LHS))
      return Zero->isNullValue() && LHS == V.getOperand(1);
  }
  return false;
 }
 SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
@ -1453,6 +1463,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }
  // add (adde 0, 0, glue), X -> adde X, 0, glue
  if (N0->hasOneUse() && isCarryMaterialization(N0))
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
                       DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
                       N0.getOperand(2));
  // add X, (adde 0, 0, glue) -> adde X, 0, glue
  if (N1->hasOneUse() && isCarryMaterialization(N1))
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
                       DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
                       N1.getOperand(2));
  return SDValue();
 }
@ -1496,6 +1518,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
                                   N->getDebugLoc(), MVT::Glue));
  }
  // addc (adde 0, 0, glue), X -> adde X, 0, glue
  if (N0->hasOneUse() && isCarryMaterialization(N0))
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
                       DAG.getConstant(0, VT), N0.getOperand(2));
  // addc X, (adde 0, 0, glue) -> adde X, 0, glue
  if (N1->hasOneUse() && isCarryMaterialization(N1))
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
                       DAG.getConstant(0, VT), N1.getOperand(2));
  return SDValue();
 }
@ -1506,6 +1538,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  // If both operands are null we know that carry out will always be false.
  if (N0C && N0C->isNullValue() && N0 == N1)
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
                                                             N->getDebugLoc(),
                                                             MVT::Glue));
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
--- a/llvm/lib/Target/README.txt
+++ b/llvm/lib/Target/README.txt
@ -1780,43 +1780,6 @@ case it choses instead to keep the max operation obvious.
 //===---------------------------------------------------------------------===//
 Take the following testcase on x86-64 (similar testcases exist for all targets
 with addc/adde):
 define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
 i64 %c) nounwind {
 entry:
 %0 = zext i64 %a to i128                        ; <i128> [#uses=1]
 %1 = zext i64 %b to i128                        ; <i128> [#uses=1]
 %2 = add i128 %1, %0                            ; <i128> [#uses=2]
 %3 = zext i64 %c to i128                        ; <i128> [#uses=1]
 %4 = shl i128 %3, 64                            ; <i128> [#uses=1]
 %5 = add i128 %4, %2                            ; <i128> [#uses=1]
 %6 = lshr i128 %5, 64                           ; <i128> [#uses=1]
 %7 = trunc i128 %6 to i64                       ; <i64> [#uses=1]
 store i64 %7, i64* %s, align 8
 %8 = trunc i128 %2 to i64                       ; <i64> [#uses=1]
 store i64 %8, i64* %t, align 8
 ret void
 }
 Generated code:
        addq	%rcx, %rdx
        sbbq	%rax, %rax
        subq	%rax, %r8
        movq	%r8, (%rdi)
        movq	%rdx, (%rsi)
        ret
 Expected code:
       addq    %rcx, %rdx
       adcq    $0, %r8
       movq    %r8, (%rdi)
       movq    %rdx, (%rsi)
       ret
 //===---------------------------------------------------------------------===//
 Switch lowering generates less than ideal code for the following switch:
 define void @a(i32 %x) nounwind {
 entry:
--- a/llvm/test/CodeGen/X86/adde-carry.ll
+++ b/llvm/test/CodeGen/X86/adde-carry.ll
@ -0,0 +1,26 @@
 ; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64
 ; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32
 ; Regression test for folding a materialized carry into an add-with-carry.
 ; %a and %b are widened to i128 and summed; %c is then added into the upper
 ; 64 bits of that sum. The upper half (stored to %s) is therefore %c plus the
 ; carry out of %a + %b, and the lower half (stored to %t) is the wrapped sum.
 define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind {
 entry:
 ; 128-bit sum of the two zero-extended 64-bit inputs.
 %0 = zext i64 %a to i128
 %1 = zext i64 %b to i128
 %2 = add i128 %1, %0
 ; Place %c in bits 64..127 and add it to the sum.
 %3 = zext i64 %c to i128
 %4 = shl i128 %3, 64
 %5 = add i128 %4, %2
 ; Extract and store the upper 64 bits of the final value.
 %6 = lshr i128 %5, 64
 %7 = trunc i128 %6 to i64
 store i64 %7, i64* %s, align 8
 ; Store the lower 64 bits of the first sum (%2).
 %8 = trunc i128 %2 to i64
 store i64 %8, i64* %t, align 8
 ret void
 ; 32-bit x86 expectations: an add, an add-with-carry, then two add-with-carry
 ; of an immediate 0 (presumably one per 32-bit half of %c; confirm against
 ; llc output).
 ; CHECK-32: addl
 ; CHECK-32: adcl
 ; CHECK-32: adcl $0
 ; CHECK-32: adcl $0
 ; 64-bit x86 expectations: a single add followed by an add-with-carry of 0.
 ; CHECK-64: addq
 ; CHECK-64: adcq $0
 }