Arithmetic instructions don't set the EFLAGS bits OF and CF

the same way the "test" instruction does in overflow cases, so eliminating the test is only safe when those bits aren't needed, as is the case for COND_E and COND_NE, or if it can be proven that no overflow will occur. For now, just restrict the optimization to COND_E and COND_NE and don't do any overflow analysis. llvm-svn: 66318
2009-03-07 01:58:32 +00:00 · 2009-03-07 01:58:32 +00:00 · ff659b5b86
parent 22988cf493
commit ff659b5b86
3 changed files with 52 additions and 14 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -5363,12 +5363,31 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {

 /// Emit nodes that will be selected as "test Op0,Op0", or something
 /// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+                                    SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();

+  // CF and OF aren't always set the way we want. Determine which
+  // of these we need.
+  bool NeedCF = false;
+  bool NeedOF = false;
+  switch (X86CC) {
+  case X86::COND_A: case X86::COND_AE:
+  case X86::COND_B: case X86::COND_BE:
+    NeedCF = true;
+    break;
+  case X86::COND_G: case X86::COND_GE:
+  case X86::COND_L: case X86::COND_LE:
+  case X86::COND_O: case X86::COND_NO:
+    NeedOF = true;
+    break;
+  default: break;
+  }
+
  // See if we can use the EFLAGS value from the operand instead of
-  // doing a separate TEST.
-  if (Op.getResNo() == 0) {
+  // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+  // we prove that the arithmetic won't overflow, we can't use OF or CF.
+  if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
    unsigned Opcode = 0;
    unsigned NumOperands = 0;
    switch (Op.getNode()->getOpcode()) {
@ -5425,9 +5444,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) {
    if (Opcode != 0) {
      const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::i32);
      SmallVector<SDValue, 4> Ops;
-      for (unsigned i = 0, e = NumOperands; i != e; ++i)
+      for (unsigned i = 0; i != NumOperands; ++i)
        Ops.push_back(Op.getOperand(i));
-      SDValue New = DAG.getNode(Opcode, dl, VTs, 2, &Ops[0], Ops.size());
+      SDValue New = DAG.getNode(Opcode, dl, VTs, 2, &Ops[0], NumOperands);
      DAG.ReplaceAllUsesWith(Op, New);
      return SDValue(New.getNode(), 1);
    }
@ -5440,10 +5459,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) {

 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
 /// equivalent.
-SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, SelectionDAG &DAG) {
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+                                   SelectionDAG &DAG) {
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
    if (C->getAPIntValue() == 0)
-      return EmitTest(Op0, DAG);
+      return EmitTest(Op0, X86CC, DAG);

  DebugLoc dl = Op0.getDebugLoc();
  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
@ -5511,7 +5531,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
  bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
  unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);

-  SDValue Cond = EmitCmp(Op0, Op1, DAG);
+  SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                     DAG.getConstant(X86CC, MVT::i8), Cond);
 }
@ -5677,7 +5697,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
-    Cond = EmitTest(Cond, DAG);
+    Cond = EmitTest(Cond, X86::COND_NE, DAG);
  }

  const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(),
@ -5827,7 +5847,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
-    Cond = EmitTest(Cond, DAG);
+    Cond = EmitTest(Cond, X86::COND_NE, DAG);
  }
  return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
                     Chain, Dest, CC, Cond);
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@ -661,12 +661,13 @@ namespace llvm {
                                                        unsigned cmovOpc) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
-    /// equivalent.
-    SDValue EmitTest(SDValue Op0, SelectionDAG &DAG);
+    /// equivalent, for use with the given x86 condition code.
+    SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
-    /// equivalent.
-    SDValue EmitCmp(SDValue Op0, SDValue Op1, SelectionDAG &DAG);
+    /// equivalent, for use with the given x86 condition code.
+    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+                    SelectionDAG &DAG);
  };

  namespace X86 {
--- a/llvm/test/CodeGen/X86/peep-test-2.ll
+++ b/llvm/test/CodeGen/X86/peep-test-2.ll
@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep testl
+
+; It's tempting to eliminate the testl instruction here and just use the
+; EFLAGS value from the incl, however it can't be known whether the add
+; will overflow, and if it does the incl would set OF, and the
+; subsequent setg would return true.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @f(i32 %j) nounwind readnone {
+entry:
+	%0 = add i32 %j, 1		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 0		; <i1> [#uses=1]
+	%2 = zext i1 %1 to i32		; <i32> [#uses=1]
+	ret i32 %2
+}