diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 47ca28cea92a..734eda042b84 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -84,10 +84,14 @@ public: /// this target. bool isSetCCExpensive() const { return SetCCIsExpensive; } - /// isIntDivExpensive() - Return true if integer divide is more expensive than + /// isIntDivCheap() - Return true if integer divide is usually cheaper than /// a sequence of several shifts, adds, and multiplies for this target. - bool isIntDivExpensive() const { return IntDivIsExpensive; } + bool isIntDivCheap() const { return IntDivIsCheap; } + /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of + /// srl/add/sra. + bool isPow2DivCheap() const { return Pow2DivIsCheap; } + /// getSetCCResultTy - Return the ValueType of the result of setcc operations. /// MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; } @@ -266,10 +270,15 @@ protected: /// setcc operations into other operations if possible. void setSetCCIsExpensive() { SetCCIsExpensive = true; } - /// setIntDivIsExpensive - Tells the code generator that integer divide is + /// setIntDivIsCheap - When isCheap is false, integer divide is considered /// expensive, and if possible, should be replaced by an alternate sequence /// of instructions not containing an integer divide. - void setIntDivIsExpensive() { IntDivIsExpensive = true; } + void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; } + + /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate + /// srl/add/sra for a signed divide by power of two, and let the target handle + /// it. + void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; } /// addRegisterClass - Add the specified register class as an available /// regclass for the specified value type. 
This indicates the selector can @@ -400,12 +409,16 @@ private: /// setcc operations into other operations if possible. bool SetCCIsExpensive; - /// IntDivIsExpensive - This is a hack until a real costs model is in place - /// that tells the code generator whether integer divide will always be more - /// expensive than a sequence of multiplies, shifts, and adds that performs - /// the same operation. If we ever optimize for size, this will be set to - /// false unconditionally. - bool IntDivIsExpensive; + /// IntDivIsCheap - Tells the code generator not to expand integer divides by + /// constants into a sequence of muls, adds, and shifts. This is a hack until + /// a real cost model is in place. If we ever optimize for size, this will be + /// set to true unconditionally. + bool IntDivIsCheap; + + /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate + /// srl/add/sra for a signed divide by power of two, and let the target handle + /// it. + bool Pow2DivIsCheap; /// SetCCResultTy - The type that SetCC operations use. This defaults to the /// PointerTy. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cf6106dbb33e..6c1d22c2bab7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -745,8 +745,7 @@ SDOperand DAGCombiner::visitMUL(SDNode *N) { return N1; // fold (mul x, -1) -> 0-x if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getValueType(0), - DAG.getConstant(0, N->getValueType(0)), N0); + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c if (N1C && isPowerOf2_64(N1C->getValue())) return DAG.getNode(ISD::SHL, N->getValueType(0), N0, @@ -777,21 +776,49 @@ SDOperand DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(), N->getValueType(0)); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSignExtended() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1); if (MaskedValueIsZero(N1, SignBit, TLI) && MaskedValueIsZero(N0, SignBit, TLI)) return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1); + // fold (sdiv X, pow2) -> (sra (add X, (srl X, sizeof(X)-1)), log(pow2)) + if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSignExtended()) || + isPowerOf2_64(-N1C->getSignExtended()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDOperand(); + int64_t pow2 = N1C->getSignExtended(); + int64_t abs2 = pow2 > 0 ? 
pow2 : -pow2; + SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0, + DAG.getConstant(MVT::getSizeInBits(VT)-1, + TLI.getShiftAmountTy())); + WorkList.push_back(SRL.Val); + SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL); + WorkList.push_back(SGN.Val); + SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN, + DAG.getConstant(Log2_64(abs2), + TLI.getShiftAmountTy())); + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + WorkList.push_back(SRA.Val); + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA); + } // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - // FIXME: This currently opts out powers of two, since targets can often be - // more clever in those cases. In an idea world, we would have some way to - // detect that too. - if (N1C && !isPowerOf2_64(N1C->getSignExtended()) && - (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && - TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT) && - TLI.isIntDivExpensive()) { + if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && + !TLI.isIntDivCheap() && + TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT)) { return BuildSDIV(N); } return SDOperand(); @@ -815,7 +842,7 @@ SDOperand DAGCombiner::visitUDIV(SDNode *N) { TLI.getShiftAmountTy())); // fold (udiv x, c) -> alternate if (N1C && N1C->getValue() && TLI.isOperationLegal(ISD::MULHU, VT) && - TLI.isTypeLegal(VT) && TLI.isIntDivExpensive()) + TLI.isTypeLegal(VT) && !TLI.isIntDivCheap()) return BuildUDIV(N); return SDOperand(); } diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4813e9d369f4..cc9e23c1cc6b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -996,6 +996,11 @@ SDOperand PPCDAGToDAGISel::Select(SDOperand Op) { return SDOperand(N, 0); } case ISD::SDIV: { + // FIXME: since 
this depends on the setting of the carry flag from the srawi + // we should really be making notes about that for the scheduler. + // FIXME: It sure would be nice if we could cheaply recognize the + // srl/add/sra pattern the dag combiner will generate for this as + // sra/addze rather than having to handle sdiv ourselves. oh well. unsigned Imm; if (isIntImmediate(N->getOperand(1), Imm)) { if ((signed)Imm > 0 && isPowerOf2_32(Imm)) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 118c605888a8..c9ed2bae1e3a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -27,8 +27,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) // Fold away setcc operations if possible. setSetCCIsExpensive(); - // Fold constant integer div/rem into an alternate sequence of instructions - setIntDivIsExpensive(); + setPow2DivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmpLongJmp(true); diff --git a/llvm/lib/Target/TargetLowering.cpp b/llvm/lib/Target/TargetLowering.cpp index c4ccbf5ef059..eaca703ad9fe 100644 --- a/llvm/lib/Target/TargetLowering.cpp +++ b/llvm/lib/Target/TargetLowering.cpp @@ -30,6 +30,8 @@ TargetLowering::TargetLowering(TargetMachine &tm) maxStoresPerMemSet = maxStoresPerMemCpy = maxStoresPerMemMove = 8; allowUnalignedMemoryAccesses = false; UseUnderscoreSetJmpLongJmp = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; } TargetLowering::~TargetLowering() {} diff --git a/llvm/lib/Target/X86/X86ISelPattern.cpp b/llvm/lib/Target/X86/X86ISelPattern.cpp index 531c33f09bc1..a244afd60189 100644 --- a/llvm/lib/Target/X86/X86ISelPattern.cpp +++ b/llvm/lib/Target/X86/X86ISelPattern.cpp @@ -3035,60 +3035,6 @@ unsigned ISel::SelectExpr(SDOperand N) { return Result; } } - - if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { - // FIXME: These special cases should be handled by the lowering impl! 
- unsigned RHS = CN->getValue(); - bool isNeg = false; - if ((int)RHS < 0) { - isNeg = true; - RHS = -RHS; - } - if (RHS && (RHS & (RHS-1)) == 0) { // Signed division by power of 2? - unsigned Log = Log2_32(RHS); - unsigned SAROpc, SHROpc, ADDOpc, NEGOpc; - switch (N.getValueType()) { - default: assert("Unknown type to signed divide!"); - case MVT::i8: - SAROpc = X86::SAR8ri; - SHROpc = X86::SHR8ri; - ADDOpc = X86::ADD8rr; - NEGOpc = X86::NEG8r; - break; - case MVT::i16: - SAROpc = X86::SAR16ri; - SHROpc = X86::SHR16ri; - ADDOpc = X86::ADD16rr; - NEGOpc = X86::NEG16r; - break; - case MVT::i32: - SAROpc = X86::SAR32ri; - SHROpc = X86::SHR32ri; - ADDOpc = X86::ADD32rr; - NEGOpc = X86::NEG32r; - break; - } - unsigned RegSize = MVT::getSizeInBits(N.getValueType()); - Tmp1 = SelectExpr(N.getOperand(0)); - unsigned TmpReg; - if (Log != 1) { - TmpReg = MakeReg(N.getValueType()); - BuildMI(BB, SAROpc, 2, TmpReg).addReg(Tmp1).addImm(Log-1); - } else { - TmpReg = Tmp1; - } - unsigned TmpReg2 = MakeReg(N.getValueType()); - BuildMI(BB, SHROpc, 2, TmpReg2).addReg(TmpReg).addImm(RegSize-Log); - unsigned TmpReg3 = MakeReg(N.getValueType()); - BuildMI(BB, ADDOpc, 2, TmpReg3).addReg(Tmp1).addReg(TmpReg2); - - unsigned TmpReg4 = isNeg ? MakeReg(N.getValueType()) : Result; - BuildMI(BB, SAROpc, 2, TmpReg4).addReg(TmpReg3).addImm(Log); - if (isNeg) - BuildMI(BB, NEGOpc, 1, Result).addReg(TmpReg4); - return Result; - } - } } if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {