diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 5664f7925027..49ddfb8b613b 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -30,6 +30,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // sync instruction def int_ppc_sync : Intrinsic<[], [], []>; + + // Intrinsics used to generate ctr-based loops. These should only be + // generated by the PowerPC backend! + def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>; + def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>; } diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index b4be51a8caec..28b654cf08d0 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -30,7 +30,7 @@ namespace llvm { class AsmPrinter; class MCInst; - FunctionPass *createPPCCTRLoops(); + FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM); FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 81a54d7015b0..bb969a635ec6 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -9,49 +9,53 @@ // // This pass identifies loops where we can generate the PPC branch instructions // that decrement and test the count register (CTR) (bdnz and friends). -// This pass is based on the HexagonHardwareLoops pass. // // The pattern that defines the induction variable can changed depending on // prior optimizations. For example, the IndVarSimplify phase run by 'opt' // normalizes induction variables, and the Loop Strength Reduction pass // run by 'llc' may also make changes to the induction variable. -// The pattern detected by this phase is due to running Strength Reduction. 
// // Criteria for CTR loops: // - Countable loops (w/ ind. var for a trip count) -// - Assumes loops are normalized by IndVarSimplify // - Try inner-most loops first // - No nested CTR loops. // - No function calls in loops. // -// Note: As with unconverted loops, PPCBranchSelector must be run after this -// pass in order to convert long-displacement jumps into jump pairs. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ctrloops" -#include "PPC.h" -#include "MCTargetDesc/PPCPredicates.h" -#include "PPCTargetMachine.h" -#include "llvm/ADT/DenseMap.h" + +#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "PPCTargetMachine.h" +#include "PPC.h" + #include +#include using namespace llvm; +#ifndef NDEBUG +static cl::opt CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); +#endif + STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { 
@@ -59,717 +63,438 @@ namespace llvm { } namespace { - class CountValue; - struct PPCCTRLoops : public MachineFunctionPass { - MachineLoopInfo *MLI; - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; + struct PPCCTRLoops : public FunctionPass { + +#ifndef NDEBUG + static int Counter; +#endif public: - static char ID; // Pass identification, replacement for typeid + static char ID; - PPCCTRLoops() : MachineFunctionPass(ID) { + PPCCTRLoops() : FunctionPass(ID), TM(0) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } + PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "PPC CTR Loops"; } + virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); } private: - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable. - /// Should be defined in MachineLoop. Based upon version in class Loop. - void getCanonicalInductionVariable(MachineLoop *L, - SmallVector &IVars, - SmallVector &IOps) const; + // FIXME: Copied from LoopSimplify. + BasicBlock *InsertPreheaderForLoop(Loop *L); + void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl &SplitPreds, + Loop *L); - /// getTripCount - Return a loop-invariant LLVM register indicating the - /// number of times the loop will be executed. If the trip-count cannot - /// be determined, this return null. 
- CountValue *getTripCount(MachineLoop *L, - SmallVector &OldInsts) const; - - /// isInductionOperation - Return true if the instruction matches the - /// pattern for an opertion that defines an induction variable. - bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; - - /// isInvalidOperation - Return true if the instruction is not valid within - /// a CTR loop. - bool isInvalidLoopOperation(const MachineInstr *MI) const; - - /// containsInavlidInstruction - Return true if the loop contains an - /// instruction that inhibits using the CTR loop. - bool containsInvalidInstruction(MachineLoop *L) const; - - /// converToCTRLoop - Given a loop, check if we can convert it to a - /// CTR loop. If so, then perform the conversion and return true. - bool convertToCTRLoop(MachineLoop *L); - - /// isDead - Return true if the instruction is now dead. - bool isDead(const MachineInstr *MI, - SmallVector &DeadPhis) const; - - /// removeIfDead - Remove the instruction if it is now dead. - void removeIfDead(MachineInstr *MI); + bool convertToCTRLoop(Loop *L); + private: + PPCTargetMachine *TM; + LoopInfo *LI; + ScalarEvolution *SE; + DataLayout *TD; + DominatorTree *DT; + const TargetLibraryInfo *LibInfo; }; char PPCCTRLoops::ID = 0; - - - // CountValue class - Abstraction for a trip count of a loop. A - // smaller vesrsion of the MachineOperand class without the concerns - // of changing the operand representation. 
- class CountValue { - public: - enum CountValueType { - CV_Register, - CV_Immediate - }; - private: - CountValueType Kind; - union Values { - unsigned RegNum; - int64_t ImmVal; - Values(unsigned r) : RegNum(r) {} - Values(int64_t i) : ImmVal(i) {} - } Contents; - bool isNegative; - - public: - CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), - isNegative(neg) {} - explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), - isNegative(i < 0) {} - CountValueType getType() const { return Kind; } - bool isReg() const { return Kind == CV_Register; } - bool isImm() const { return Kind == CV_Immediate; } - bool isNeg() const { return isNegative; } - - unsigned getReg() const { - assert(isReg() && "Wrong CountValue accessor"); - return Contents.RegNum; - } - void setReg(unsigned Val) { - Contents.RegNum = Val; - } - int64_t getImm() const { - assert(isImm() && "Wrong CountValue accessor"); - if (isNegative) { - return -Contents.ImmVal; - } - return Contents.ImmVal; - } - void setImm(int64_t Val) { - Contents.ImmVal = Val; - } - - void print(raw_ostream &OS, const TargetMachine *TM = 0) const { - if (isReg()) { OS << PrintReg(getReg()); } - if (isImm()) { OS << getImm(); } - } - }; +#ifndef NDEBUG + int PPCCTRLoops::Counter = 0; +#endif } // end anonymous namespace INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -/// isCompareEquals - Returns true if the instruction is a compare equals -/// instruction with an immediate operand. 
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, - bool &Int64Cmp) { - if (MI->getOpcode() == PPC::CMPWI) { - SignedCmp = true; - Int64Cmp = false; - return true; - } else if (MI->getOpcode() == PPC::CMPDI) { - SignedCmp = true; - Int64Cmp = true; - return true; - } else if (MI->getOpcode() == PPC::CMPLWI) { - SignedCmp = false; - Int64Cmp = false; - return true; - } else if (MI->getOpcode() == PPC::CMPLDI) { - SignedCmp = false; - Int64Cmp = true; - return true; - } - - return false; +FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { + return new PPCCTRLoops(TM); } +bool PPCCTRLoops::runOnFunction(Function &F) { + LI = &getAnalysis(); + SE = &getAnalysis(); + DT = &getAnalysis(); + TD = getAnalysisIfAvailable(); + LibInfo = getAnalysisIfAvailable(); -/// createPPCCTRLoops - Factory for creating -/// the CTR loop phase. -FunctionPass *llvm::createPPCCTRLoops() { - return new PPCCTRLoops(); -} + bool MadeChange = false; - -bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********* PPC CTR Loops *********\n"); - - bool Changed = false; - - // get the loop information - MLI = &getAnalysis(); - // get the register information - MRI = &MF.getRegInfo(); - // the target specific instructio info. - TII = MF.getTarget().getInstrInfo(); - - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { - MachineLoop *L = *I; - if (!L->getParentLoop()) { - Changed |= convertToCTRLoop(L); - } + Loop *L = *I; + if (!L->getParentLoop()) + MadeChange |= convertToCTRLoop(L); } - return Changed; + return MadeChange; } -/// getCanonicalInductionVariable - Check to see if the loop has a canonical -/// induction variable. We check for a simple recurrence pattern - an -/// integer recurrence that decrements by one each time through the loop and -/// ends at zero. If so, return the phi node that corresponds to it. 
-/// -/// Based upon the similar code in LoopInfo except this code is specific to -/// the machine. -/// This method assumes that the IndVarSimplify pass has been run by 'opt'. -/// -void -PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, - SmallVector &IVars, - SmallVector &IOps) const { - MachineBasicBlock *TopMBB = L->getTopBlock(); - MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); - assert(PI != TopMBB->pred_end() && - "Loop must have more than one incoming edge!"); - MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return; // dead loop - MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return; // multiple backedges? +bool PPCCTRLoops::convertToCTRLoop(Loop *L) { + bool MadeChange = false; - // make sure there is one incoming and one backedge and determine which - // is which. - if (L->contains(Incoming)) { - if (L->contains(Backedge)) - return; - std::swap(Incoming, Backedge); - } else if (!L->contains(Backedge)) - return; + Triple TT = Triple(L->getHeader()->getParent()->getParent()-> + getTargetTriple()); + if (!TT.isArch32Bit() && !TT.isArch64Bit()) + return MadeChange; // Unknown arch. type. - // Loop over all of the PHI nodes, looking for a canonical induction variable: - // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". - // - The recurrence comes from the backedge. - // - the definition is an induction operatio.n - for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); - I != E && I->isPHI(); ++I) { - MachineInstr *MPhi = &*I; - unsigned DefReg = MPhi->getOperand(0).getReg(); - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - // Check each operand for the value from the backedge. - MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); - if (L->contains(MBB)) { // operands comes from the backedge - // Check if the definition is an induction operation. 
- MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); - if (isInductionOperation(DI, DefReg)) { - IOps.push_back(DI); - IVars.push_back(MPhi); - } - } - } - } - return; -} - -/// getTripCount - Return a loop-invariant LLVM value indicating the -/// number of times the loop will be executed. The trip count can -/// be either a register or a constant value. If the trip-count -/// cannot be determined, this returns null. -/// -/// We find the trip count from the phi instruction that defines the -/// induction variable. We follow the links to the CMP instruction -/// to get the trip count. -/// -/// Based upon getTripCount in LoopInfo. -/// -CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, - SmallVector &OldInsts) const { - MachineBasicBlock *LastMBB = L->getExitingBlock(); - // Don't generate a CTR loop if the loop has more than one exit. - if (LastMBB == 0) - return 0; - - MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - if (LastI->getOpcode() != PPC::BCC) - return 0; - - // We need to make sure that this compare is defining the condition - // register actually used by the terminating branch. - - unsigned PredReg = LastI->getOperand(1).getReg(); - DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); - - unsigned PredCond = LastI->getOperand(0).getImm(); - if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) - return 0; - - // Check that the loop has a induction variable. 
- SmallVector IVars, IOps; - getCanonicalInductionVariable(L, IVars, IOps); - for (unsigned i = 0; i < IVars.size(); ++i) { - MachineInstr *IOp = IOps[i]; - MachineInstr *IV_Inst = IVars[i]; - - // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } - - DEBUG(dbgs() << "Considering:\n"); - DEBUG(dbgs() << " induction operation: " << *IOp); - DEBUG(dbgs() << " induction variable: " << *IV_Inst); - DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); - - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). 
- for (MachineRegisterInfo::reg_iterator - RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); - RI != RE; ++RI) { - IV_Opnd = &RI.getOperand(); - bool SignedCmp, Int64Cmp; - MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && - MI->getOperand(0).getReg() == PredReg) { - - OldInsts.push_back(MI); - OldInsts.push_back(IOp); - - DEBUG(dbgs() << " compare: " << *MI); - - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - - int64_t ImmVal; - if (SignedCmp) - ImmVal = (short) MO.getImm(); - else - ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); - - assert(InitialValue->isReg() && "Expecting register for init value"); - unsigned InitialValueReg = InitialValue->getReg(); - - MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); - - // Here we need to look for an immediate load (an li or lis/ori pair). - if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || - DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = DefInstr->getOperand(2).getImm(); - MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(1).getReg()); - if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || - DefInstr2->getOpcode() == PPC::LIS)) { - DEBUG(dbgs() << " initial constant: " << *DefInstr); - DEBUG(dbgs() << " initial constant: " << *DefInstr2); - - start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; - - int64_t count = ImmVal - start; - if ((count % iv_value) != 0) { - return 0; - } - - OldInsts.push_back(DefInstr); - OldInsts.push_back(DefInstr2); - - // count/iv_value, the trip count, should be positive here. If it - // is negative, that indicates that the counter will wrap. 
- if (Int64Cmp) - return new CountValue(count/iv_value); - else - return new CountValue(uint32_t(count/iv_value)); - } - } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || - DefInstr->getOpcode() == PPC::LI)) { - DEBUG(dbgs() << " initial constant: " << *DefInstr); - - int64_t count = ImmVal - - int64_t(short(DefInstr->getOperand(1).getImm())); - if ((count % iv_value) != 0) { - return 0; - } - - OldInsts.push_back(DefInstr); - - if (Int64Cmp) - return new CountValue(count/iv_value); - else - return new CountValue(uint32_t(count/iv_value)); - } else if (iv_value == 1 || iv_value == -1) { - // We can't determine a constant starting value. - if (ImmVal == 0) { - return new CountValue(InitialValueReg, iv_value > 0); - } - // FIXME: handle non-zero end value. - } - // FIXME: handle non-unit increments (we might not want to introduce - // division but we can handle some 2^n cases with shifts). - - } - } - } - return 0; -} - -/// isInductionOperation - return true if the operation is matches the -/// pattern that defines an induction variable: -/// addi iv, c -/// -bool -PPCCTRLoops::isInductionOperation(const MachineInstr *MI, - unsigned IVReg) const { - return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) && - MI->getOperand(1).isReg() && // could be a frame index instead - MI->getOperand(1).getReg() == IVReg); -} - -/// isInvalidOperation - Return true if the operation is invalid within -/// CTR loop. 
-bool -PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const { - - // call is not allowed because the callee may use a CTR loop - if (MI->getDesc().isCall()) { - return true; - } - // check if the instruction defines a CTR loop register - // (this will also catch nested CTR loops) - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && - (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) { - return true; - } - } - return false; -} - -/// containsInvalidInstruction - Return true if the loop contains -/// an instruction that inhibits the use of the CTR loop function. -/// -bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const { - const std::vector Blocks = L->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *MBB = Blocks[i]; - for (MachineBasicBlock::iterator - MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - const MachineInstr *MI = &*MII; - if (isInvalidLoopOperation(MI)) { - return true; - } - } - } - return false; -} - -/// isDead returns true if the instruction is dead -/// (this was essentially copied from DeadMachineInstructionElim::isDead, but -/// with special cases for inline asm, physical registers and instructions with -/// side effects removed) -bool PPCCTRLoops::isDead(const MachineInstr *MI, - SmallVector &DeadPhis) const { - // Examine each operand. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for - // the parent block, and the only use of that phi node is this - // instruction, then this instruction is dead: both it (and the phi - // node) can be removed. 
- MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); - if (llvm::next(I) == MRI->use_end() && - I.getOperand().getParent()->isPHI()) { - MachineInstr *OnePhi = I.getOperand().getParent(); - - for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { - const MachineOperand &OPO = OnePhi->getOperand(j); - if (OPO.isReg() && OPO.isDef()) { - unsigned OPReg = OPO.getReg(); - - MachineRegisterInfo::use_iterator nextJ; - for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg), - E = MRI->use_end(); J!=E; J=nextJ) { - nextJ = llvm::next(J); - MachineOperand& Use = J.getOperand(); - MachineInstr *UseMI = Use.getParent(); - - if (MI != UseMI) { - // The phi node has a user that is not MI, bail... - return false; - } - } - } - } - - DeadPhis.push_back(OnePhi); - } else { - // This def has a non-debug use. Don't delete the instruction! - return false; - } - } - } - } - - // If there are no defs with uses, the instruction is dead. - return true; -} - -void PPCCTRLoops::removeIfDead(MachineInstr *MI) { - // This procedure was essentially copied from DeadMachineInstructionElim - - SmallVector DeadPhis; - if (isDead(MI, DeadPhis)) { - DEBUG(dbgs() << "CTR looping will remove: " << *MI); - - // It is possible that some DBG_VALUE instructions refer to this - // instruction. Examine each def operand for such references; - // if found, mark the DBG_VALUE as undef (but don't delete it). 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - MachineRegisterInfo::use_iterator nextI; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), - E = MRI->use_end(); I!=E; I=nextI) { - nextI = llvm::next(I); // I is invalidated by the setReg - MachineOperand& Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI==MI) - continue; - if (Use.isDebug()) // this might also be a instr -> phi -> instr case - // which can also be removed. - UseMI->getOperand(0).setReg(0U); - } - } - - MI->eraseFromParent(); - for (unsigned i = 0; i < DeadPhis.size(); ++i) { - DeadPhis[i]->eraseFromParent(); - } - } -} - -/// converToCTRLoop - check if the loop is a candidate for -/// converting to a CTR loop. If so, then perform the -/// transformation. -/// -/// This function works on innermost loops first. A loop can -/// be converted if it is a counting loop; either a register -/// value or an immediate. -/// -/// The code makes several assumptions about the representation -/// of the loop in llvm. -bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { - bool Changed = false; // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - Changed |= convertToCTRLoop(*I); + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + MadeChange |= convertToCTRLoop(*I); } + // If a nested loop has been converted, then we can't convert this loop. - if (Changed) { - return Changed; - } + if (MadeChange) + return MadeChange; - SmallVector OldInsts; - // Are we able to determine the trip count for the loop? 
- CountValue *TripCount = getTripCount(L, OldInsts); - if (TripCount == 0) { - DEBUG(dbgs() << "failed to get trip count!\n"); - return false; - } - - if (TripCount->isImm()) { - DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); - - // FIXME: We currently can't form 64-bit constants - // (including 32-bit unsigned constants) - if (!isInt<32>(TripCount->getImm())) +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = CTRLoopLimit; + if (Limit >= 0) { + if (Counter >= CTRLoopLimit) return false; + Counter++; } +#endif - // Does the loop contain any invalid instructions? - if (containsInvalidInstruction(L)) { - return false; - } - MachineBasicBlock *Preheader = L->getLoopPreheader(); - // No preheader means there's not place for the loop instr. - if (Preheader == 0) { - return false; - } - MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + // We don't want to spill/restore the counter register, and so we don't + // want to use the counter register if the loop contains calls. + for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); + I != IE; ++I) { + for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); + J != JE; ++J) { + if (CallInst *CI = dyn_cast(J)) { + if (!TM) + return MadeChange; + const TargetLowering *TLI = TM->getTargetLowering(); - DebugLoc dl; - if (InsertPos != Preheader->end()) - dl = InsertPos->getDebugLoc(); + if (Function *F = CI->getCalledFunction()) { + // Most intrinsics don't become function calls, but some might. + // sin, cos, exp and log are always calls. 
+ unsigned Opcode; + if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { + switch (F->getIntrinsicID()) { + default: continue; + case Intrinsic::setjmp: + case Intrinsic::longjmp: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::powi: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + return MadeChange; + case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + } + } - MachineBasicBlock *LastMBB = L->getExitingBlock(); - // Don't generate CTR loop if the loop has more than one exit. - if (LastMBB == 0) { - return false; - } - MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + // PowerPC does not use [US]DIVREM or other library calls for + // operations on regular types which are not otherwise library calls + // (i.e. soft float or atomics). If adapting for targets that do, + // additional care is required here. - // Determine the loop start. - MachineBasicBlock *LoopStart = L->getTopBlock(); - if (L->getLoopLatch() != LastMBB) { - // When the exit and latch are not the same, use the latch block as the - // start. - // The loop start address is used only after the 1st iteration, and the loop - // latch may contains instrs. that need to be executed after the 1st iter. - LoopStart = L->getLoopLatch(); - // Make sure the latch is a successor of the exit, otherwise it won't work. 
- if (!LastMBB->isSuccessor(LoopStart)) { - return false; + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && LibInfo && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + // Non-read-only functions are never treated as intrinsics. + if (!CI->onlyReadsMemory()) + return MadeChange; + + // Conversion happens only for FP calls. + if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) + return MadeChange; + + switch (Func) { + default: return MadeChange; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + continue; // ISD::FABS is never a library call. + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + Opcode = ISD::FSQRT; break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + Opcode = ISD::FFLOOR; break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + Opcode = ISD::FNEARBYINT; break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + Opcode = ISD::FCEIL; break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + Opcode = ISD::FRINT; break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + Opcode = ISD::FTRUNC; break; + } + + MVT VTy = + TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); + if (VTy == MVT::Other) + return MadeChange; + + if (TLI->isOperationLegalOrCustom(Opcode, VTy)) + continue; + else if (VTy.isVector() && + TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) + continue; + + return MadeChange; + } + } + + return MadeChange; + } else if (isa(J) && + J->getType()->getScalarType()->isPPC_FP128Ty()) { + // Most operations on ppc_f128 values become calls. 
+ return MadeChange; + } else if (isa(J) || isa(J) || + isa(J) || isa(J)) { + CastInst *CI = cast(J); + if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || + CI->getDestTy()->getScalarType()->isPPC_FP128Ty()) + return MadeChange; + } else if (isa(J) || isa(J)) { + // On PowerPC, indirect jumps use the counter register. + return MadeChange; + } else if (SwitchInst *SI = dyn_cast(J)) { + if (!TM) + return MadeChange; + const TargetLowering *TLI = TM->getTargetLowering(); + + if (TLI->supportJumpTables() && + SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) + return MadeChange; + } } } - // Convert the loop to a CTR loop - DEBUG(dbgs() << "Change to CTR loop at "; L->dump()); + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); - MachineFunction *MF = LastMBB->getParent(); - const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget(); - bool isPPC64 = Subtarget.isPPC64(); + BasicBlock *CountedExitBlock = 0; + const SCEV *ExitCount = 0; + BranchInst *CountedExitBranch = 0; + for (SmallVector::iterator I = ExitingBlocks.begin(), + IE = ExitingBlocks.end(); I != IE; ++I) { + const SCEV *EC = SE->getExitCount(L, *I); + DEBUG(dbgs() << "Exit Count for " << *L << " from block " << + (*I)->getName() << ": " << *EC << "\n"); + if (isa(EC)) + continue; + if (const SCEVConstant *ConstEC = dyn_cast(EC)) { + if (ConstEC->getValue()->isZero()) + continue; + } else if (!SE->isLoopInvariant(EC, L)) + continue; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; + // We now have a loop-invariant count of loop iterations (which is not the + // constant zero) for which we know that this loop will not exit via this + // exisiting block. - unsigned CountReg; - if (TripCount->isReg()) { - // Create a copy of the loop count register. 
- const TargetRegisterClass *SrcRC = - MF->getRegInfo().getRegClass(TripCount->getReg()); - CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? - (unsigned) PPC::EXTSW_32_64 : - (unsigned) TargetOpcode::COPY; - BuildMI(*Preheader, InsertPos, dl, - TII->get(CopyOp), CountReg).addReg(TripCount->getReg()); - if (TripCount->isNeg()) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG), - CountReg).addReg(CountReg1); + // We need to make sure that this block will run on every loop iteration. + // For this to be true, we must dominate all blocks with backedges. Such + // blocks are in-loop predecessors to the header block. + bool NotAlways = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { + if (!L->contains(*PI)) + continue; + + if (!DT->dominates(*I, *PI)) { + NotAlways = true; + break; + } } - } else { - assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); - // Put the trip count in a register for transfer into the count register. - int64_t CountImm = TripCount->getImm(); - if (TripCount->isNeg()) - CountImm = -CountImm; + if (NotAlways) + continue; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (abs64(CountImm) > 0x7FFF) { - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm((CountImm >> 16) & 0xFFFF); - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI), - CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF); - } else { - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::LI8 : PPC::LI), - CountReg).addImm(CountImm); - } + // Make sure this blocks ends with a conditional branch. 
+ Instruction *TI = (*I)->getTerminator(); + if (!TI) + continue; + + if (BranchInst *BI = dyn_cast(TI)) { + if (!BI->isConditional()) + continue; + + CountedExitBranch = BI; + } else + continue; + + // Note that this block may not be the loop latch block, even if the loop + // has a latch block. + CountedExitBlock = *I; + ExitCount = EC; + break; } - // Add the mtctr instruction to the beginning of the loop. - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg, - TripCount->isImm() ? RegState::Kill : 0); + if (!CountedExitBlock) + return MadeChange; - // Make sure the loop start always has a reference in the CFG. We need to - // create a BlockAddress operand to get this mechanism to work both the - // MachineBasicBlock and BasicBlock objects need the flag set. - LoopStart->setHasAddressTaken(); - // This line is needed to set the hasAddressTaken flag on the BasicBlock - // object - BlockAddress::get(const_cast(LoopStart->getBasicBlock())); + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + Preheader = InsertPreheaderForLoop(L); + if (!Preheader) + return MadeChange; - // Replace the loop branch with a bdnz instruction. - dl = LastI->getDebugLoc(); - const std::vector Blocks = L->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *MBB = Blocks[i]; - if (MBB != Preheader) - MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR); - } + DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); - // The loop ends with either: - // - a conditional branch followed by an unconditional branch, or - // - a conditional branch to the loop start. - assert(LastI->getOpcode() == PPC::BCC && - "loop end must start with a BCC instruction"); - // Either the BCC branches to the beginning of the loop, or it - // branches out of the loop and there is an unconditional branch - // to the start of the loop. 
- MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB(); - BuildMI(*LastMBB, LastI, dl, - TII->get((BranchTarget == LoopStart) ? - (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : - (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget); + // Insert the count into the preheader and replace the condition used by the + // selected branch. + MadeChange = true; - // Conditional branch; just delete it. - DEBUG(dbgs() << "Removing old branch: " << *LastI); - LastMBB->erase(LastI); + SCEVExpander SCEVE(*SE, "loopcnt"); + LLVMContext &C = SE->getContext(); + Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : + Type::getInt32Ty(C); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); + ExitCount = SE->getAddExpr(ExitCount, + SE->getConstant(CountType, 1)); + Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, + Preheader->getTerminator()); - delete TripCount; + IRBuilder<> CountBuilder(Preheader->getTerminator()); + Module *M = Preheader->getParent()->getParent(); + Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, + CountType); + CountBuilder.CreateCall(MTCTRFunc, ECValue); - // The induction operation (add) and the comparison (cmpwi) may now be - // unneeded. If these are unneeded, then remove them. - for (unsigned i = 0; i < OldInsts.size(); ++i) - removeIfDead(OldInsts[i]); + IRBuilder<> CondBuilder(CountedExitBranch); + Value *DecFunc = + Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); + Value *NewCond = CondBuilder.CreateCall(DecFunc); + Value *OldCond = CountedExitBranch->getCondition(); + CountedExitBranch->setCondition(NewCond); + + // The false branch must exit the loop. + if (!L->contains(CountedExitBranch->getSuccessor(0))) + CountedExitBranch->swapSuccessors(); + + // The old condition may be dead now, and may have even created a dead PHI + // (the original induction variable). 
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond); + DeleteDeadPHIs(CountedExitBlock); ++NumCTRLoops; - return true; + return MadeChange; +} + +// FIXME: Copied from LoopSimplify. +BasicBlock *PPCCTRLoops::InsertPreheaderForLoop(Loop *L) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + SmallVector OutsideBlocks; + for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? + // If the loop is branched to from an indirect branch, we won't + // be able to fully transform the loop, because it prohibits + // edge splitting. + if (isa(P->getTerminator())) return 0; + + // Keep track of it. + OutsideBlocks.push_back(P); + } + } + + // Split out the loop pre-header. + BasicBlock *PreheaderBB; + if (!Header->isLandingPad()) { + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + this); + } else { + SmallVector NewBBs; + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + ".split-lp", this, NewBBs); + PreheaderBB = NewBBs[0]; + } + + PreheaderBB->getTerminator()->setDebugLoc( + Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "Creating pre-header " + << PreheaderBB->getName() << "\n"); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + + return PreheaderBB; +} + +void PPCCTRLoops::PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = NewBB; --BBI; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. 
This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = 0; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]; + if (++BBI != NewBB->getParent()->end() && + L->contains(BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); } diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index aed0fbb6c84e..2d71b346074f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1242,6 +1242,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getI32Imm(BROpc) }; return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4); } + case PPCISD::BDNZ: + case PPCISD::BDZ: { + bool IsPPC64 = PPCSubTarget.isPPC64(); + SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; + return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? + (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), + MVT::Other, Ops, 2); + } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. // Op #1 is the PPC::PRED_* number. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index eee2bb87defe..3e65606c43ae 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -313,6 +313,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + // To handle counter-based loop conditions. 
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + // Comparisons that require checking two conditions. setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); @@ -646,6 +649,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LARX: return "PPCISD::LARX"; case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; + case PPCISD::BDNZ: return "PPCISD::BDNZ"; + case PPCISD::BDZ: return "PPCISD::BDZ"; case PPCISD::MFFS: return "PPCISD::MFFS"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; @@ -5777,6 +5782,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + // For counter-based loop handling. + case ISD::INTRINSIC_W_CHAIN: return SDValue(); + // Frame & Return address. 
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); @@ -5791,6 +5799,22 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); + case ISD::INTRINSIC_W_CHAIN: { + if (cast(N->getOperand(1))->getZExtValue() != + Intrinsic::ppc_is_decremented_ctr_nonzero) + break; + + assert(N->getValueType(0) == MVT::i1 && + "Unexpected result type for CTR decrement intrinsic"); + EVT SVT = getSetCCResultType(N->getValueType(0)); + SDVTList VTs = DAG.getVTList(SVT, MVT::Other); + SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), + N->getOperand(1)); + + Results.push_back(NewInt); + Results.push_back(NewInt.getValue(1)); + break; + } case ISD::VAARG: { if (!TM.getSubtarget().isSVR4ABI() || TM.getSubtarget().isPPC64()) @@ -7102,6 +7126,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); + + // Sometimes the promoted value of the intrinsic is ANDed by some non-zero + // value. If so, pass-through the AND to get to the intrinsic. 
+ if (LHS.getOpcode() == ISD::AND && + LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero && + isa(LHS.getOperand(1)) && + !cast(LHS.getOperand(1))->getConstantIntValue()-> + isZero()) + LHS = LHS.getOperand(0); + + if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast(LHS.getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero && + isa(RHS)) { + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && + "Counter decrement comparison is not EQ or NE"); + + unsigned Val = cast(RHS)->getZExtValue(); + bool isBDNZ = (CC == ISD::SETEQ && Val) || + (CC == ISD::SETNE && !Val); + + // We now need to make the intrinsic dead (it cannot be instruction + // selected). + DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); + assert(LHS.getNode()->hasOneUse() && + "Counter decrement has more than one use"); + + return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, + N->getOperand(0), N->getOperand(4)); + } + int CompareOpc; bool isDot; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 2a1cc121daea..fc4314c4932d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -146,6 +146,10 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, + /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based + /// loops. + BDNZ, BDZ, + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding /// towards zero. Used only as part of the long double-to-int /// conversion sequence. 
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index bff4c230ce68..4b3c22d7cf1d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -293,6 +293,12 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in { +let Pattern = [(int_ppc_mtctr i64:$rS)] in +def MTCTR8se : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 9c39b34ab079..6501ef713f65 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1700,6 +1700,12 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { +let Pattern = [(int_ppc_mtctr i32:$rS)] in +def MTCTRse : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2be6324fd7be..7a74f3d459a9 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -136,6 +136,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::FP); Reserved.set(PPC::FP8); + // The counter registers must be reserved so that counter-based loops can + // be correctly formed (and the mtctr instructions are not DCE'd). 
+ Reserved.set(PPC::CTR); + Reserved.set(PPC::CTR8); + Reserved.set(PPC::R1); Reserved.set(PPC::LR); Reserved.set(PPC::LR8); diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 0b099edff400..ca012670f3b3 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -91,7 +91,7 @@ public: return *getPPCTargetMachine().getSubtargetImpl(); } - virtual bool addPreRegAlloc(); + virtual bool addPreISel(); virtual bool addILPOpts(); virtual bool addInstSelector(); virtual bool addPreSched2(); @@ -103,9 +103,9 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { return new PPCPassConfig(this, PM); } -bool PPCPassConfig::addPreRegAlloc() { +bool PPCPassConfig::addPreISel() { if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoops()); + addPass(createPPCCTRLoops(getPPCTargetMachine())); return false; } diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll new file mode 100644 index 000000000000..21a6faba2869 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll @@ -0,0 +1,446 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck %s + +; CHECK: test_pos1_ir_sle +; CHECK: bdnz +; a < b +define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 28395, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 
%conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ir_sle +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 9073, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ir_sle +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 21956, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ir_sle +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 16782, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ir_sle +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 19097, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_ri_sle +; CHECK: bdnz +; a < b +define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 14040 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr 
inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp sle i32 %inc, 14040 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ri_sle +; CHECK: bdnz +; a < b +define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 13710 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp sle i32 %inc, 13710 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ri_sle +; CHECK: bdnz +; a < b +define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 9920 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp sle i32 %inc, 9920 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ri_sle +; CHECK: bdnz +; a < b +define void 
@test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 18924 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp sle i32 %inc, 18924 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ri_sle +; CHECK: bdnz +; a < b +define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 11812 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp sle i32 %inc, 11812 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_rr_sle +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_rr_sle +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_rr_sle +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_rr_sle +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_rr_sle +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll b/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll new file mode 100644 index 000000000000..448716d6f419 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll @@ -0,0 +1,445 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck %s + +; CHECK: test_pos1_ir_slt +; CHECK: bdnz +; a < b +define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 8531, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: 
test_pos2_ir_slt +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 9152, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ir_slt +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 18851, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ir_slt +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 25466, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ir_slt +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 9295, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_ri_slt +; CHECK: bdnz +; a < b +define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 31236 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr 
inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %inc, 31236 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ri_slt +; CHECK: bdnz +; a < b +define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 22653 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %inc, 22653 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ri_slt +; CHECK: bdnz +; a < b +define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 1431 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp slt i32 %inc, 1431 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ri_slt +; CHECK: bdnz +; a < b +define void 
@test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 22403 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp slt i32 %inc, 22403 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ri_slt +; CHECK: bdnz +; a < b +define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 21715 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp slt i32 %inc, 21715 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_rr_slt +; CHECK: bdnz +; a < b +define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext 
i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_rr_slt +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_rr_slt +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_rr_slt +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_rr_slt +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll b/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll new file mode 100644 index 000000000000..636030a15dd2 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll @@ -0,0 +1,449 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck 
%s + +; CHECK: test_pos1_ir_ne +; CHECK: bdnz +; a < b +define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 32623, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ir_ne +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 29554, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ir_ne +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 15692, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ir_ne +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 10449, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ir_ne +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 32087, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_ri_ne +; CHECK: bdnz +; a < b +define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 3472 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp ne i32 %inc, 3472 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ri_ne +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 8730 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp ne i32 %inc, 8730 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ri_ne +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 1493 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp ne i32 %inc, 1493 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ri_ne +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 1706 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp ne i32 %inc, 1706 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ri_ne +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 1886 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp ne i32 %inc, 1886 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_rr_ne +; CHECK: bdnz +; a < b +define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* 
%p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_rr_ne +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_rr_ne +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_rr_ne +; FIXME: Support this loop! +; CHECK-NOT: bdnz +; a < b +define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_rr_ne +; FIXME: Support this loop! 
+; CHECK-NOT: bdnz +; a < b +define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/ctrloops.ll b/llvm/test/CodeGen/PowerPC/ctrloops.ll index f11e332d5fbc..ca00f687aa4e 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloops.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloops.ll @@ -22,7 +22,7 @@ for.end: ; preds = %for.body ; CHECK: @test1 ; CHECK-NOT: or 3, 3, 3 ; CHECK: mtctr -; CHECK-NOT: addi +; CHECK-NOT: addi {{[0-9]+}} ; CHECK-NOT: cmplwi ; CHECK: bdnz } @@ -45,7 +45,7 @@ for.end: ; preds = %for.body, %entry ret void ; CHECK: @test2 ; CHECK: mtctr -; CHECK-NOT: addi +; CHECK-NOT: addi {{[0-9]+}} ; CHECK-NOT: cmplwi ; CHECK: bdnz } @@ -69,7 +69,7 @@ for.end: ; preds = %for.body, %entry ret void ; CHECK: @test3 ; CHECK: mtctr -; CHECK-NOT: addi +; CHECK-NOT: addi {{[0-9]+}} ; CHECK-NOT: cmplwi ; CHECK: bdnz } diff --git a/llvm/test/CodeGen/PowerPC/negctr.ll b/llvm/test/CodeGen/PowerPC/negctr.ll index 2f6995c65dd8..ef33bb7e947d 100644 --- a/llvm/test/CodeGen/PowerPC/negctr.ll +++ b/llvm/test/CodeGen/PowerPC/negctr.ll @@ -14,9 +14,12 @@ for.body: ; preds = %for.body, %entry %exitcond = icmp eq i32 %lftr.wideiv, 0 br i1 %exitcond, label %for.end, label %for.body -; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
; CHECK: @main -; CHECK-NOT: bdnz +; CHECK: li [[REG:[0-9]+]], 0 +; CHECK: oris [[REG2:[0-9]+]], [[REG]], 65535 +; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 +; CHECK: mtctr [[REG3]] +; CHECK: bdnz for.end: ; preds = %for.body, %entry ret void