Adds the ability to use an epilog remainder loop during loop unrolling and makes
this the default behavior. Patch by Evgeny Stupachenko (evstupac@gmail.com). Differential Revision: http://reviews.llvm.org/D18158 llvm-svn: 265388
This commit is contained in:
parent
849045f2aa
commit
188de5ae69
|
@ -34,10 +34,11 @@ bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
|
|||
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
|
||||
AssumptionCache *AC, bool PreserveLCSSA);
|
||||
|
||||
bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
||||
bool AllowExpensiveTripCount, LoopInfo *LI,
|
||||
ScalarEvolution *SE, DominatorTree *DT,
|
||||
bool PreserveLCSSA);
|
||||
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
bool AllowExpensiveTripCount,
|
||||
bool UseEpilogRemainder, LoopInfo *LI,
|
||||
ScalarEvolution *SE, DominatorTree *DT,
|
||||
bool PreserveLCSSA);
|
||||
|
||||
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
|
||||
}
|
||||
|
|
|
@ -44,6 +44,11 @@ using namespace llvm;
|
|||
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
|
||||
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
|
||||
|
||||
static cl::opt<bool>
|
||||
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden,
|
||||
cl::desc("Allow runtime unrolled loops to be unrolled "
|
||||
"with epilog instead of prolog."));
|
||||
|
||||
/// Convert the instruction operands from referencing the current values into
|
||||
/// those specified by VMap.
|
||||
static inline void remapInstruction(Instruction *I,
|
||||
|
@ -288,12 +293,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
|
|||
"convergent "
|
||||
"operation.");
|
||||
});
|
||||
// Don't output the runtime loop prolog if Count is a multiple of
|
||||
// TripMultiple. Such a prolog is never needed, and is unsafe if the loop
|
||||
// Don't output the runtime loop remainder if Count is a multiple of
|
||||
// TripMultiple. Such a remainder is never needed, and is unsafe if the loop
|
||||
// contains a convergent instruction.
|
||||
if (RuntimeTripCount && TripMultiple % Count != 0 &&
|
||||
!UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
|
||||
PreserveLCSSA))
|
||||
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
|
||||
UnrollRuntimeEpilog, LI, SE, DT,
|
||||
PreserveLCSSA))
|
||||
return false;
|
||||
|
||||
// Notify ScalarEvolution that the loop will be substantially changed,
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
// case, we need to generate code to execute these 'left over' iterations.
|
||||
//
|
||||
// The current strategy generates an if-then-else sequence prior to the
|
||||
// unrolled loop to execute the 'left over' iterations. Other strategies
|
||||
// include generate a loop before or after the unrolled loop.
|
||||
// unrolled loop to execute the 'left over' iterations before or after the
|
||||
// unrolled loop.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -60,33 +60,35 @@ STATISTIC(NumRuntimeUnrolled,
|
|||
/// than the unroll factor.
|
||||
///
|
||||
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
||||
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
|
||||
BasicBlock *OrigPH, BasicBlock *NewPH,
|
||||
ValueToValueMapTy &VMap, DominatorTree *DT,
|
||||
LoopInfo *LI, bool PreserveLCSSA) {
|
||||
BasicBlock *PrologExit, BasicBlock *PreHeader,
|
||||
BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
|
||||
DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
|
||||
BasicBlock *Latch = L->getLoopLatch();
|
||||
assert(Latch && "Loop must have a latch");
|
||||
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
|
||||
|
||||
// Create a PHI node for each outgoing value from the original loop
|
||||
// (which means it is an outgoing value from the prolog code too).
|
||||
// The new PHI node is inserted in the prolog end basic block.
|
||||
// The new PHI name is added as an operand of a PHI node in either
|
||||
// The new PHI node value is added as an operand of a PHI node in either
|
||||
// the loop header or the loop exit block.
|
||||
for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
|
||||
SBI != SBE; ++SBI) {
|
||||
for (BasicBlock::iterator BBI = (*SBI)->begin();
|
||||
PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
|
||||
|
||||
for (BasicBlock *Succ : successors(Latch)) {
|
||||
for (Instruction &BBI : *Succ) {
|
||||
PHINode *PN = dyn_cast<PHINode>(&BBI);
|
||||
// Exit when we passed all PHI nodes.
|
||||
if (!PN)
|
||||
break;
|
||||
// Add a new PHI node to the prolog end block and add the
|
||||
// appropriate incoming values.
|
||||
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
|
||||
PrologEnd->getTerminator());
|
||||
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
|
||||
PrologExit->getFirstNonPHI());
|
||||
// Adding a value to the new PHI node from the original loop preheader.
|
||||
// This is the value that skips all the prolog code.
|
||||
if (L->contains(PN)) {
|
||||
NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
|
||||
NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
|
||||
PreHeader);
|
||||
} else {
|
||||
NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
|
||||
NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
|
||||
}
|
||||
|
||||
Value *V = PN->getIncomingValueForBlock(Latch);
|
||||
|
@ -97,22 +99,22 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
|||
}
|
||||
// Adding a value to the new PHI node from the last prolog block
|
||||
// that was created.
|
||||
NewPN->addIncoming(V, LastPrologBB);
|
||||
NewPN->addIncoming(V, PrologLatch);
|
||||
|
||||
// Update the existing PHI node operand with the value from the
|
||||
// new PHI node. How this is done depends on if the existing
|
||||
// PHI node is in the original loop block, or the exit block.
|
||||
if (L->contains(PN)) {
|
||||
PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
|
||||
PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
|
||||
} else {
|
||||
PN->addIncoming(NewPN, PrologEnd);
|
||||
PN->addIncoming(NewPN, PrologExit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a branch around the original loop, which is taken if there are no
|
||||
// iterations remaining to be executed after running the prologue.
|
||||
Instruction *InsertPt = PrologEnd->getTerminator();
|
||||
Instruction *InsertPt = PrologExit->getTerminator();
|
||||
IRBuilder<> B(InsertPt);
|
||||
|
||||
assert(Count != 0 && "nonsensical Count!");
|
||||
|
@ -126,25 +128,152 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
|||
BasicBlock *Exit = L->getUniqueExitBlock();
|
||||
assert(Exit && "Loop must have a single exit block only");
|
||||
// Split the exit to maintain loop canonicalization guarantees
|
||||
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
|
||||
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
||||
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
|
||||
PreserveLCSSA);
|
||||
// Add the branch to the exit block (around the unrolled loop)
|
||||
B.CreateCondBr(BrLoopExit, Exit, NewPH);
|
||||
B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
|
||||
InsertPt->eraseFromParent();
|
||||
}
|
||||
|
||||
/// Connect the unrolling epilog code to the original loop.
|
||||
/// The unrolling epilog code contains code to execute the
|
||||
/// 'extra' iterations if the run-time trip count modulo the
|
||||
/// unroll count is non-zero.
|
||||
///
|
||||
/// This function performs the following:
|
||||
/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
|
||||
/// - Create PHI nodes at the unrolling loop exit to combine
|
||||
/// values that exit the unrolling loop code and jump around it.
|
||||
/// - Update PHI operands in the epilog loop by the new PHI nodes
|
||||
/// - Branch around the epilog loop if extra iters (ModVal) is zero.
|
||||
///
|
||||
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
|
||||
BasicBlock *Exit, BasicBlock *PreHeader,
|
||||
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
|
||||
ValueToValueMapTy &VMap, DominatorTree *DT,
|
||||
LoopInfo *LI, bool PreserveLCSSA) {
|
||||
BasicBlock *Latch = L->getLoopLatch();
|
||||
assert(Latch && "Loop must have a latch");
|
||||
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
|
||||
|
||||
// Loop structure should be the following:
|
||||
//
|
||||
// PreHeader
|
||||
// NewPreHeader
|
||||
// Header
|
||||
// ...
|
||||
// Latch
|
||||
// NewExit (PN)
|
||||
// EpilogPreHeader
|
||||
// EpilogHeader
|
||||
// ...
|
||||
// EpilogLatch
|
||||
// Exit (EpilogPN)
|
||||
|
||||
// Update PHI nodes at NewExit and Exit.
|
||||
for (Instruction &BBI : *NewExit) {
|
||||
PHINode *PN = dyn_cast<PHINode>(&BBI);
|
||||
// Exit when we passed all PHI nodes.
|
||||
if (!PN)
|
||||
break;
|
||||
// PN should be used in another PHI located in Exit block as
|
||||
// Exit was split by SplitBlockPredecessors into Exit and NewExit
|
||||
// Basicaly it should look like:
|
||||
// NewExit:
|
||||
// PN = PHI [I, Latch]
|
||||
// ...
|
||||
// Exit:
|
||||
// EpilogPN = PHI [PN, EpilogPreHeader]
|
||||
//
|
||||
// There is EpilogPreHeader incoming block instead of NewExit as
|
||||
// NewExit was spilt 1 more time to get EpilogPreHeader.
|
||||
assert(PN->hasOneUse() && "The phi should have 1 use");
|
||||
PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
|
||||
assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
|
||||
|
||||
// Add incoming PreHeader from branch around the Loop
|
||||
PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
|
||||
|
||||
Value *V = PN->getIncomingValueForBlock(Latch);
|
||||
Instruction *I = dyn_cast<Instruction>(V);
|
||||
if (I && L->contains(I))
|
||||
// If value comes from an instruction in the loop add VMap value.
|
||||
V = VMap[I];
|
||||
// For the instruction out of the loop, constant or undefined value
|
||||
// insert value itself.
|
||||
EpilogPN->addIncoming(V, EpilogLatch);
|
||||
|
||||
assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
|
||||
"EpilogPN should have EpilogPreHeader incoming block");
|
||||
// Change EpilogPreHeader incoming block to NewExit.
|
||||
EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
|
||||
NewExit);
|
||||
// Now PHIs should look like:
|
||||
// NewExit:
|
||||
// PN = PHI [I, Latch], [undef, PreHeader]
|
||||
// ...
|
||||
// Exit:
|
||||
// EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
|
||||
}
|
||||
|
||||
// Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
|
||||
// Update corresponding PHI nodes in epilog loop.
|
||||
for (BasicBlock *Succ : successors(Latch)) {
|
||||
// Skip this as we already updated phis in exit blocks.
|
||||
if (!L->contains(Succ))
|
||||
continue;
|
||||
for (Instruction &BBI : *Succ) {
|
||||
PHINode *PN = dyn_cast<PHINode>(&BBI);
|
||||
// Exit when we passed all PHI nodes.
|
||||
if (!PN)
|
||||
break;
|
||||
// Add new PHI nodes to the loop exit block and update epilog
|
||||
// PHIs with the new PHI values.
|
||||
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
|
||||
NewExit->getFirstNonPHI());
|
||||
// Adding a value to the new PHI node from the unrolling loop preheader.
|
||||
NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
|
||||
// Adding a value to the new PHI node from the unrolling loop latch.
|
||||
NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
|
||||
|
||||
// Update the existing PHI node operand with the value from the new PHI
|
||||
// node. Corresponding instruction in epilog loop should be PHI.
|
||||
PHINode *VPN = cast<PHINode>(VMap[&BBI]);
|
||||
VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
|
||||
}
|
||||
}
|
||||
|
||||
Instruction *InsertPt = NewExit->getTerminator();
|
||||
IRBuilder<> B(InsertPt);
|
||||
Value *BrLoopExit = B.CreateIsNotNull(ModVal);
|
||||
assert(Exit && "Loop must have a single exit block only");
|
||||
// Split the exit to maintain loop canonicalization guarantees
|
||||
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
||||
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
|
||||
PreserveLCSSA);
|
||||
// Add the branch to the exit block (around the unrolling loop)
|
||||
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
|
||||
InsertPt->eraseFromParent();
|
||||
}
|
||||
|
||||
/// Create a clone of the blocks in a loop and connect them together.
|
||||
/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
|
||||
/// loop will be created including all cloned blocks, and the iterator of it
|
||||
/// switches to count NewIter down to 0.
|
||||
/// If CreateRemainderLoop is false, loop structure will not be cloned,
|
||||
/// otherwise a new loop will be created including all cloned blocks, and the
|
||||
/// iterator of it switches to count NewIter down to 0.
|
||||
/// The cloned blocks should be inserted between InsertTop and InsertBot.
|
||||
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
|
||||
/// new loop exit.
|
||||
///
|
||||
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
||||
static void CloneLoopBlocks(Loop *L, Value *NewIter,
|
||||
const bool CreateRemainderLoop,
|
||||
const bool UseEpilogRemainder,
|
||||
BasicBlock *InsertTop, BasicBlock *InsertBot,
|
||||
BasicBlock *Preheader,
|
||||
std::vector<BasicBlock *> &NewBlocks,
|
||||
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
|
||||
LoopInfo *LI) {
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
|
||||
BasicBlock *Header = L->getHeader();
|
||||
BasicBlock *Latch = L->getLoopLatch();
|
||||
Function *F = Header->getParent();
|
||||
|
@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
|
||||
Loop *NewLoop = nullptr;
|
||||
Loop *ParentLoop = L->getParentLoop();
|
||||
if (!UnrollProlog) {
|
||||
if (CreateRemainderLoop) {
|
||||
NewLoop = new Loop();
|
||||
if (ParentLoop)
|
||||
ParentLoop->addChildLoop(NewLoop);
|
||||
|
@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
// For each block in the original loop, create a new copy,
|
||||
// and update the value map with the newly created values.
|
||||
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
|
||||
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
|
||||
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
|
||||
NewBlocks.push_back(NewBB);
|
||||
|
||||
if (NewLoop)
|
||||
|
@ -179,16 +308,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
}
|
||||
|
||||
if (Latch == *BB) {
|
||||
// For the last block, if UnrollProlog is true, create a direct jump to
|
||||
// InsertBot. If not, create a loop back to cloned head.
|
||||
// For the last block, if CreateRemainderLoop is false, create a direct
|
||||
// jump to InsertBot. If not, create a loop back to cloned head.
|
||||
VMap.erase((*BB)->getTerminator());
|
||||
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
|
||||
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
|
||||
IRBuilder<> Builder(LatchBR);
|
||||
if (UnrollProlog) {
|
||||
if (!CreateRemainderLoop) {
|
||||
Builder.CreateBr(InsertBot);
|
||||
} else {
|
||||
PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
|
||||
PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
|
||||
suffix + ".iter",
|
||||
FirstLoopBB->getFirstNonPHI());
|
||||
Value *IdxSub =
|
||||
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
|
||||
|
@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
// cloned loop.
|
||||
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
|
||||
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
|
||||
if (UnrollProlog) {
|
||||
VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
|
||||
cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
|
||||
if (!CreateRemainderLoop) {
|
||||
if (UseEpilogRemainder) {
|
||||
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
|
||||
NewPHI->setIncomingBlock(idx, InsertTop);
|
||||
NewPHI->removeIncomingValue(Latch, false);
|
||||
} else {
|
||||
VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
|
||||
cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
|
||||
}
|
||||
} else {
|
||||
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
|
||||
NewPHI->setIncomingBlock(idx, InsertTop);
|
||||
|
@ -254,7 +390,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
}
|
||||
}
|
||||
|
||||
/// Insert code in the prolog code when unrolling a loop with a
|
||||
/// Insert code in the prolog/epilog code when unrolling a loop with a
|
||||
/// run-time trip-count.
|
||||
///
|
||||
/// This method assumes that the loop unroll factor is total number
|
||||
|
@ -266,6 +402,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
|
||||
/// the switch instruction is generated.
|
||||
///
|
||||
/// ***Prolog case***
|
||||
/// extraiters = tripcount % loopfactor
|
||||
/// if (extraiters == 0) jump Loop:
|
||||
/// else jump Prol
|
||||
|
@ -277,17 +414,35 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
|
|||
/// ...
|
||||
/// End:
|
||||
///
|
||||
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
||||
bool AllowExpensiveTripCount, LoopInfo *LI,
|
||||
ScalarEvolution *SE, DominatorTree *DT,
|
||||
bool PreserveLCSSA) {
|
||||
// For now, only unroll loops that contain a single exit.
|
||||
/// ***Epilog case***
|
||||
/// extraiters = tripcount % loopfactor
|
||||
/// if (extraiters == tripcount) jump LoopExit:
|
||||
/// unroll_iters = tripcount - extraiters
|
||||
/// Loop: LoopBody; (executes unroll_iter times);
|
||||
/// unroll_iter -= 1
|
||||
/// if (unroll_iter != 0) jump Loop:
|
||||
/// LoopExit:
|
||||
/// if (extraiters == 0) jump EpilExit:
|
||||
/// Epil: LoopBody; (executes extraiters times)
|
||||
/// extraiters -= 1 // Omitted if unroll factor is 2.
|
||||
/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
|
||||
/// EpilExit:
|
||||
|
||||
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
bool AllowExpensiveTripCount,
|
||||
bool UseEpilogRemainder,
|
||||
LoopInfo *LI, ScalarEvolution *SE,
|
||||
DominatorTree *DT, bool PreserveLCSSA) {
|
||||
// for now, only unroll loops that contain a single exit
|
||||
if (!L->getExitingBlock())
|
||||
return false;
|
||||
|
||||
// Make sure the loop is in canonical form, and there is a single
|
||||
// exit block only.
|
||||
if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
|
||||
if (!L->isLoopSimplifyForm())
|
||||
return false;
|
||||
BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
|
||||
if (!Exit)
|
||||
return false;
|
||||
|
||||
// Use Scalar Evolution to compute the trip count. This allows more loops to
|
||||
|
@ -311,8 +466,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|||
return false;
|
||||
|
||||
BasicBlock *Header = L->getHeader();
|
||||
BasicBlock *PH = L->getLoopPreheader();
|
||||
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
||||
BasicBlock *PreHeader = L->getLoopPreheader();
|
||||
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
|
||||
const DataLayout &DL = Header->getModule()->getDataLayout();
|
||||
SCEVExpander Expander(*SE, DL, "loop-unroll");
|
||||
if (!AllowExpensiveTripCount &&
|
||||
|
@ -330,26 +485,75 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|||
SE->forgetLoop(ParentLoop);
|
||||
|
||||
BasicBlock *Latch = L->getLoopLatch();
|
||||
// It helps to split the original preheader twice, one for the end of the
|
||||
// prolog code and one for a new loop preheader.
|
||||
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
|
||||
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
|
||||
PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
||||
|
||||
// Loop structure is the following:
|
||||
//
|
||||
// PreHeader
|
||||
// Header
|
||||
// ...
|
||||
// Latch
|
||||
// Exit
|
||||
|
||||
BasicBlock *NewPreHeader;
|
||||
BasicBlock *NewExit = nullptr;
|
||||
BasicBlock *PrologExit = nullptr;
|
||||
BasicBlock *EpilogPreHeader = nullptr;
|
||||
BasicBlock *PrologPreHeader = nullptr;
|
||||
|
||||
if (UseEpilogRemainder) {
|
||||
// If epilog remainder
|
||||
// Split PreHeader to insert a branch around loop for unrolling.
|
||||
NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
|
||||
NewPreHeader->setName(PreHeader->getName() + ".new");
|
||||
// Split Exit to create phi nodes from branch above.
|
||||
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
|
||||
NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
|
||||
DT, LI, PreserveLCSSA);
|
||||
// Split NewExit to insert epilog remainder loop.
|
||||
EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
|
||||
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
|
||||
} else {
|
||||
// If prolog remainder
|
||||
// Split the original preheader twice to insert prolog remainder loop
|
||||
PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
|
||||
PrologPreHeader->setName(Header->getName() + ".prol.preheader");
|
||||
PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
|
||||
DT, LI);
|
||||
PrologExit->setName(Header->getName() + ".prol.loopexit");
|
||||
// Split PrologExit to get NewPreHeader.
|
||||
NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
|
||||
NewPreHeader->setName(PreHeader->getName() + ".new");
|
||||
}
|
||||
// Loop structure should be the following:
|
||||
// Epilog Prolog
|
||||
//
|
||||
// PreHeader PreHeader
|
||||
// *NewPreHeader *PrologPreHeader
|
||||
// Header *PrologExit
|
||||
// ... *NewPreHeader
|
||||
// Latch Header
|
||||
// *NewExit ...
|
||||
// *EpilogPreHeader Latch
|
||||
// Exit Exit
|
||||
|
||||
// Calculate conditions for branch around loop for unrolling
|
||||
// in epilog case and around prolog remainder loop in prolog case.
|
||||
// Compute the number of extra iterations required, which is:
|
||||
// extra iterations = run-time trip count % (loop unroll factor + 1)
|
||||
// extra iterations = run-time trip count % loop unroll factor
|
||||
PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
|
||||
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
|
||||
PreHeaderBR);
|
||||
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
|
||||
PreHeaderBR);
|
||||
|
||||
IRBuilder<> B(PreHeaderBR);
|
||||
Value *ModVal;
|
||||
// Calculate ModVal = (BECount + 1) % Count.
|
||||
// Note that TripCount is BECount + 1.
|
||||
if (isPowerOf2_32(Count)) {
|
||||
// When Count is power of 2 we don't BECount for epilog case, however we'll
|
||||
// need it for a branch around unrolling loop for prolog case.
|
||||
ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
|
||||
// 1. There are no iterations to be run in the prologue loop.
|
||||
// 1. There are no iterations to be run in the prolog/epilog loop.
|
||||
// OR
|
||||
// 2. The addition computing TripCount overflowed.
|
||||
//
|
||||
|
@ -371,18 +575,18 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|||
ConstantInt::get(BECount->getType(), Count),
|
||||
"xtraiter");
|
||||
}
|
||||
Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
|
||||
|
||||
// Branch to either the extra iterations or the cloned/unrolled loop.
|
||||
// We will fix up the true branch label when adding loop body copies.
|
||||
B.CreateCondBr(BranchVal, PEnd, PEnd);
|
||||
assert(PreHeaderBR->isUnconditional() &&
|
||||
PreHeaderBR->getSuccessor(0) == PEnd &&
|
||||
"CFG edges in Preheader are not correct");
|
||||
Value *CmpOperand =
|
||||
UseEpilogRemainder ? TripCount :
|
||||
ConstantInt::get(TripCount->getType(), 0);
|
||||
Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod");
|
||||
BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader;
|
||||
BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit;
|
||||
// Branch to either remainder (extra iterations) loop or unrolling loop.
|
||||
B.CreateCondBr(BranchVal, FirstLoop, SecondLoop);
|
||||
PreHeaderBR->eraseFromParent();
|
||||
Function *F = Header->getParent();
|
||||
// Get an ordered list of blocks in the loop to help with the ordering of the
|
||||
// cloned blocks in the prolog code.
|
||||
// cloned blocks in the prolog/epilog code
|
||||
LoopBlocksDFS LoopBlocks(L);
|
||||
LoopBlocks.perform(LI);
|
||||
|
||||
|
@ -394,17 +598,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|||
std::vector<BasicBlock *> NewBlocks;
|
||||
ValueToValueMapTy VMap;
|
||||
|
||||
bool UnrollPrologue = Count == 2;
|
||||
// For unroll factor 2 remainder loop will have 1 iterations.
|
||||
// Do not create 1 iteration loop.
|
||||
bool CreateRemainderLoop = (Count != 2);
|
||||
|
||||
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
|
||||
// the loop, otherwise we create a cloned loop to execute the extra
|
||||
// iterations. This function adds the appropriate CFG connections.
|
||||
CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
|
||||
VMap, LI);
|
||||
BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
|
||||
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
|
||||
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
|
||||
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
|
||||
|
||||
// Insert the cloned blocks into the function just before the original loop.
|
||||
F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
|
||||
NewBlocks[0]->getIterator(), F->end());
|
||||
// Insert the cloned blocks into the function.
|
||||
F->getBasicBlockList().splice(InsertBot->getIterator(),
|
||||
F->getBasicBlockList(),
|
||||
NewBlocks[0]->getIterator(),
|
||||
F->end());
|
||||
|
||||
// Loop structure should be the following:
|
||||
// Epilog Prolog
|
||||
//
|
||||
// PreHeader PreHeader
|
||||
// NewPreHeader PrologPreHeader
|
||||
// Header PrologHeader
|
||||
// ... ...
|
||||
// Latch PrologLatch
|
||||
// NewExit PrologExit
|
||||
// EpilogPreHeader NewPreHeader
|
||||
// EpilogHeader Header
|
||||
// ... ...
|
||||
// EpilogLatch Latch
|
||||
// Exit Exit
|
||||
|
||||
// Rewrite the cloned instruction operands to use the values created when the
|
||||
// clone is created.
|
||||
|
@ -415,11 +640,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
|||
}
|
||||
}
|
||||
|
||||
// Connect the prolog code to the original loop and update the
|
||||
// PHI functions.
|
||||
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
|
||||
ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
|
||||
PreserveLCSSA);
|
||||
if (UseEpilogRemainder) {
|
||||
// Connect the epilog code to the original loop and update the
|
||||
// PHI functions.
|
||||
ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
|
||||
EpilogPreHeader, NewPreHeader, VMap, DT, LI,
|
||||
PreserveLCSSA);
|
||||
|
||||
// Update counter in loop for unrolling.
|
||||
// I should be multiply of Count.
|
||||
IRBuilder<> B2(NewPreHeader->getTerminator());
|
||||
Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
|
||||
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
|
||||
B2.SetInsertPoint(LatchBR);
|
||||
PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
|
||||
Header->getFirstNonPHI());
|
||||
Value *IdxSub =
|
||||
B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
|
||||
NewIdx->getName() + ".nsub");
|
||||
Value *IdxCmp;
|
||||
if (LatchBR->getSuccessor(0) == Header)
|
||||
IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
|
||||
else
|
||||
IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
|
||||
NewIdx->addIncoming(TestVal, NewPreHeader);
|
||||
NewIdx->addIncoming(IdxSub, Latch);
|
||||
LatchBR->setCondition(IdxCmp);
|
||||
} else {
|
||||
// Connect the prolog code to the original loop and update the
|
||||
// PHI functions.
|
||||
ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
|
||||
VMap, DT, LI, PreserveLCSSA);
|
||||
}
|
||||
NumRuntimeUnrolled++;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,13 +1,21 @@
|
|||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
; Tests for unrolling loops with run-time trip counts
|
||||
|
||||
; CHECK: %xtraiter = and i32 %n
|
||||
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
|
||||
; EPILOG: %xtraiter = and i32 %n
|
||||
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
|
||||
; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: for.body:
|
||||
; PROLOG: %xtraiter = and i32 %n
|
||||
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
|
||||
|
||||
; EPILOG: for.body:
|
||||
; EPILOG: for.body.epil:
|
||||
|
||||
; PROLOG: for.body.prol:
|
||||
; PROLOG: for.body:
|
||||
|
||||
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
|
||||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
define void @unroll_opt_for_size() nounwind optsize {
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -13,11 +14,17 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @unroll_opt_for_size
|
||||
; CHECK: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK: icmp
|
||||
; EPILOG-LABEL: @unroll_opt_for_size
|
||||
; EPILOG: add
|
||||
; EPILOG-NEXT: add
|
||||
; EPILOG-NEXT: add
|
||||
; EPILOG: icmp
|
||||
|
||||
; PROLOG-LABEL: @unroll_opt_for_size
|
||||
; PROLOG: add
|
||||
; PROLOG-NEXT: add
|
||||
; PROLOG-NEXT: add
|
||||
; PROLOG: icmp
|
||||
|
||||
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
@ -40,8 +47,13 @@ for.end: ; preds = %for.body, %entry
|
|||
ret i32 %sum.0.lcssa
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @test
|
||||
; CHECK: for.body.prol{{.*}}:
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; EPILOG-LABEL: @test
|
||||
; EPILOG: for.body:
|
||||
; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; EPILOG: for.body.epil{{.*}}:
|
||||
|
||||
; PROLOG-LABEL: @test
|
||||
; PROLOG: for.body.prol{{.*}}:
|
||||
; PROLOG: for.body:
|
||||
; PROLOG: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
||||
|
|
|
@ -14,9 +14,9 @@ for.body: ; preds = %for.body, %entry
|
|||
|
||||
exit: ; preds = %for.body
|
||||
%ret = phi x86_mmx [ undef, %for.body ]
|
||||
; CHECK: %[[ret_unr:.*]] = phi x86_mmx [ undef,
|
||||
; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef,
|
||||
; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_unr]], {{.*}} ], [ %[[ret_ph]]
|
||||
; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef, %entry
|
||||
; CHECK: %[[ret_ph1:.*]] = phi x86_mmx [ undef,
|
||||
; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_ph]], {{.*}} ], [ %[[ret_ph1]],
|
||||
; CHECK: ret x86_mmx %[[ret]]
|
||||
ret x86_mmx %ret
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ define i32 @test2(i64* %loc, i64 %conv7) {
|
|||
; CHECK: udiv
|
||||
; CHECK: udiv
|
||||
; CHECK-NOT: udiv
|
||||
; CHECK-LABEL: for.body.prol
|
||||
; CHECK-LABEL: for.body
|
||||
entry:
|
||||
%rem0 = load i64, i64* %loc, align 8
|
||||
%ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
|
||||
|
|
|
@ -1,18 +1,30 @@
|
|||
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; Tests for unrolling loops with run-time trip counts
|
||||
|
||||
; CHECK: %xtraiter = and i32 %n
|
||||
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
|
||||
; EPILOG: %xtraiter = and i32 %n
|
||||
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
|
||||
; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
|
||||
|
||||
; PROLOG: %xtraiter = and i32 %n
|
||||
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
|
||||
|
||||
; EPILOG: for.body.epil:
|
||||
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
|
||||
; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
|
||||
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
|
||||
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
|
||||
|
||||
; PROLOG: for.body.prol:
|
||||
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
|
||||
; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
|
||||
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
|
||||
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit, !llvm.loop !0
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
|
||||
; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1
|
||||
; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
|
||||
; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0
|
||||
|
||||
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
@ -39,8 +51,11 @@ for.end: ; preds = %for.body, %entry
|
|||
; Still try to completely unroll loops with compile-time trip counts
|
||||
; even if the -unroll-runtime is specified
|
||||
|
||||
; CHECK: for.body:
|
||||
; CHECK-NOT: for.body.prol:
|
||||
; EPILOG: for.body:
|
||||
; EPILOG-NOT: for.body.epil:
|
||||
|
||||
; PROLOG: for.body:
|
||||
; PROLOG-NOT: for.body.prol:
|
||||
|
||||
define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
@ -64,7 +79,8 @@ for.end: ; preds = %for.body
|
|||
; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
|
||||
; if the -unroll-runtime option is turned on
|
||||
|
||||
; CHECK: bb72.2:
|
||||
; EPILOG: bb72.2:
|
||||
; PROLOG: bb72.2:
|
||||
|
||||
define void @foo(i32 %trips) {
|
||||
entry:
|
||||
|
@ -86,8 +102,11 @@ cond_true138:
|
|||
|
||||
; Test run-time unrolling for a loop that counts down by -2.
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
|
||||
; EPILOG: for.body.epil:
|
||||
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
|
||||
|
||||
; PROLOG: for.body.prol:
|
||||
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
|
||||
|
||||
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
@ -116,8 +135,11 @@ for.end: ; preds = %for.cond.for.end_cr
|
|||
}
|
||||
|
||||
; Test run-time unrolling disable metadata.
|
||||
; CHECK: for.body:
|
||||
; CHECK-NOT: for.body.prol:
|
||||
; EPILOG: for.body:
|
||||
; EPILOG-NOT: for.body.epil:
|
||||
|
||||
; PROLOG: for.body:
|
||||
; PROLOG-NOT: for.body.prol:
|
||||
|
||||
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
@ -148,6 +170,8 @@ for.end: ; preds = %for.cond.for.end_cr
|
|||
!0 = distinct !{!0, !1}
|
||||
!1 = !{!"llvm.loop.unroll.runtime.disable"}
|
||||
|
||||
; CHECK: !0 = distinct !{!0, !1}
|
||||
; CHECK: !1 = !{!"llvm.loop.unroll.disable"}
|
||||
; EPILOG: !0 = distinct !{!0, !1}
|
||||
; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
|
||||
|
||||
; PROLOG: !0 = distinct !{!0, !1}
|
||||
; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}
|
||||
|
|
|
@ -1,19 +1,35 @@
|
|||
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
; This tests that setting the unroll count works
|
||||
|
||||
; CHECK: for.body.preheader:
|
||||
; CHECK: br {{.*}} label %for.body.prol, label %for.body.preheader.split, !dbg [[PH_LOC:![0-9]+]]
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: br label %for.body.preheader.split, !dbg [[BODY_LOC:![0-9]+]]
|
||||
; CHECK: for.body.preheader.split:
|
||||
; CHECK: br {{.*}} label %for.end.loopexit, label %for.body.preheader.split.split, !dbg [[PH_LOC]]
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
|
||||
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
||||
; CHECK-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
|
||||
; CHECK-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
|
||||
; EPILOG: for.body.preheader:
|
||||
; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa, !dbg [[PH_LOC:![0-9]+]]
|
||||
; EPILOG: for.body:
|
||||
; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
|
||||
; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; EPILOG: for.body.epil.preheader:
|
||||
; EPILOG: br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]]
|
||||
; EPILOG: for.body.epil:
|
||||
; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]]
|
||||
|
||||
; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
|
||||
; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
|
||||
; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}})
|
||||
|
||||
; PROLOG: for.body.preheader:
|
||||
; PROLOG: br {{.*}} label %for.body.prol.preheader, label %for.body.prol.loopexit, !dbg [[PH_LOC:![0-9]+]]
|
||||
; PROLOG: for.body.prol:
|
||||
; PROLOG: br label %for.body.prol.loopexit, !dbg [[BODY_LOC:![0-9]+]]
|
||||
; PROLOG: for.body.prol.loopexit:
|
||||
; PROLOG: br {{.*}} label %for.end.loopexit, label %for.body.preheader.new, !dbg [[PH_LOC]]
|
||||
; PROLOG: for.body:
|
||||
; PROLOG: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
|
||||
; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
||||
; PROLOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
|
||||
; PROLOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
|
||||
|
||||
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
|
||||
entry:
|
||||
|
|
|
@ -1,12 +1,18 @@
|
|||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
; Choose a smaller, power-of-two, unroll count if the loop is too large.
|
||||
; This test makes sure we're not unrolling 'odd' counts
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; EPILOG: for.body:
|
||||
; EPILOG: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
|
||||
; EPILOG-NOT: br i1 %niter.ncmp.4, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
|
||||
; EPILOG: for.body.epil:
|
||||
|
||||
; PROLOG: for.body.prol:
|
||||
; PROLOG: for.body:
|
||||
; PROLOG: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
||||
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
|
|
@ -1,13 +1,21 @@
|
|||
; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s
|
||||
; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
; Check runtime unrolling prologue can be promoted by LICM pass.
|
||||
|
||||
; CHECK: entry:
|
||||
; CHECK: %xtraiter
|
||||
; CHECK: %lcmp.mod
|
||||
; CHECK: loop1:
|
||||
; CHECK: br i1 %lcmp.mod
|
||||
; CHECK: loop2.prol:
|
||||
; EPILOG: entry:
|
||||
; EPILOG: %xtraiter
|
||||
; EPILOG: %lcmp.mod
|
||||
; EPILOG: loop1:
|
||||
; EPILOG: br i1 %lcmp.mod
|
||||
; EPILOG: loop2.epil:
|
||||
|
||||
; PROLOG: entry:
|
||||
; PROLOG: %xtraiter
|
||||
; PROLOG: %lcmp.mod
|
||||
; PROLOG: loop1:
|
||||
; PROLOG: br i1 %lcmp.mod
|
||||
; PROLOG: loop2.prol:
|
||||
|
||||
define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
|
||||
entry:
|
||||
|
|
|
@ -11,9 +11,6 @@ entry:
|
|||
%cmp1 = icmp eq i3 %n, 0
|
||||
br i1 %cmp1, label %for.end, label %for.body
|
||||
|
||||
; UNROLL-16-NOT: for.body.prol:
|
||||
; UNROLL-4: for.body.prol:
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
; UNROLL-16-LABEL: for.body:
|
||||
; UNROLL-4-LABEL: for.body:
|
||||
|
@ -39,6 +36,10 @@ for.body: ; preds = %for.body, %entry
|
|||
|
||||
; UNROLL-16-LABEL: for.end
|
||||
; UNROLL-4-LABEL: for.end
|
||||
|
||||
; UNROLL-16-NOT: for.body.epil:
|
||||
; UNROLL-4: for.body.epil:
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
|
||||
ret i3 %sum.0.lcssa
|
||||
|
|
|
@ -13,13 +13,13 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
|||
; CHECK: entry:
|
||||
; CHECK-NEXT: %0 = add i32 %N, 1
|
||||
; CHECK-NEXT: %xtraiter = and i32 %0, 1
|
||||
; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
|
||||
; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, %0
|
||||
; CHECK-NEXT: br i1 %lcmp.mod, label %entry.new, label %while.end.unr-lcssa
|
||||
|
||||
; CHECK: while.body.prol:
|
||||
; CHECK: br label %entry.split
|
||||
; CHECK: while.body.epil:
|
||||
; CHECK: br label %while.end.epilog-lcssa
|
||||
|
||||
; CHECK: entry.split:
|
||||
; CHECK: while.end.epilog-lcssa:
|
||||
|
||||
; Function Attrs: nounwind readnone ssp uwtable
|
||||
define i32 @foo(i32 %N) {
|
||||
|
|
|
@ -4,14 +4,14 @@
|
|||
; RUN: opt < %s -O2 -S | FileCheck %s
|
||||
|
||||
; After loop unroll:
|
||||
; %dec18 = add nsw i32 %dec18.in, -1
|
||||
; %niter.nsub = add nsw i32 %niter, -1
|
||||
; ...
|
||||
; %dec18.1 = add nsw i32 %dec18, -1
|
||||
; %niter.nsub.1 = add nsw i32 %niter.nsub, -1
|
||||
; should be merged to:
|
||||
; %dec18.1 = add nsw i32 %dec18.in, -2
|
||||
; %dec18.1 = add nsw i32 %niter, -2
|
||||
;
|
||||
; CHECK-LABEL: @_Z3fn1v(
|
||||
; CHECK: %dec18.1 = add nsw i32 %dec18.in, -2
|
||||
; CHECK: %niter.nsub.1 = add i32 %niter, -2
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
|
|
@ -171,10 +171,6 @@ for.end: ; preds = %for.body, %entry
|
|||
; should be duplicated (original and 4x unrolled).
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_count4(
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
|
@ -182,6 +178,10 @@ for.end: ; preds = %for.body, %entry
|
|||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
|
@ -287,10 +287,6 @@ for.end: ; preds = %for.body
|
|||
; (original and 8x).
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_enable(
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body:
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
|
@ -302,6 +298,10 @@ for.end: ; preds = %for.body
|
|||
; CHECK: store i32
|
||||
; CHECK-NOT: store i32
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
|
@ -328,16 +328,16 @@ for.end: ; preds = %for.body, %entry
|
|||
; should be duplicated (original and 3x unrolled).
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_count3(
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
|
|
Loading…
Reference in New Issue