[LoopUnroll] Include hotness of region in opt remark
LoopUnroll is a loop pass, so the OptimizationRemarkEmitter analysis is added to the common function analysis passes that loop passes depend on. The BFI, and indirectly the BPI, used in this pass are computed lazily, so no overhead should be observed unless -pass-remarks-with-hotness is used.

This is how the patch affects the O3 pipeline (passes marked with "+" are newly scheduled):

  Dominator Tree Construction
  Natural Loop Information
  Canonicalize natural loops
  Loop-Closed SSA Form Pass
  Basic Alias Analysis (stateless AA impl)
  Function Alias Analysis Results
  Scalar Evolution Analysis
+ Lazy Branch Probability Analysis
+ Lazy Block Frequency Analysis
+ Optimization Remark Emitter
  Loop Pass Manager
  Rotate Loops
  Loop Invariant Code Motion
  Unswitch loops
  Simplify the CFG
  Dominator Tree Construction
  Basic Alias Analysis (stateless AA impl)
  Function Alias Analysis Results
  Combine redundant instructions
  Natural Loop Information
  Canonicalize natural loops
  Loop-Closed SSA Form Pass
  Scalar Evolution Analysis
+ Lazy Branch Probability Analysis
+ Lazy Block Frequency Analysis
+ Optimization Remark Emitter
  Loop Pass Manager
  Induction Variable Simplification
  Recognize loop idioms
  Delete dead loops
  Unroll loops
  ...

llvm-svn: 277203
This commit is contained in:
parent
716a94787c
commit
12937c361f
|
@ -27,12 +27,14 @@ class LoopInfo;
|
|||
class LPPassManager;
|
||||
class MDNode;
|
||||
class Pass;
|
||||
class OptimizationRemarkEmitter;
|
||||
class ScalarEvolution;
|
||||
|
||||
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
bool AllowRuntime, bool AllowExpensiveTripCount,
|
||||
unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
|
||||
DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
|
||||
DominatorTree *DT, AssumptionCache *AC,
|
||||
OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
|
||||
|
||||
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
bool AllowExpensiveTripCount,
|
||||
|
|
|
@ -21,11 +21,11 @@
|
|||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/LoopPassManager.h"
|
||||
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
|
@ -693,8 +693,10 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold,
|
|||
// Calculates unroll count and writes it to UP.Count.
|
||||
static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
||||
DominatorTree &DT, LoopInfo *LI,
|
||||
ScalarEvolution *SE, unsigned TripCount,
|
||||
unsigned TripMultiple, unsigned LoopSize,
|
||||
ScalarEvolution *SE,
|
||||
OptimizationRemarkEmitter *ORE,
|
||||
unsigned TripCount, unsigned TripMultiple,
|
||||
unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP) {
|
||||
// BEInsns represents number of instructions optimized when "back edge"
|
||||
// becomes "fall through" in unrolled loop.
|
||||
|
@ -736,9 +738,6 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
PragmaEnableUnroll || UserUnrollCount;
|
||||
|
||||
uint64_t UnrolledSize;
|
||||
DebugLoc LoopLoc = L->getStartLoc();
|
||||
Function *F = L->getHeader()->getParent();
|
||||
LLVMContext &Ctx = F->getContext();
|
||||
|
||||
if (ExplicitUnroll && TripCount != 0) {
|
||||
// If the loop has an unrolling pragma, we want to be more aggressive with
|
||||
|
@ -813,8 +812,8 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
}
|
||||
if (UP.Count < 2) {
|
||||
if (PragmaEnableUnroll)
|
||||
emitOptimizationRemarkMissed(
|
||||
Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
ORE->emitOptimizationRemarkMissed(
|
||||
DEBUG_TYPE, L,
|
||||
"Unable to unroll loop as directed by unroll(enable) pragma "
|
||||
"because unrolled size is too large.");
|
||||
UP.Count = 0;
|
||||
|
@ -824,8 +823,8 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
}
|
||||
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
|
||||
UP.Count != TripCount)
|
||||
emitOptimizationRemarkMissed(
|
||||
Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
ORE->emitOptimizationRemarkMissed(
|
||||
DEBUG_TYPE, L,
|
||||
"Unable to fully unroll loop as directed by unroll pragma because "
|
||||
"unrolled size is too large.");
|
||||
return ExplicitUnroll;
|
||||
|
@ -833,8 +832,8 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
assert(TripCount == 0 &&
|
||||
"All cases when TripCount is constant should be covered here.");
|
||||
if (PragmaFullUnroll)
|
||||
emitOptimizationRemarkMissed(
|
||||
Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
ORE->emitOptimizationRemarkMissed(
|
||||
DEBUG_TYPE, L,
|
||||
"Unable to fully unroll loop as directed by unroll(full) pragma "
|
||||
"because loop has a runtime trip count.");
|
||||
|
||||
|
@ -877,8 +876,8 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
<< TripMultiple << ". Reducing unroll count from "
|
||||
<< OrigCount << " to " << UP.Count << ".\n");
|
||||
if (PragmaCount > 0 && !UP.AllowRemainder)
|
||||
emitOptimizationRemarkMissed(
|
||||
Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
ORE->emitOptimizationRemarkMissed(
|
||||
DEBUG_TYPE, L,
|
||||
Twine("Unable to unroll loop the number of times directed by "
|
||||
"unroll_count pragma because remainder loop is restricted "
|
||||
"(that could architecture specific or because the loop "
|
||||
|
@ -898,7 +897,8 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
|
||||
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
ScalarEvolution *SE, const TargetTransformInfo &TTI,
|
||||
AssumptionCache &AC, bool PreserveLCSSA,
|
||||
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
|
||||
bool PreserveLCSSA,
|
||||
Optional<unsigned> ProvidedCount,
|
||||
Optional<unsigned> ProvidedThreshold,
|
||||
Optional<bool> ProvidedAllowPartial,
|
||||
|
@ -963,8 +963,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
|||
if (Convergent)
|
||||
UP.AllowRemainder = false;
|
||||
|
||||
bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
|
||||
TripMultiple, LoopSize, UP);
|
||||
bool IsCountSetExplicitly = computeUnrollCount(
|
||||
L, TTI, DT, LI, SE, &ORE, TripCount, TripMultiple, LoopSize, UP);
|
||||
if (!UP.Count)
|
||||
return false;
|
||||
// Unroll factor (Count) must be less or equal to TripCount.
|
||||
|
@ -974,7 +974,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
|||
// Unroll the loop.
|
||||
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
|
||||
UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
|
||||
PreserveLCSSA))
|
||||
&ORE, PreserveLCSSA))
|
||||
return false;
|
||||
|
||||
// If loop has an unroll count pragma or unrolled by explicitly set count
|
||||
|
@ -1014,11 +1014,12 @@ public:
|
|||
const TargetTransformInfo &TTI =
|
||||
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
||||
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
|
||||
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
|
||||
|
||||
return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
|
||||
ProvidedThreshold, ProvidedAllowPartial,
|
||||
ProvidedRuntime);
|
||||
return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
|
||||
ProvidedCount, ProvidedThreshold,
|
||||
ProvidedAllowPartial, ProvidedRuntime);
|
||||
}
|
||||
|
||||
/// This transformation requires natural loop information & requires that
|
||||
|
@ -1068,6 +1069,7 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, AnalysisManager<Loop> &AM) {
|
|||
ScalarEvolution *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
|
||||
auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
|
||||
auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F);
|
||||
auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
|
||||
if (!DT)
|
||||
report_fatal_error("LoopUnrollPass: DominatorTreeAnalysis not cached at a higher level");
|
||||
if (!LI)
|
||||
|
@ -1078,9 +1080,12 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, AnalysisManager<Loop> &AM) {
|
|||
report_fatal_error("LoopUnrollPass: TargetIRAnalysis not cached at a higher level");
|
||||
if (!AC)
|
||||
report_fatal_error("LoopUnrollPass: AssumptionAnalysis not cached at a higher level");
|
||||
if (!ORE)
|
||||
report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
|
||||
"cached at a higher level");
|
||||
|
||||
bool Changed = tryToUnrollLoop(
|
||||
&L, *DT, LI, SE, *TTI, *AC, /*PreserveLCSSA*/ true, ProvidedCount,
|
||||
&L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true, ProvidedCount,
|
||||
ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime);
|
||||
|
||||
if (!Changed)
|
||||
|
|
|
@ -23,10 +23,10 @@
|
|||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopIterator.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
@ -204,7 +204,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
|||
bool AllowRuntime, bool AllowExpensiveTripCount,
|
||||
unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
|
||||
DominatorTree *DT, AssumptionCache *AC,
|
||||
bool PreserveLCSSA) {
|
||||
OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
if (!Preheader) {
|
||||
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
|
||||
|
@ -323,21 +323,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
|||
}
|
||||
|
||||
// Report the unrolling decision.
|
||||
DebugLoc LoopLoc = L->getStartLoc();
|
||||
Function *F = Header->getParent();
|
||||
LLVMContext &Ctx = F->getContext();
|
||||
|
||||
if (CompletelyUnroll) {
|
||||
DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
|
||||
<< " with trip count " << TripCount << "!\n");
|
||||
emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
Twine("completely unrolled loop with ") +
|
||||
Twine(TripCount) + " iterations");
|
||||
ORE->emitOptimizationRemark(DEBUG_TYPE, L,
|
||||
Twine("completely unrolled loop with ") +
|
||||
Twine(TripCount) + " iterations");
|
||||
} else {
|
||||
auto EmitDiag = [&](const Twine &T) {
|
||||
emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
|
||||
"unrolled loop by a factor of " + Twine(Count) +
|
||||
T);
|
||||
ORE->emitOptimizationRemark(
|
||||
DEBUG_TYPE, L, "unrolled loop by a factor of " + Twine(Count) + T);
|
||||
};
|
||||
|
||||
DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
|
@ -961,6 +962,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
|
|||
AU.addPreserved<SCEVAAWrapperPass>();
|
||||
AU.addRequired<ScalarEvolutionWrapperPass>();
|
||||
AU.addPreserved<ScalarEvolutionWrapperPass>();
|
||||
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
|
||||
AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
|
||||
}
|
||||
|
||||
/// Manually defined generic "LoopPass" dependency initialization. This is used
|
||||
|
@ -981,6 +984,7 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
|
|||
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||
}
|
||||
|
||||
/// \brief Find string metadata for loop
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s
|
||||
; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s
|
||||
|
||||
; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations (hotness: 300)
|
||||
; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4 {{.*}} (hotness: 300)
|
||||
|
||||
; @sum: a counted loop whose induction variable starts at 0 and exits once the
; incremented value equals 16. Each iteration calls @baz(i + 4) and adds the
; result to a running sum, which is returned from %for.end.
; The function carries !prof !0 (function entry count) and the loop branch
; carries !prof !1 (branch weights), which provide the profile data the
; hotness-annotated remarks in the CHECK lines of this test rely on.
; NOTE(review): the expected "hotness: 300" presumably comes from entry count 3
; times roughly 100 loop-header executions implied by the 1:99 weights - confirm.
define i32 @sum() !prof !0 {
|
||||
entry:
|
||||
br label %for.body
|
||||

||||
for.body: ; preds = %for.body, %entry
|
||||
%s.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
|
||||
%i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%add = add nsw i32 %i.05, 4
|
||||
%call = tail call i32 @baz(i32 %add) #2
|
||||
%add1 = add nsw i32 %call, %s.06
|
||||
%inc = add nsw i32 %i.05, 1
|
||||
%exitcond = icmp eq i32 %inc, 16
|
||||
br i1 %exitcond, label %for.end, label %for.body, !prof !1
|
||||

||||
for.end: ; preds = %for.body
|
||||
ret i32 %add1
|
||||
}
|
||||
|
||||
declare i32 @baz(i32)
|
||||
|
||||
!0 = !{!"function_entry_count", i64 3}
|
||||
!1 = !{!"branch_weights", i32 1, i32 99}
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
|
||||
; RUN: opt < %s -S -passes='function(require<scalar-evolution>,require<targetir>,loop(unroll),verify<loops>)' | FileCheck %s
|
||||
; RUN: opt < %s -S -passes='function(require<scalar-evolution>,require<targetir>,require<opt-remark-emit>,loop(unroll),verify<loops>)' | FileCheck %s
|
||||
;
|
||||
; Unit tests for LoopInfo::markAsRemoved.
|
||||
|
||||
|
|
Loading…
Reference in New Issue