From b8d7efe785fa6ba06003d2efd06c26a31254b505 Mon Sep 17 00:00:00 2001 From: Kenneth Uildriks Date: Sat, 9 Oct 2010 22:06:36 +0000 Subject: [PATCH] Now using a variant of the existing inlining heuristics to decide whether to create a given specialization of a function in PartialSpecialization. If the total performance bonus across all callsites passing the same constant exceeds the specialization cost, we create the specialization. llvm-svn: 116158 --- llvm/include/llvm/Analysis/InlineCost.h | 12 +++ llvm/lib/Analysis/InlineCost.cpp | 70 +++++++++++++++ .../Transforms/IPO/PartialSpecialization.cpp | 90 ++++++++++--------- .../PartialSpecialize/heuristics.ll | 49 ++++++++++ 4 files changed, 181 insertions(+), 40 deletions(-) create mode 100644 llvm/test/Transforms/PartialSpecialize/heuristics.ll diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 9963ba44d0bd..ccec4c524746 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -143,6 +143,18 @@ namespace llvm { Function *Callee, SmallPtrSet &NeverInline); + /// getSpecializationBonus - The heuristic used to determine the per-call + /// performance boost for using a specialization of Callee with argument + /// SpecializedArgNos replaced by a constant. + int getSpecializationBonus(Function *Callee, + SmallVectorImpl &SpecializedArgNo); + + /// getSpecializationCost - The heuristic used to determine the code-size + /// impact of creating a specialized version of Callee with argument + /// SpecializedArgNo replaced by a constant. + InlineCost getSpecializationCost(Function *Callee, + SmallVectorImpl &SpecializedArgNo); + /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a /// higher threshold to determine if the function call should be inlined. 
float getInlineFudgeFactor(CallSite CS); diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index a0e2ec715fd9..b103897977b3 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -312,6 +312,42 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline() Metrics.containsIndirectBr); } +// getSpecializationBonus - The heuristic used to determine the per-call +// performance boost for using a specialization of Callee with argument +// specializedArgNo replaced by a constant. +int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, + SmallVectorImpl &SpecializedArgNos) +{ + if (Callee->mayBeOverridden()) + return 0; + + int Bonus = 0; + // If this function uses the coldcc calling convention, prefer not to + // specialize it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus -= InlineConstants::ColdccPenalty; + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + + for (unsigned i = 0, s = SpecializedArgNos.size(); + i < s; ++i ) + { + Bonus += CalleeFI->ArgumentWeights[SpecializedArgNos[i]].ConstantBonus; + } + // Calls usually take a long time, so they make the specialization gain + // smaller. + Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + + return Bonus; +} + + // getInlineCost - The heuristic used to determine if we should inline the // function call or not. // @@ -442,6 +478,40 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, return llvm::InlineCost::get(InlineCost); } +// getSpecializationCost - The heuristic used to determine the code-size +// impact of creating a specialized version of Callee with argument +// SpecializedArgNo replaced by a constant. 
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, + SmallVectorImpl &SpecializedArgNos) +{ + // Don't specialize functions which can be redefined at link-time to mean + // something else. + if (Callee->mayBeOverridden()) + return llvm::InlineCost::getNever(); + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + int Cost = 0; + + // Look at the original size of the callee. Each instruction counts as 5. + Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; + + // Offset that with the amount of code that can be constant-folded + // away with the given arguments replaced by constants. + for (SmallVectorImpl::iterator an = SpecializedArgNos.begin(), + ae = SpecializedArgNos.end(); an != ae; ++an) + { + Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight; + } + + return llvm::InlineCost::get(Cost); +} + // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a // higher threshold to determine if the function call should be inlined. 
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { diff --git a/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/llvm/lib/Transforms/IPO/PartialSpecialization.cpp index 756c9b3ed405..037189e949a4 100644 --- a/llvm/lib/Transforms/IPO/PartialSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/PartialSpecialization.cpp @@ -25,6 +25,7 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Support/CallSite.h" #include "llvm/ADT/DenseSet.h" @@ -37,17 +38,12 @@ STATISTIC(numReplaced, "Number of callers replaced by specialization"); // Maximum number of arguments markable interested static const int MaxInterests = 6; -// Call must be used at least occasionally -static const int CallsMin = 5; - -// Must have 10% of calls having the same constant to specialize on -static const double ConstValPercent = .1; - namespace { typedef SmallVector InterestingArgVector; class PartSpec : public ModulePass { void scanForInterest(Function&, InterestingArgVector&); int scanDistribution(Function&, int, std::map&); + InlineCostAnalyzer CA; public : static char ID; // Pass identification, replacement for typeid PartSpec() : ModulePass(ID) {} @@ -79,6 +75,10 @@ SpecializeFunction(Function* F, NF->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(NF); + // FIXME: Specialized versions getting the same constants should also get + // the same name. That way, specializations for public functions can be + // marked linkonce_odr and reused across modules. 
+ for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); ii != ee; ) { Value::use_iterator i = ii; @@ -144,22 +144,37 @@ bool PartSpec::runOnModule(Module &M) { bool breakOuter = false; for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) { std::map distribution; - int total = scanDistribution(F, interestingArgs[x], distribution); - if (total > CallsMin) - for (std::map::iterator ii = distribution.begin(), - ee = distribution.end(); ii != ee; ++ii) - if (total > ii->second && ii->first && - ii->second > total * ConstValPercent) { - ValueMap m; - Function::arg_iterator arg = F.arg_begin(); - for (int y = 0; y < interestingArgs[x]; ++y) - ++arg; - m[&*arg] = ii->first; - SpecializeFunction(&F, m); - ++numSpecialized; - breakOuter = true; - Changed = true; - } + scanDistribution(F, interestingArgs[x], distribution); + for (std::map::iterator ii = distribution.begin(), + ee = distribution.end(); ii != ee; ++ii) { + // The distribution map might have an entry for NULL (i.e., one or more + // callsites were passing a non-constant there). We allow that to + // happen so that we can see whether any callsites pass a non-constant; + // if none do and the function is internal, we might have an opportunity + // to kill the original function. + if (!ii->first) continue; + int bonus = ii->second; + SmallVector argnos; + argnos.push_back(interestingArgs[x]); + InlineCost cost = CA.getSpecializationCost(&F, argnos); + // FIXME: If this is the last constant entry, and no non-constant + // entries exist, and the target function is internal, the cost should + // be reduced by the original size of the target function, almost + // certainly making it negative and causing a specialization that will + // leave the original function dead and removable. 
+ if (cost.isAlways() || + (cost.isVariable() && cost.getValue() < bonus)) { + ValueMap m; + Function::arg_iterator arg = F.arg_begin(); + for (int y = 0; y < interestingArgs[x]; ++y) + ++arg; + m[&*arg] = ii->first; + SpecializeFunction(&F, m); + ++numSpecialized; + breakOuter = true; + Changed = true; + } + } } } return Changed; @@ -170,28 +185,20 @@ bool PartSpec::runOnModule(Module &M) { void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); ii != ee; ++ii) { - for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); - ui != ue; ++ui) { - - bool interesting = false; - User *U = *ui; - if (isa(U)) interesting = true; - else if (isa(U)) - interesting = ui->getOperand(0) == ii; - else if (isa(U)) - interesting = ui->getOperand(0) == ii; - else if (isa(U)) interesting = true; - else if (isa(U)) interesting = true; - - if (interesting) { - args.push_back(std::distance(F.arg_begin(), ii)); - break; - } + int argno = std::distance(F.arg_begin(), ii); + SmallVector argnos; + argnos.push_back(argno); + int bonus = CA.getSpecializationBonus(&F, argnos); + if (bonus > 0) { + args.push_back(argno); } } } /// scanDistribution - Construct a histogram of constants for arg of F at arg. +/// For each distinct constant, we'll compute the total of the specialization +/// bonus across all callsites passing that constant; if that total exceeds +/// the specialization cost, we will create the specialization. 
int PartSpec::scanDistribution(Function& F, int arg, std::map& dist) { bool hasIndirect = false; @@ -201,7 +208,10 @@ int PartSpec::scanDistribution(Function& F, int arg, User *U = *ii; CallSite CS(U); if (CS && CS.getCalledFunction() == &F) { - ++dist[dyn_cast(CS.getArgument(arg))]; + SmallVector argnos; + argnos.push_back(arg); + dist[dyn_cast(CS.getArgument(arg))] += + CA.getSpecializationBonus(&F, argnos); ++total; } else hasIndirect = true; diff --git a/llvm/test/Transforms/PartialSpecialize/heuristics.ll b/llvm/test/Transforms/PartialSpecialize/heuristics.ll new file mode 100644 index 000000000000..5ccf9ad86efa --- /dev/null +++ b/llvm/test/Transforms/PartialSpecialize/heuristics.ll @@ -0,0 +1,49 @@ +; If there are not enough callsites for a particular specialization to +; justify its existence, the specialization shouldn't be created. +; +; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s +declare void @callback1() +declare void @callback2() + +declare void @othercall() + +define internal void @UseCallback(void()* %pCallback) { + call void %pCallback() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + call void @othercall() + ret void +} + +define void @foo(void()* %pNonConstCallback) +{ +Entry: +; CHECK: Entry +; CHECK-NOT: call void @UseCallback(void ()* @callback1) +; CHECK: call void @UseCallback(void ()* @callback2) +; CHECK-NEXT: call void @UseCallback(void ()* @callback2) +; CHECK-NEXT: ret void + call void @UseCallback(void()* @callback1) + call void @UseCallback(void()* @callback1) + call void @UseCallback(void()* @callback1) + call void @UseCallback(void()* 
@callback1) + call void @UseCallback(void()* @callback2) + call void @UseCallback(void()* @callback2) + + ret void +}