Now using a variant of the existing inlining heuristics to decide whether to create a given specialization of a function in PartialSpecialization. If the total performance bonus across all callsites passing the same constant exceeds the specialization cost, we create the specialization.
llvm-svn: 116158
This commit is contained in:
parent
cf263b0cbd
commit
b8d7efe785
|
@ -143,6 +143,18 @@ namespace llvm {
|
||||||
Function *Callee,
|
Function *Callee,
|
||||||
SmallPtrSet<const Function *, 16> &NeverInline);
|
SmallPtrSet<const Function *, 16> &NeverInline);
|
||||||
|
|
||||||
|
/// getSpecializationBonus - The heuristic used to determine the per-call
|
||||||
|
/// performance boost for using a specialization of Callee with argument
|
||||||
|
/// SpecializedArgNo replaced by a constant.
|
||||||
|
int getSpecializationBonus(Function *Callee,
|
||||||
|
SmallVectorImpl<unsigned> &SpecializedArgNo);
|
||||||
|
|
||||||
|
/// getSpecializationCost - The heuristic used to determine the code-size
|
||||||
|
/// impact of creating a specialized version of Callee with argument
|
||||||
|
/// SpecializedArgNo replaced by a constant.
|
||||||
|
InlineCost getSpecializationCost(Function *Callee,
|
||||||
|
SmallVectorImpl<unsigned> &SpecializedArgNo);
|
||||||
|
|
||||||
/// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
/// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||||
/// higher threshold to determine if the function call should be inlined.
|
/// higher threshold to determine if the function call should be inlined.
|
||||||
float getInlineFudgeFactor(CallSite CS);
|
float getInlineFudgeFactor(CallSite CS);
|
||||||
|
|
|
@ -312,6 +312,42 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline()
|
||||||
Metrics.containsIndirectBr);
|
Metrics.containsIndirectBr);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
// getSpecializationBonus - The heuristic used to determine the per-call
|
||||||
|
// performance boost for using a specialization of Callee with argument
|
||||||
|
// SpecializedArgNos replaced by a constant.
|
||||||
|
int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
|
||||||
|
SmallVectorImpl<unsigned> &SpecializedArgNos)
|
||||||
|
{
|
||||||
|
if (Callee->mayBeOverridden())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
int Bonus = 0;
|
||||||
|
// If this function uses the coldcc calling convention, prefer not to
|
||||||
|
// specialize it.
|
||||||
|
if (Callee->getCallingConv() == CallingConv::Cold)
|
||||||
|
Bonus -= InlineConstants::ColdccPenalty;
|
||||||
|
|
||||||
|
// Get information about the callee.
|
||||||
|
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
|
||||||
|
|
||||||
|
// If we haven't calculated this information yet, do so now.
|
||||||
|
if (CalleeFI->Metrics.NumBlocks == 0)
|
||||||
|
CalleeFI->analyzeFunction(Callee);
|
||||||
|
|
||||||
|
|
||||||
|
for (unsigned i = 0, s = SpecializedArgNos.size();
|
||||||
|
i < s; ++i )
|
||||||
|
{
|
||||||
|
Bonus += CalleeFI->ArgumentWeights[SpecializedArgNos[i]].ConstantBonus;
|
||||||
|
}
|
||||||
|
// Calls usually take a long time, so they make the specialization gain
|
||||||
|
// smaller.
|
||||||
|
Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
|
||||||
|
|
||||||
|
return Bonus;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// getInlineCost - The heuristic used to determine if we should inline the
|
// getInlineCost - The heuristic used to determine if we should inline the
|
||||||
// function call or not.
|
// function call or not.
|
||||||
//
|
//
|
||||||
|
@ -442,6 +478,40 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
|
||||||
return llvm::InlineCost::get(InlineCost);
|
return llvm::InlineCost::get(InlineCost);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getSpecializationCost - The heuristic used to determine the code-size
|
||||||
|
// impact of creating a specialized version of Callee with argument
|
||||||
|
// SpecializedArgNos replaced by a constant.
|
||||||
|
InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
|
||||||
|
SmallVectorImpl<unsigned> &SpecializedArgNos)
|
||||||
|
{
|
||||||
|
// Don't specialize functions which can be redefined at link-time to mean
|
||||||
|
// something else.
|
||||||
|
if (Callee->mayBeOverridden())
|
||||||
|
return llvm::InlineCost::getNever();
|
||||||
|
|
||||||
|
// Get information about the callee.
|
||||||
|
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
|
||||||
|
|
||||||
|
// If we haven't calculated this information yet, do so now.
|
||||||
|
if (CalleeFI->Metrics.NumBlocks == 0)
|
||||||
|
CalleeFI->analyzeFunction(Callee);
|
||||||
|
|
||||||
|
int Cost = 0;
|
||||||
|
|
||||||
|
// Look at the original size of the callee. Each instruction counts as 5.
|
||||||
|
Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
|
||||||
|
|
||||||
|
// Offset that with the amount of code that can be constant-folded
|
||||||
|
// away with the given arguments replaced by constants.
|
||||||
|
for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
|
||||||
|
ae = SpecializedArgNos.end(); an != ae; ++an)
|
||||||
|
{
|
||||||
|
Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
return llvm::InlineCost::get(Cost);
|
||||||
|
}
|
||||||
|
|
||||||
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||||
// higher threshold to determine if the function call should be inlined.
|
// higher threshold to determine if the function call should be inlined.
|
||||||
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
|
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "llvm/Module.h"
|
#include "llvm/Module.h"
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
|
#include "llvm/Analysis/InlineCost.h"
|
||||||
#include "llvm/Transforms/Utils/Cloning.h"
|
#include "llvm/Transforms/Utils/Cloning.h"
|
||||||
#include "llvm/Support/CallSite.h"
|
#include "llvm/Support/CallSite.h"
|
||||||
#include "llvm/ADT/DenseSet.h"
|
#include "llvm/ADT/DenseSet.h"
|
||||||
|
@ -37,17 +38,12 @@ STATISTIC(numReplaced, "Number of callers replaced by specialization");
|
||||||
// Maximum number of arguments that can be marked as interesting
|
// Maximum number of arguments that can be marked as interesting
|
||||||
static const int MaxInterests = 6;
|
static const int MaxInterests = 6;
|
||||||
|
|
||||||
// Call must be used at least occasionally
|
|
||||||
static const int CallsMin = 5;
|
|
||||||
|
|
||||||
// Must have 10% of calls having the same constant to specialize on
|
|
||||||
static const double ConstValPercent = .1;
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
typedef SmallVector<int, MaxInterests> InterestingArgVector;
|
typedef SmallVector<int, MaxInterests> InterestingArgVector;
|
||||||
class PartSpec : public ModulePass {
|
class PartSpec : public ModulePass {
|
||||||
void scanForInterest(Function&, InterestingArgVector&);
|
void scanForInterest(Function&, InterestingArgVector&);
|
||||||
int scanDistribution(Function&, int, std::map<Constant*, int>&);
|
int scanDistribution(Function&, int, std::map<Constant*, int>&);
|
||||||
|
InlineCostAnalyzer CA;
|
||||||
public :
|
public :
|
||||||
static char ID; // Pass identification, replacement for typeid
|
static char ID; // Pass identification, replacement for typeid
|
||||||
PartSpec() : ModulePass(ID) {}
|
PartSpec() : ModulePass(ID) {}
|
||||||
|
@ -79,6 +75,10 @@ SpecializeFunction(Function* F,
|
||||||
NF->setLinkage(GlobalValue::InternalLinkage);
|
NF->setLinkage(GlobalValue::InternalLinkage);
|
||||||
F->getParent()->getFunctionList().push_back(NF);
|
F->getParent()->getFunctionList().push_back(NF);
|
||||||
|
|
||||||
|
// FIXME: Specialized versions getting the same constants should also get
|
||||||
|
// the same name. That way, specializations for public functions can be
|
||||||
|
// marked linkonce_odr and reused across modules.
|
||||||
|
|
||||||
for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
|
for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
|
||||||
ii != ee; ) {
|
ii != ee; ) {
|
||||||
Value::use_iterator i = ii;
|
Value::use_iterator i = ii;
|
||||||
|
@ -144,22 +144,37 @@ bool PartSpec::runOnModule(Module &M) {
|
||||||
bool breakOuter = false;
|
bool breakOuter = false;
|
||||||
for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
|
for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
|
||||||
std::map<Constant*, int> distribution;
|
std::map<Constant*, int> distribution;
|
||||||
int total = scanDistribution(F, interestingArgs[x], distribution);
|
scanDistribution(F, interestingArgs[x], distribution);
|
||||||
if (total > CallsMin)
|
for (std::map<Constant*, int>::iterator ii = distribution.begin(),
|
||||||
for (std::map<Constant*, int>::iterator ii = distribution.begin(),
|
ee = distribution.end(); ii != ee; ++ii) {
|
||||||
ee = distribution.end(); ii != ee; ++ii)
|
// The distribution map might have an entry for NULL (i.e., one or more
|
||||||
if (total > ii->second && ii->first &&
|
// callsites were passing a non-constant there). We allow that to
|
||||||
ii->second > total * ConstValPercent) {
|
// happen so that we can see whether any callsites pass a non-constant;
|
||||||
ValueMap<const Value*, Value*> m;
|
// if none do and the function is internal, we might have an opportunity
|
||||||
Function::arg_iterator arg = F.arg_begin();
|
// to kill the original function.
|
||||||
for (int y = 0; y < interestingArgs[x]; ++y)
|
if (!ii->first) continue;
|
||||||
++arg;
|
int bonus = ii->second;
|
||||||
m[&*arg] = ii->first;
|
SmallVector<unsigned, 1> argnos;
|
||||||
SpecializeFunction(&F, m);
|
argnos.push_back(interestingArgs[x]);
|
||||||
++numSpecialized;
|
InlineCost cost = CA.getSpecializationCost(&F, argnos);
|
||||||
breakOuter = true;
|
// FIXME: If this is the last constant entry, and no non-constant
|
||||||
Changed = true;
|
// entries exist, and the target function is internal, the cost should
|
||||||
}
|
// be reduced by the original size of the target function, almost
|
||||||
|
// certainly making it negative and causing a specialization that will
|
||||||
|
// leave the original function dead and removable.
|
||||||
|
if (cost.isAlways() ||
|
||||||
|
(cost.isVariable() && cost.getValue() < bonus)) {
|
||||||
|
ValueMap<const Value*, Value*> m;
|
||||||
|
Function::arg_iterator arg = F.arg_begin();
|
||||||
|
for (int y = 0; y < interestingArgs[x]; ++y)
|
||||||
|
++arg;
|
||||||
|
m[&*arg] = ii->first;
|
||||||
|
SpecializeFunction(&F, m);
|
||||||
|
++numSpecialized;
|
||||||
|
breakOuter = true;
|
||||||
|
Changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Changed;
|
return Changed;
|
||||||
|
@ -170,28 +185,20 @@ bool PartSpec::runOnModule(Module &M) {
|
||||||
void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
|
void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
|
||||||
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
|
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
|
||||||
ii != ee; ++ii) {
|
ii != ee; ++ii) {
|
||||||
for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
|
int argno = std::distance(F.arg_begin(), ii);
|
||||||
ui != ue; ++ui) {
|
SmallVector<unsigned, 1> argnos;
|
||||||
|
argnos.push_back(argno);
|
||||||
bool interesting = false;
|
int bonus = CA.getSpecializationBonus(&F, argnos);
|
||||||
User *U = *ui;
|
if (bonus > 0) {
|
||||||
if (isa<CmpInst>(U)) interesting = true;
|
args.push_back(argno);
|
||||||
else if (isa<CallInst>(U))
|
|
||||||
interesting = ui->getOperand(0) == ii;
|
|
||||||
else if (isa<InvokeInst>(U))
|
|
||||||
interesting = ui->getOperand(0) == ii;
|
|
||||||
else if (isa<SwitchInst>(U)) interesting = true;
|
|
||||||
else if (isa<BranchInst>(U)) interesting = true;
|
|
||||||
|
|
||||||
if (interesting) {
|
|
||||||
args.push_back(std::distance(F.arg_begin(), ii));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// scanDistribution - Construct a histogram of the constants passed for argument number arg of F.
|
/// scanDistribution - Construct a histogram of the constants passed for argument number arg of F.
|
||||||
|
/// For each distinct constant, we'll compute the total of the specialization
|
||||||
|
/// bonus across all callsites passing that constant; if that total exceeds
|
||||||
|
/// the specialization cost, we will create the specialization.
|
||||||
int PartSpec::scanDistribution(Function& F, int arg,
|
int PartSpec::scanDistribution(Function& F, int arg,
|
||||||
std::map<Constant*, int>& dist) {
|
std::map<Constant*, int>& dist) {
|
||||||
bool hasIndirect = false;
|
bool hasIndirect = false;
|
||||||
|
@ -201,7 +208,10 @@ int PartSpec::scanDistribution(Function& F, int arg,
|
||||||
User *U = *ii;
|
User *U = *ii;
|
||||||
CallSite CS(U);
|
CallSite CS(U);
|
||||||
if (CS && CS.getCalledFunction() == &F) {
|
if (CS && CS.getCalledFunction() == &F) {
|
||||||
++dist[dyn_cast<Constant>(CS.getArgument(arg))];
|
SmallVector<unsigned, 1> argnos;
|
||||||
|
argnos.push_back(arg);
|
||||||
|
dist[dyn_cast<Constant>(CS.getArgument(arg))] +=
|
||||||
|
CA.getSpecializationBonus(&F, argnos);
|
||||||
++total;
|
++total;
|
||||||
} else
|
} else
|
||||||
hasIndirect = true;
|
hasIndirect = true;
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
; If there are not enough callsites for a particular specialization to
|
||||||
|
; justify its existence, the specialization shouldn't be created.
|
||||||
|
;
|
||||||
|
; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s
|
||||||
|
declare void @callback1()
|
||||||
|
declare void @callback2()
|
||||||
|
|
||||||
|
declare void @othercall()
|
||||||
|
|
||||||
|
define internal void @UseCallback(void()* %pCallback) {
|
||||||
|
call void %pCallback()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
call void @othercall()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo(void()* %pNonConstCallback)
|
||||||
|
{
|
||||||
|
Entry:
|
||||||
|
; CHECK: Entry
|
||||||
|
; CHECK-NOT: call void @UseCallback(void ()* @callback1)
|
||||||
|
; CHECK: call void @UseCallback(void ()* @callback2)
|
||||||
|
; CHECK-NEXT: call void @UseCallback(void ()* @callback2)
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
call void @UseCallback(void()* @callback1)
|
||||||
|
call void @UseCallback(void()* @callback1)
|
||||||
|
call void @UseCallback(void()* @callback1)
|
||||||
|
call void @UseCallback(void()* @callback1)
|
||||||
|
call void @UseCallback(void()* @callback2)
|
||||||
|
call void @UseCallback(void()* @callback2)
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue