hanchenye-llvm-project/llvm/lib/Analysis/DataStructure/BottomUpClosure.cpp

380 lines
13 KiB
C++
Raw Normal View History

//===- BottomUpClosure.cpp - Compute bottom-up interprocedural closure ----===//
//
// This file implements the BUDataStructures class, which represents the
// Bottom-Up Interprocedural closure of the data structure graph over the
// program. This is useful for applications like pool allocation, but **not**
// applications like alias analysis.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DataStructure.h"
#include "llvm/Analysis/DSGraph.h"
#include "llvm/Module.h"
#include "Support/Statistic.h"
2002-11-18 06:16:28 +08:00
namespace {
Statistic<> MaxSCC("budatastructure", "Maximum SCC Size in Call Graph");
RegisterAnalysis<BUDataStructures>
X("budatastructure", "Bottom-up Data Structure Analysis");
2002-11-18 06:16:28 +08:00
}
using namespace DS;
// isVAHackFn - Return true if the name of F matches one of a fixed list of
// library routines (printf, sscanf, fprintf, open, sprintf, fputs, fscanf).
// Only the function's name is inspected.
//
static bool isVAHackFn(const Function *F) {
  static const char *const HackNames[] = {
    "printf", "sscanf", "fprintf", "open", "sprintf", "fputs", "fscanf"
  };
  const unsigned NumHackNames = sizeof(HackNames) / sizeof(HackNames[0]);

  // Check the candidates in the same order as the list above, stopping at the
  // first match.
  for (unsigned Idx = 0; Idx != NumHackNames; ++Idx)
    if (F->getName() == HackNames[Idx])
      return true;
  return false;
}
// isCompleteNode - Decide whether every target of the node N is known.  The
// node must not be marked incomplete, and each external global it lists must be
// one of the functions accepted by isVAHackFn.
//
static inline bool isCompleteNode(DSNode *N) {
  if (N->isIncomplete())
    return false;

  const std::vector<GlobalValue*> &Globals = N->getGlobals();
  for (std::vector<GlobalValue*>::const_iterator GI = Globals.begin(),
         GE = Globals.end(); GI != GE; ++GI) {
    // Globals that have a body never disqualify the node.
    if (!(*GI)->isExternal())
      continue;

    // An external global is tolerated only if it is a "VA hack" function.
    if (!isVAHackFn(cast<Function>(*GI)))
      return false;
  }
  return true;
}
struct CallSiteIterator {
// FCs are the edges out of the current node are the call site targets...
std::vector<DSCallSite> *FCs;
unsigned CallSite;
unsigned CallSiteEntry;
CallSiteIterator(std::vector<DSCallSite> &CS) : FCs(&CS) {
CallSite = 0; CallSiteEntry = 0;
advanceToValidCallee();
}
// End iterator ctor...
CallSiteIterator(std::vector<DSCallSite> &CS, bool) : FCs(&CS) {
CallSite = FCs->size(); CallSiteEntry = 0;
}
void advanceToValidCallee() {
while (CallSite < FCs->size()) {
if ((*FCs)[CallSite].isDirectCall()) {
if (CallSiteEntry == 0 && // direct call only has one target...
(!(*FCs)[CallSite].getCalleeFunc()->isExternal() ||
isVAHackFn((*FCs)[CallSite].getCalleeFunc()))) // If not external
return;
} else {
DSNode *CalleeNode = (*FCs)[CallSite].getCalleeNode();
if (CallSiteEntry || isCompleteNode(CalleeNode)) {
const std::vector<GlobalValue*> &Callees = CalleeNode->getGlobals();
if (CallSiteEntry < Callees.size())
return;
}
}
CallSiteEntry = 0;
++CallSite;
}
}
public:
static CallSiteIterator begin(DSGraph &G) { return G.getAuxFunctionCalls(); }
static CallSiteIterator end(DSGraph &G) {
return CallSiteIterator(G.getAuxFunctionCalls(), true);
}
static CallSiteIterator begin(std::vector<DSCallSite> &CSs) { return CSs; }
static CallSiteIterator end(std::vector<DSCallSite> &CSs) {
return CallSiteIterator(CSs, true);
}
bool operator==(const CallSiteIterator &CSI) const {
return CallSite == CSI.CallSite && CallSiteEntry == CSI.CallSiteEntry;
}
bool operator!=(const CallSiteIterator &CSI) const { return !operator==(CSI);}
unsigned getCallSiteIdx() const { return CallSite; }
DSCallSite &getCallSite() const { return (*FCs)[CallSite]; }
Function *operator*() const {
if ((*FCs)[CallSite].isDirectCall()) {
return (*FCs)[CallSite].getCalleeFunc();
} else {
DSNode *Node = (*FCs)[CallSite].getCalleeNode();
return cast<Function>(Node->getGlobals()[CallSiteEntry]);
}
}
CallSiteIterator& operator++() { // Preincrement
++CallSiteEntry;
advanceToValidCallee();
return *this;
}
CallSiteIterator operator++(int) { // Postincrement
CallSiteIterator tmp = *this; ++*this; return tmp;
}
};
// run - Calculate the bottom up data structure graphs for each function in the
// program.
//
// A fresh globals graph is copied from the LocalDataStructures analysis.  The
// graphs reachable from the module's main function (if there is one) are
// computed first; afterwards every function with a body that still has no
// entry in DSInfo is processed as well.  Always returns false.
//
bool BUDataStructures::run(Module &M) {
  LocalDataStructures &LocalDSA = getAnalysis<LocalDataStructures>();
  GlobalsGraph = new DSGraph(LocalDSA.getGlobalsGraph());
  GlobalsGraph->setPrintAuxCalls();

  Function *MainFunc = M.getMainFunction();
  if (MainFunc)
    calculateReachableGraphs(MainFunc);

  // Calculate the graphs for any functions that are unreachable from main...
  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
    if (!I->isExternal() && DSInfo.find(I) == DSInfo.end()) {
#ifndef NDEBUG
      // Only worth reporting when there actually is a main function to be
      // unreachable from.
      if (MainFunc)
        std::cerr << "*** Function unreachable from main: "
                  << I->getName() << "\n";
#endif
      calculateReachableGraphs(I);    // Calculate all graphs...
    }
  return false;
}
// calculateReachableGraphs - Start a graph calculation rooted at F with fresh
// bookkeeping: an empty function stack, an empty visit-number map, and visit
// numbers beginning at 1.
//
void BUDataStructures::calculateReachableGraphs(Function *F) {
  unsigned FirstID = 1;
  hash_map<Function*, unsigned> VisitNumbers;
  std::vector<Function*> FnStack;
  calculateGraphs(F, FnStack, FirstID, VisitNumbers);
}
// getOrCreateGraph - Return the graph recorded in DSInfo for F.  If there is
// none yet, one is created as a copy of the function's local graph, attached
// to the globals graph, and given an aux call list that starts out equal to
// its original call list.
//
DSGraph &BUDataStructures::getOrCreateGraph(Function *F) {
  // Looking the function up this way also creates its (null) map entry.
  DSGraph *&Slot = DSInfo[F];

  if (!Slot) {
    // Copy the local version into DSInfo...
    LocalDataStructures &Local = getAnalysis<LocalDataStructures>();
    Slot = new DSGraph(Local.getDSGraph(*F));
    Slot->setGlobalsGraph(GlobalsGraph);
    Slot->setPrintAuxCalls();

    // Start with a copy of the original call sites...
    Slot->getAuxFunctionCalls() = Slot->getFunctionCalls();
  }
  return *Slot;
}
// calculateGraphs - Depth-first walk over the call graph starting at F that
// discovers strongly connected components (SCCs) and computes the bottom-up
// graph for each one as soon as it has been completely explored.
//
//   F      - The function to visit; it must not have an entry in ValMap yet.
//   Stack  - Functions visited so far that are not yet assigned to a finished
//            SCC.
//   NextID - The next visit number to hand out; incremented here.
//   ValMap - Maps each visited function to its visit number, or to ~0U once
//            the function has been completely processed.
//
// Returns the smallest visit number found among F and the callees explored
// from it.  A result different from F's own number means that F belongs to an
// SCC whose root is further up the stack.
//
unsigned BUDataStructures::calculateGraphs(Function *F,
                                           std::vector<Function*> &Stack,
                                           unsigned &NextID,
                                     hash_map<Function*, unsigned> &ValMap) {
  assert(ValMap.find(F) == ValMap.end() && "Shouldn't revisit functions!");
  unsigned Min = NextID++, MyID = Min;
  ValMap[F] = Min;
  Stack.push_back(F);

  if (F->isExternal()) {   // sprintf, fprintf, sscanf, etc...
    // No callees!
    Stack.pop_back();
    ValMap[F] = ~0U;
    return Min;
  }

  DSGraph &Graph = getOrCreateGraph(F);

  // The edges out of the current node are the call site targets...
  for (CallSiteIterator I = CallSiteIterator::begin(Graph),
         E = CallSiteIterator::end(Graph); I != E; ++I) {
    Function *Callee = *I;
    unsigned M;
    // Have we visited the destination function yet?
    hash_map<Function*, unsigned>::iterator It = ValMap.find(Callee);
    if (It == ValMap.end())  // No, visit it now.
      M = calculateGraphs(Callee, Stack, NextID, ValMap);
    else                     // Yes, get its number.
      M = It->second;
    if (M < Min) Min = M;
  }

  assert(ValMap[F] == MyID && "SCC construction assumption wrong!");
  if (Min != MyID)
    return Min;         // This is part of a larger SCC!

  // If this is a new SCC, process it now.
  if (Stack.back() == F) {  // Special case the single "SCC" case here.
    DEBUG(std::cerr << "Visiting single node SCC #: " << MyID << " fn: "
                    << F->getName() << "\n");
    Stack.pop_back();
    DSGraph &G = getDSGraph(*F);
    DEBUG(std::cerr << " [BU] Calculating graph for: " << F->getName()<< "\n");
    calculateGraph(G);
    DEBUG(std::cerr << " [BU] Done inlining: " << F->getName() << " ["
                    << G.getGraphSize() << "+" << G.getAuxFunctionCalls().size()
                    << "]\n");

    if (MaxSCC < 1) MaxSCC = 1;

    // Should we revisit the graph?  If inlining left resolvable call sites
    // behind, forget the visit number of F and process it again.
    if (CallSiteIterator::begin(G) != CallSiteIterator::end(G)) {
      ValMap.erase(F);
      return calculateGraphs(F, Stack, NextID, ValMap);
    } else {
      ValMap[F] = ~0U;
    }
    return MyID;
  }

  // Otherwise F is the root of an SCC containing several functions: everything
  // on the stack from F upwards belongs to it.

  // SCCFunctions - Keep track of the functions in the current SCC
  //
  hash_set<Function*> SCCFunctions;

  Function *NF;
  std::vector<Function*>::iterator FirstInSCC = Stack.end();
  DSGraph *SCCGraph = 0;
  do {
    NF = *--FirstInSCC;
    ValMap[NF] = ~0U;
    SCCFunctions.insert(NF);

    // Figure out which graph is the largest one, in order to speed things up
    // a bit in situations where functions in the SCC have widely different
    // graph sizes.
    DSGraph &NFGraph = getDSGraph(*NF);
    if (!SCCGraph || SCCGraph->getGraphSize() < NFGraph.getGraphSize())
      SCCGraph = &NFGraph;
  } while (NF != F);

  std::cerr << "Calculating graph for SCC #: " << MyID << " of size: "
            << SCCFunctions.size() << "\n";

  // Compute the Max SCC Size...
  if (MaxSCC < SCCFunctions.size())
    MaxSCC = SCCFunctions.size();

  // First thing first, collapse all of the DSGraphs into a single graph for
  // the entire SCC.  We computed the largest graph, so clone all of the other
  // (smaller) graphs into it.  Discard all of the old graphs.
  //
  for (hash_set<Function*>::iterator I = SCCFunctions.begin(),
         E = SCCFunctions.end(); I != E; ++I) {
    DSGraph &G = getDSGraph(**I);
    if (&G != SCCGraph) {
      DSGraph::NodeMapTy NodeMap;
      SCCGraph->cloneInto(G, SCCGraph->getScalarMap(),
                          SCCGraph->getReturnNodes(), NodeMap, 0);
      // Update the DSInfo map and delete the old graph...
      DSInfo[*I] = SCCGraph;
      delete &G;
    }
  }

  // Now that we have one big happy family, resolve all of the call sites in
  // the graph...
  calculateGraph(*SCCGraph);
  DEBUG(std::cerr << " [BU] Done inlining SCC [" << SCCGraph->getGraphSize()
                  << "+" << SCCGraph->getAuxFunctionCalls().size() << "]\n");

  std::cerr << "DONE with SCC #: " << MyID << "\n";

  // We never have to revisit "SCC" processed functions...

  // Drop the stuff we don't need from the end of the stack
  Stack.erase(FirstInSCC, Stack.end());
  return MyID;
}
// releaseMemory - If the pass pipeline is done with this pass, we can release
// our memory... here...
//
void BUDataStructures::releaseMemory() {
for (hash_map<Function*, DSGraph*>::iterator I = DSInfo.begin(),
E = DSInfo.end(); I != E; ++I) {
I->second->getReturnNodes().erase(I->first);
if (I->second->getReturnNodes().empty())
delete I->second;
}
// Empty map so next time memory is released, data structures are not
// re-deleted.
DSInfo.clear();
delete GlobalsGraph;
GlobalsGraph = 0;
}
// calculateGraph - Resolve the currently resolvable call sites in the aux call
// list of Graph by merging the callee graphs into it.
//
// The aux call list is moved aside and rebuilt: call sites the iterator skips
// as unresolvable are copied back, while call sites that were visited are not.
// Afterwards the incomplete markers are recomputed and dead nodes are removed.
//
void BUDataStructures::calculateGraph(DSGraph &Graph) {
  // Move our call site list into TempFCs so that inline call sites go into the
  // new call site list and doesn't invalidate our iterators!
  std::vector<DSCallSite> TempFCs;
  std::vector<DSCallSite> &AuxCallsList = Graph.getAuxFunctionCalls();
  TempFCs.swap(AuxCallsList);

  DSGraph::ReturnNodesTy &ReturnNodes = Graph.getReturnNodes();

  // Loop over all of the resolvable call sites.  LastCallSiteIdx starts at ~0U
  // so that the increment at the top of the first iteration wraps it to 0.
  unsigned LastCallSiteIdx = ~0U;
  for (CallSiteIterator I = CallSiteIterator::begin(TempFCs),
         E = CallSiteIterator::end(TempFCs); I != E; ++I) {
    // If we skipped over any call sites, they must be unresolvable, copy them
    // to the real call site list.
    LastCallSiteIdx++;
    for (; LastCallSiteIdx < I.getCallSiteIdx(); ++LastCallSiteIdx)
      AuxCallsList.push_back(TempFCs[LastCallSiteIdx]);
    // Several callees may share one call site index; resetting the index here
    // keeps the increment above from running past the current call site.
    LastCallSiteIdx = I.getCallSiteIdx();

    // Resolve the current call...
    Function *Callee = *I;
    DSCallSite &CS = I.getCallSite();

    if (Callee->isExternal()) {
      // Ignore this case, simple varargs functions we cannot stub out!
    } else if (ReturnNodes.find(Callee) != ReturnNodes.end()) {
      // Self recursion... simply link up the formal arguments with the
      // actual arguments...
      DEBUG(std::cerr << " Self Inlining: " << Callee->getName() << "\n");

      // Handle self recursion by resolving the arguments and return value
      Graph.mergeInGraph(CS, *Callee, Graph, 0);
    } else {
      // Get the data structure graph for the called function.
      //
      DSGraph &GI = getDSGraph(*Callee);  // Graph to inline

      DEBUG(std::cerr << " Inlining graph for " << Callee->getName()
                      << "[" << GI.getGraphSize() << "+"
                      << GI.getAuxFunctionCalls().size() << "] into ["
                      << Graph.getGraphSize() << "+"
                      << Graph.getAuxFunctionCalls().size() << "]\n");

      // Merge the callee's graph into this one at the call site.
      Graph.mergeInGraph(CS, *Callee, GI,
                         DSGraph::KeepModRefBits |
                         DSGraph::StripAllocaBit | DSGraph::DontCloneCallNodes);
    }
  }

  // Make sure to catch any leftover unresolvable calls...
  for (++LastCallSiteIdx; LastCallSiteIdx < TempFCs.size(); ++LastCallSiteIdx)
    AuxCallsList.push_back(TempFCs[LastCallSiteIdx]);

  TempFCs.clear();

  // Recompute the Incomplete markers.  If there are any function calls left
  // now that are complete, we must loop!
  Graph.maskIncompleteMarkers();
  Graph.markIncompleteNodes(DSGraph::MarkFormalArgs);

  // FIXME: materialize nodes from the globals graph as necessary...
  Graph.removeDeadNodes(DSGraph::KeepUnreachableGlobals);
}