Refactor Internalization pass to use as a callback instead of a StringSet (NFC)

This will save a bunch of copies / initialization of intermediate
datastructure, and (hopefully) simplify the code.

This also abstract the symbol preservation mechanism outside of the
Internalization pass into the client code, which is not forced
to keep a map of strings for instance (ThinLTO will prefere hashes).

From: Mehdi Amini <mehdi.amini@apple.com>
llvm-svn: 266163
This commit is contained in:
Mehdi Amini 2016-04-13 04:20:32 +00:00
parent 9a16d655c7
commit 4078709957
6 changed files with 145 additions and 120 deletions

View File

@ -19,6 +19,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include <functional>
namespace llvm {
class ModuleSummaryIndex;
@ -120,17 +122,16 @@ Pass *createPruneEHPass();
/// createInternalizePass - This pass loops over all of the functions in the
/// input module, internalizing all globals (functions and variables) it can.
////
/// The symbols in \p ExportList are never internalized.
/// Before internalizing a symbol, the callback \p MustPreserveGV is invoked and
/// gives to the client the ability to prevent internalizing specific symbols.
///
/// The symbol in DSOList are internalized if it is safe to drop them from
/// the symbol table.
///
/// Note that commandline options that are used with the above function are not
/// used now!
ModulePass *createInternalizePass(StringSet<> ExportList);
/// Same as above, but with an exportList created for an array.
ModulePass *createInternalizePass(ArrayRef<const char *> ExportList);
ModulePass *
createInternalizePass(std::function<bool(const GlobalValue &)> MustPreserveGV);
/// createInternalizePass - Same as above, but with an empty exportList.
ModulePass *createInternalizePass();

View File

@ -337,8 +337,22 @@ void LTOCodeGenerator::applyScopeRestrictions() {
if (ScopeRestrictionsDone || !ShouldInternalize)
return;
LTOInternalize(*MergedModule, *TargetMach, MustPreserveSymbols,
AsmUndefinedRefs,
// Declare a callback for the internalize pass that will ask for every
// candidate GlobalValue if it can be internalized or not.
Mangler Mangler;
SmallString<64> MangledName;
auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
// Need to mangle the GV as the "MustPreserveSymbols" StringSet is filled
// with the linker supplied name, which on Darwin includes a leading
// underscore.
MangledName.clear();
MangledName.reserve(GV.getName().size() + 1);
Mangler::getNameWithPrefix(MangledName, GV.getName(),
MergedModule->getDataLayout());
return MustPreserveSymbols.count(MangledName);
};
LTOInternalize(*MergedModule, *TargetMach, MustPreserveGV, AsmUndefinedRefs,
(ShouldRestoreGlobalsLinkage ? &ExternalSymbols : nullptr));
ScopeRestrictionsDone = true;

View File

@ -23,31 +23,26 @@
using namespace llvm;
namespace {
class ComputePreserveList {
// Helper class that populate the array of symbols used in inlined assembly.
class ComputeAsmUsed {
public:
ComputePreserveList(const StringSet<> &MustPreserveSymbols,
const StringSet<> &AsmUndefinedRefs,
const TargetMachine &TM, const Module &TheModule,
StringMap<GlobalValue::LinkageTypes> *ExternalSymbols,
std::vector<const char *> &MustPreserveList,
SmallPtrSetImpl<const GlobalValue *> &AsmUsed)
: MustPreserveSymbols(MustPreserveSymbols),
AsmUndefinedRefs(AsmUndefinedRefs), TM(TM),
ExternalSymbols(ExternalSymbols), MustPreserveList(MustPreserveList),
AsmUsed(AsmUsed) {
ComputeAsmUsed(const StringSet<> &AsmUndefinedRefs, const TargetMachine &TM,
const Module &TheModule,
StringMap<GlobalValue::LinkageTypes> *ExternalSymbols,
SmallPtrSetImpl<const GlobalValue *> &AsmUsed)
: AsmUndefinedRefs(AsmUndefinedRefs), TM(TM),
ExternalSymbols(ExternalSymbols), AsmUsed(AsmUsed) {
accumulateAndSortLibcalls(TheModule);
for (const Function &F : TheModule)
applyRestriction(F);
findAsmUses(F);
for (const GlobalVariable &GV : TheModule.globals())
applyRestriction(GV);
findAsmUses(GV);
for (const GlobalAlias &GA : TheModule.aliases())
applyRestriction(GA);
findAsmUses(GA);
}
private:
// Inputs
const StringSet<> &MustPreserveSymbols;
const StringSet<> &AsmUndefinedRefs;
const TargetMachine &TM;
@ -57,7 +52,6 @@ private:
// Output
StringMap<GlobalValue::LinkageTypes> *ExternalSymbols;
std::vector<const char *> &MustPreserveList;
SmallPtrSetImpl<const GlobalValue *> &AsmUsed;
// Collect names of runtime library functions. User-defined functions with the
@ -97,7 +91,7 @@ private:
Libcalls.end());
}
void applyRestriction(const GlobalValue &GV) {
void findAsmUses(const GlobalValue &GV) {
// There are no restrictions to apply to declarations.
if (GV.isDeclaration())
return;
@ -109,8 +103,6 @@ private:
SmallString<64> Buffer;
TM.getNameWithPrefix(Buffer, &GV, Mangler);
if (MustPreserveSymbols.count(Buffer))
MustPreserveList.push_back(GV.getName().data());
if (AsmUndefinedRefs.count(Buffer))
AsmUsed.insert(&GV);
@ -146,18 +138,14 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
UsedValues.insert(GV);
}
// mark which symbols can not be internalized
void llvm::LTOInternalize(
Module &TheModule, const TargetMachine &TM,
const StringSet<> &MustPreserveSymbols, const StringSet<> &AsmUndefinedRefs,
const std::function<bool(const GlobalValue &)> &MustPreserveSymbols,
const StringSet<> &AsmUndefinedRefs,
StringMap<GlobalValue::LinkageTypes> *ExternalSymbols) {
legacy::PassManager passes;
// mark which symbols can not be internalized
Mangler Mangler;
std::vector<const char *> MustPreserveList;
SmallPtrSet<const GlobalValue *, 8> AsmUsed;
ComputePreserveList(MustPreserveSymbols, AsmUndefinedRefs, TM, TheModule,
ExternalSymbols, MustPreserveList, AsmUsed);
ComputeAsmUsed(AsmUndefinedRefs, TM, TheModule, ExternalSymbols, AsmUsed);
GlobalVariable *LLVMCompilerUsed =
TheModule.getGlobalVariable("llvm.compiler.used");
@ -182,7 +170,8 @@ void llvm::LTOInternalize(
LLVMCompilerUsed->setSection("llvm.metadata");
}
passes.add(createInternalizePass(MustPreserveList));
legacy::PassManager passes;
passes.add(createInternalizePass(MustPreserveSymbols));
// apply scope restrictions
passes.run(TheModule);

View File

@ -17,14 +17,17 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/GlobalValue.h"
#include <functional>
namespace llvm {
class Module;
class TargetMachine;
void LTOInternalize(Module &TheModule, const TargetMachine &TM,
const StringSet<> &MustPreserveSymbols,
const StringSet<> &AsmUndefinedRefs,
StringMap<GlobalValue::LinkageTypes> *ExternalSymbols);
void LTOInternalize(
Module &TheModule, const TargetMachine &TM,
const std::function<bool(const GlobalValue &)> &MustPreserveSymbols,
const StringSet<> &AsmUndefinedRefs,
StringMap<GlobalValue::LinkageTypes> *ExternalSymbols);
}
#endif // LLVM_LTO_LTOINTERNALIZE_H

View File

@ -106,10 +106,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
}
void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
std::vector<const char *> Export;
if (AllButMain)
Export.push_back("main");
unwrap(PM)->add(createInternalizePass(Export));
auto PreserveMain = [=](const GlobalValue &GV) {
return AllButMain && GV.getName() == "main";
};
unwrap(PM)->add(createInternalizePass(PreserveMain));
}
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {

View File

@ -54,19 +54,84 @@ APIList("internalize-public-api-list", cl::value_desc("list"),
cl::CommaSeparated);
namespace {
class InternalizePass : public ModulePass {
// Helper to load an API list to preserve from file and expose it as a functor
// to the Internalize Pass
class PreserveAPIList {
public:
PreserveAPIList() {
if (!APIFile.empty())
LoadFile(APIFile);
ExternalNames.insert(APIList.begin(), APIList.end());
}
bool operator()(const GlobalValue &GV) {
return ExternalNames.count(GV.getName());
}
private:
// Contains the set of symbols loaded from file
StringSet<> ExternalNames;
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
explicit InternalizePass(ArrayRef<const char *> ExportList);
explicit InternalizePass(StringSet<> ExportList);
void LoadFile(const char *Filename);
void LoadFile(StringRef Filename) {
// Load the APIFile...
std::ifstream In(Filename.data());
if (!In.good()) {
errs() << "WARNING: Internalize couldn't load file '" << Filename
<< "'! Continuing as if it's empty.\n";
return; // Just continue as if the file were empty
}
while (In) {
std::string Symbol;
In >> Symbol;
if (!Symbol.empty())
ExternalNames.insert(Symbol);
}
}
};
}
namespace {
class InternalizePass : public ModulePass {
// Client supply callback to control wheter a symbol must be preserved.
std::function<bool(const GlobalValue &)> MustPreserveGV;
// Set of symbols private to the compiler that this pass should not touch.
StringSet<> AlwaysPreserved;
// Return false if we're allowed to internalize this GV.
bool ShouldPreserveGV(const GlobalValue &GV) {
// Function must be defined here
if (GV.isDeclaration())
return true;
// Available externally is really just a "declaration with a body".
if (GV.hasAvailableExternallyLinkage())
return true;
// Assume that dllexported symbols are referenced elsewhere
if (GV.hasDLLExportStorageClass())
return true;
// Already local, has nothing to do.
if (GV.hasLocalLinkage())
return false;
// Check some special cases
if (AlwaysPreserved.count(GV.getName()))
return true;
return MustPreserveGV(GV);
}
bool maybeInternalize(GlobalValue &GV,
const std::set<const Comdat *> &ExternalComdats);
void checkComdatVisibility(GlobalValue &GV,
std::set<const Comdat *> &ExternalComdats);
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
InternalizePass(std::function<bool(const GlobalValue &)> MustPreserveGV);
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@ -80,60 +145,13 @@ char InternalizePass::ID = 0;
INITIALIZE_PASS(InternalizePass, "internalize",
"Internalize Global Symbols", false, false)
InternalizePass::InternalizePass() : ModulePass(ID) {
InternalizePass::InternalizePass()
: ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
InternalizePass::InternalizePass(
std::function<bool(const GlobalValue &)> MustPreserveGV)
: ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
ExternalNames.insert(APIList.begin(), APIList.end());
}
InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
: ModulePass(ID) {
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
itr != ExportList.end(); itr++) {
ExternalNames.insert(*itr);
}
}
InternalizePass::InternalizePass(StringSet<> ExportList)
: ModulePass(ID), ExternalNames(std::move(ExportList)) {}
void InternalizePass::LoadFile(const char *Filename) {
// Load the APIFile...
std::ifstream In(Filename);
if (!In.good()) {
errs() << "WARNING: Internalize couldn't load file '" << Filename
<< "'! Continuing as if it's empty.\n";
return; // Just continue as if the file were empty
}
while (In) {
std::string Symbol;
In >> Symbol;
if (!Symbol.empty())
ExternalNames.insert(Symbol);
}
}
static bool isExternallyVisible(const GlobalValue &GV,
const StringSet<> &ExternalNames) {
// Function must be defined here
if (GV.isDeclaration())
return true;
// Available externally is really just a "declaration with a body".
if (GV.hasAvailableExternallyLinkage())
return true;
// Assume that dllexported symbols are referenced elsewhere
if (GV.hasDLLExportStorageClass())
return true;
// Marked to keep external?
if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
return true;
return false;
}
// Internalize GV if it is possible to do so, i.e. it is not externally visible
@ -154,7 +172,7 @@ bool InternalizePass::maybeInternalize(
if (GV.hasLocalLinkage())
return false;
if (isExternallyVisible(GV, ExternalNames))
if (ShouldPreserveGV(GV))
return false;
}
@ -171,7 +189,7 @@ void InternalizePass::checkComdatVisibility(
if (!C)
return;
if (isExternallyVisible(GV, ExternalNames))
if (ShouldPreserveGV(GV))
ExternalComdats.insert(C);
}
@ -204,7 +222,7 @@ bool InternalizePass::runOnModule(Module &M) {
// conservative, we internalize symbols in llvm.compiler.used, but we
// keep llvm.compiler.used so that the symbol is not deleted by llvm.
for (GlobalValue *V : Used) {
ExternalNames.insert(V->getName());
AlwaysPreserved.insert(V->getName());
}
// Mark all functions not in the api as internal.
@ -223,20 +241,20 @@ bool InternalizePass::runOnModule(Module &M) {
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
// FIXME: Shouldn't this just filter on llvm.metadata section??
ExternalNames.insert("llvm.used");
ExternalNames.insert("llvm.compiler.used");
AlwaysPreserved.insert("llvm.used");
AlwaysPreserved.insert("llvm.compiler.used");
// Never internalize anchors used by the machine module info, else the info
// won't find them. (see MachineModuleInfo.)
ExternalNames.insert("llvm.global_ctors");
ExternalNames.insert("llvm.global_dtors");
ExternalNames.insert("llvm.global.annotations");
AlwaysPreserved.insert("llvm.global_ctors");
AlwaysPreserved.insert("llvm.global_dtors");
AlwaysPreserved.insert("llvm.global.annotations");
// Never internalize symbols code-gen inserts.
// FIXME: We should probably add this (and the __stack_chk_guard) via some
// type of call-back in CodeGen.
ExternalNames.insert("__stack_chk_fail");
ExternalNames.insert("__stack_chk_guard");
AlwaysPreserved.insert("__stack_chk_fail");
AlwaysPreserved.insert("__stack_chk_guard");
// Mark all global variables with initializers that are not in the api as
// internal as well.
@ -268,12 +286,12 @@ bool InternalizePass::runOnModule(Module &M) {
return true;
}
ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
return new InternalizePass(ExportList);
ModulePass *llvm::createInternalizePass() {
return new InternalizePass(PreserveAPIList());
}
ModulePass *llvm::createInternalizePass(StringSet<> ExportList) {
return new InternalizePass(std::move(ExportList));
ModulePass *llvm::createInternalizePass(
std::function<bool(const GlobalValue &)> MustPreserveGV) {
return new InternalizePass(std::move(MustPreserveGV));
}