LowerBitSets: Extend pass to support functions as bitset members.

This change extends the bitset lowering pass to support bitsets that may
contain either functions or global variables. A function bitset is lowered to
a jump table that is laid out before one of the functions in the bitset.

Also add support for non-string bitset identifier names. This allows for
distinct metadata nodes to stand in for names with internal linkage,
as done in D11857.

Differential Revision: http://reviews.llvm.org/D11856

llvm-svn: 247080
This commit is contained in:
Peter Collingbourne 2015-09-08 21:57:45 +00:00
parent 18608a0a55
commit c634ed0b1a
8 changed files with 559 additions and 144 deletions

View File

@ -10,17 +10,41 @@ for the type of the class or its derived classes.
To use the mechanism, a client creates a global metadata node named
``llvm.bitsets``. Each element is a metadata node with three elements:
the first is a metadata string containing an identifier for the bitset,
the second is a global variable and the third is a byte offset into the
global variable.
1. a metadata object representing an identifier for the bitset
2. either a global variable or a function
3. a byte offset into the global (generally zero for functions)
Each bitset must exclusively contain either global variables or functions.
.. admonition:: Limitation
The current implementation only supports functions as members of bitsets on
the x86-32 and x86-64 architectures.
This will cause a link-time optimization pass to generate bitsets from the
memory addresses referenced from the elements of the bitset metadata. The pass
will lay out the referenced globals consecutively, so their definitions must
be available at LTO time. The `GlobalLayoutBuilder`_ class is responsible for
laying out the globals efficiently to minimize the sizes of the underlying
bitsets. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, generates code
to test whether a given pointer is a member of a bitset.
memory addresses referenced from the elements of the bitset metadata. The
pass will lay out referenced global variables consecutively, so their
definitions must be available at LTO time.
A bit set containing functions is transformed into a jump table, which
is a block of code consisting of one branch instruction for each of the
functions in the bit set that branches to the target function. Any taken
function addresses are redirected to the corresponding jump table entry. In the
object file's symbol table, the jump table entries take the identities of
the original functions, so that addresses taken outside the module will pass
any verification done inside the module.
Jump tables may call external functions, so their definitions need not
be available at LTO time. Note that if an externally defined function is a
member of a bitset, there is no guarantee that its identity within the module
will be the same as its identity outside of the module, as the former will
be the jump table entry if a jump table is necessary.
The `GlobalLayoutBuilder`_ class is responsible for laying out the globals
efficiently to minimize the sizes of the underlying bitsets. An intrinsic,
:ref:`llvm.bitset.test <bitset.test>`, generates code to test whether a
given pointer is a member of a bitset.
:Example:
@ -33,13 +57,25 @@ to test whether a given pointer is a member of a bitset.
@c = internal global i32 0
@d = internal global [2 x i32] [i32 0, i32 0]
!llvm.bitsets = !{!0, !1, !2, !3, !4}
define void @e() {
ret void
}
define void @f() {
ret void
}
declare void @g()
!llvm.bitsets = !{!0, !1, !2, !3, !4, !5, !6}
!0 = !{!"bitset1", i32* @a, i32 0}
!1 = !{!"bitset1", i32* @b, i32 0}
!2 = !{!"bitset2", i32* @b, i32 0}
!3 = !{!"bitset2", i32* @c, i32 0}
!4 = !{!"bitset2", i32* @d, i32 4}
!5 = !{!"bitset3", void ()* @e, i32 0}
!6 = !{!"bitset3", void ()* @g, i32 0}
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
@ -55,6 +91,12 @@ to test whether a given pointer is a member of a bitset.
ret i1 %x
}
define i1 @baz(void ()* %p) {
%pi8 = bitcast void ()* %p to i8*
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
ret i1 %x
}
define void @main() {
%a1 = call i1 @foo(i32* @a) ; returns 1
%b1 = call i1 @foo(i32* @b) ; returns 1
@ -64,6 +106,9 @@ to test whether a given pointer is a member of a bitset.
%c2 = call i1 @bar(i32* @c) ; returns 1
%d02 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 0)) ; returns 0
%d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
%e = call i1 @baz(void ()* @e) ; returns 1
%f = call i1 @baz(void ()* @f) ; returns 0
%g = call i1 @baz(void ()* @g) ; returns 1
ret void
}

View File

@ -11837,7 +11837,7 @@ Arguments:
""""""""""
The first argument is a pointer to be tested. The second argument is a
metadata string containing the name of a :doc:`bitset <BitSets>`.
metadata object representing an identifier for a :doc:`bitset <BitSets>`.
Overview:
"""""""""

View File

@ -26,7 +26,7 @@
namespace llvm {
class DataLayout;
class GlobalVariable;
class GlobalObject;
class Value;
class raw_ostream;
@ -56,7 +56,7 @@ struct BitSetInfo {
bool containsGlobalOffset(uint64_t Offset) const;
bool containsValue(const DataLayout &DL,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout,
Value *V, uint64_t COffset = 0) const;
void print(raw_ostream &OS) const;

View File

@ -19,6 +19,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@ -61,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
bool BitSetInfo::containsValue(
const DataLayout &DL,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
uint64_t COffset) const {
if (auto GV = dyn_cast<GlobalVariable>(V)) {
if (auto GV = dyn_cast<GlobalObject>(V)) {
auto I = GlobalLayout.find(GV);
if (I == GlobalLayout.end())
return false;
@ -211,34 +213,48 @@ struct LowerBitSets : public ModulePass {
Module *M;
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
Triple::ObjectFormatType ObjectFormat;
IntegerType *Int1Ty;
IntegerType *Int8Ty;
IntegerType *Int32Ty;
Type *Int32PtrTy;
IntegerType *Int64Ty;
Type *IntPtrTy;
IntegerType *IntPtrTy;
// The llvm.bitsets named metadata.
NamedMDNode *BitSetNM;
// Mapping from bitset mdstrings to the call sites that test them.
DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
// Mapping from bitset identifiers to the call sites that test them.
DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
std::vector<ByteArrayInfo> ByteArrayInfos;
BitSetInfo
buildBitSet(MDString *BitSet,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
buildBitSet(Metadata *BitSet,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Value *BitOffset);
void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
Constant *CombinedGlobalAddr,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
Value *
lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
const std::vector<GlobalVariable *> &Globals);
Constant *CombinedGlobal,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
ArrayRef<GlobalVariable *> Globals);
unsigned getJumpTableEntrySize();
Type *getJumpTableEntryType();
Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
unsigned Distance);
void verifyBitSetMDNode(MDNode *Op);
void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
ArrayRef<Function *> Functions);
void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
ArrayRef<GlobalObject *> Globals);
bool buildBitSets();
bool eraseBitSetMetadata();
@ -262,6 +278,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
Triple TargetTriple(M->getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
ObjectFormat = TargetTriple.getObjectFormat();
Int1Ty = Type::getInt1Ty(M->getContext());
Int8Ty = Type::getInt8Ty(M->getContext());
@ -280,8 +298,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
/// Build a bit set for BitSet using the object layouts in
/// GlobalLayout.
BitSetInfo LowerBitSets::buildBitSet(
MDString *BitSet,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
Metadata *BitSet,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
BitSetBuilder BSB;
// Compute the byte offset of each element of this bitset.
@ -289,8 +307,11 @@ BitSetInfo LowerBitSets::buildBitSet(
for (MDNode *Op : BitSetNM->operands()) {
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
auto OpGlobal = dyn_cast<GlobalVariable>(
cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
Constant *OpConst =
cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
if (auto GA = dyn_cast<GlobalAlias>(OpConst))
OpConst = GA->getAliasee();
auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
if (!OpGlobal)
continue;
uint64_t Offset =
@ -439,17 +460,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
Constant *CombinedGlobalIntAddr,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M->getDataLayout();
if (BSI.containsValue(DL, GlobalLayout, Ptr))
return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
return ConstantInt::getTrue(M->getContext());
Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
BasicBlock *InitialBB = CI->getParent();
@ -508,18 +528,19 @@ Value *LowerBitSets::lowerBitSetCall(
/// Given a disjoint set of bitsets and globals, layout the globals, build the
/// bit sets and lower the llvm.bitset.test calls.
void LowerBitSets::buildBitSetsFromGlobals(
const std::vector<MDString *> &BitSets,
const std::vector<GlobalVariable *> &Globals) {
void LowerBitSets::buildBitSetsFromGlobalVariables(
ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
// Build a new global with the combined contents of the referenced globals.
// This global is a struct whose even-indexed elements contain the original
// contents of the referenced globals and whose odd-indexed elements contain
// any padding required to align the next element to the next power of 2.
std::vector<Constant *> GlobalInits;
const DataLayout &DL = M->getDataLayout();
for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());
uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
// Compute the amount of padding required to align the next element to the
// next power of 2.
// Compute the amount of padding required.
uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
// Cap at 128 was found experimentally to have a good data/instruction
@ -541,30 +562,12 @@ void LowerBitSets::buildBitSetsFromGlobals(
DL.getStructLayout(cast<StructType>(NewInit->getType()));
// Compute the offsets of the original globals within the new global.
DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
DenseMap<GlobalObject *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)
// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
// For each bitset in this disjoint set...
for (MDString *BS : BitSets) {
// Build the bitset.
BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
DEBUG({
dbgs() << BS->getString() << ": ";
BSI.print(dbgs());
});
ByteArrayInfo *BAI = 0;
// Lower each call to llvm.bitset.test for this bitset.
for (CallInst *CI : BitSetTestCallSites[BS]) {
++NumBitSetCallsLowered;
Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();
}
}
lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
// global from which we built the combined global, and replace references
@ -581,6 +584,7 @@ void LowerBitSets::buildBitSetsFromGlobals(
GlobalAlias *GAlias =
GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
"", CombinedGlobalElemPtr, M);
GAlias->setVisibility(Globals[I]->getVisibility());
GAlias->takeName(Globals[I]);
Globals[I]->replaceAllUsesWith(GAlias);
}
@ -588,6 +592,330 @@ void LowerBitSets::buildBitSetsFromGlobals(
}
}
/// Build every bitset in \p BitSets against \p GlobalLayout and replace each
/// recorded llvm.bitset.test call site for it with the lowered test.
///
/// \param BitSets            Identifiers of the bitsets in this disjoint set.
/// \param CombinedGlobalAddr Address that the offsets in \p GlobalLayout are
///                           relative to.
/// \param GlobalLayout       Byte offset of each member global.
void LowerBitSets::lowerBitSetCalls(
    ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
  // The lowered tests do integer arithmetic on addresses, so convert the base
  // address to an integer once up front.
  Constant *BaseAsInt = ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);

  for (Metadata *Id : BitSets) {
    BitSetInfo Info = buildBitSet(Id, GlobalLayout);
    DEBUG({
      // Only string identifiers have a printable name.
      if (!isa<MDString>(Id))
        dbgs() << "<unnamed>: ";
      else
        dbgs() << cast<MDString>(Id)->getString() << ": ";
      Info.print(dbgs());
    });

    // Passed by reference to lowerBitSetCall for every call site of this
    // bitset; starts out null.
    ByteArrayInfo *ByteArray = nullptr;

    std::vector<CallInst *> &CallSites = BitSetTestCallSites[Id];
    for (CallInst *Call : CallSites) {
      ++NumBitSetCallsLowered;
      Value *Replacement =
          lowerBitSetCall(Call, Info, ByteArray, BaseAsInt, GlobalLayout);
      Call->replaceAllUsesWith(Replacement);
      Call->eraseFromParent();
    }
  }
}
/// Check that \p Op is a well-formed llvm.bitsets entry, reporting a fatal
/// error otherwise. Entries whose member operand is null or is not a global
/// object are accepted without any further checks.
void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
  if (Op->getNumOperands() != 3)
    report_fatal_error(
        "All operands of llvm.bitsets metadata must have 3 elements");

  // Operand 1: the member of the bitset.
  const auto &MemberOp = Op->getOperand(1);
  if (!MemberOp)
    return;

  auto *MemberMD = dyn_cast<ConstantAsMetadata>(MemberOp);
  if (!MemberMD)
    report_fatal_error("Bit set element must be a constant");

  auto *Member = dyn_cast<GlobalObject>(MemberMD->getValue());
  if (!Member)
    return;

  if (Member->isThreadLocal())
    report_fatal_error("Bit set element may not be thread-local");
  if (Member->hasSection())
    report_fatal_error("Bit set element may not have an explicit section");

  // Only global variables are required to be definitions; functions are not
  // checked here.
  if (Member->isDeclarationForLinker() && isa<GlobalVariable>(Member))
    report_fatal_error("Bit set global var element must be a definition");

  // Operand 2: the byte offset, which must be a constant integer.
  auto *OffsetMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
  if (!OffsetMD)
    report_fatal_error("Bit set element offset must be a constant");
  if (!isa<ConstantInt>(OffsetMD->getValue()))
    report_fatal_error("Bit set element offset must be an integer constant");
}
// Size in bytes of one x86 jump table entry.
static const unsigned kX86JumpTableEntrySize = 8;

/// Return the size in bytes of a jump table entry for the current target.
/// Any architecture other than x86-32 / x86-64 is a fatal error.
unsigned LowerBitSets::getJumpTableEntrySize() {
  const bool IsX86 = Arch == Triple::x86 || Arch == Triple::x86_64;
  if (!IsX86)
    report_fatal_error("Unsupported architecture for jump tables");
  return kX86JumpTableEntrySize;
}
// Build the constant for a single jump table slot on the current target: an
// instruction sequence holding a relative branch to Dest. The slot is laid out
// at address Src+(Len*Distance), where Len is the target-specific jump table
// entry size.
Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
                                             unsigned Distance) {
  const bool IsX86 = Arch == Triple::x86 || Arch == Triple::x86_64;
  if (!IsX86)
    report_fatal_error("Unsupported architecture for jump tables");

  const unsigned kJmpPCRel32Code = 0xe9;
  const unsigned kInt3Code = 0xcc;

  ConstantInt *JmpOpcode = ConstantInt::get(Int8Ty, kJmpPCRel32Code);

  // The branch displacement is Dest minus the address just past the 5-byte
  // jmp (opcode byte + 32-bit displacement) of slot number Distance. This
  // resolves to a PC32 relocation referring to Dest.
  Constant *TargetAddr = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
  Constant *TableAddr = ConstantExpr::getPtrToInt(Src, IntPtrTy);
  Constant *FromTableStart = ConstantExpr::getSub(TargetAddr, TableAddr);
  ConstantInt *EndOfJmp =
      ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
  Constant *Rel = ConstantExpr::getSub(FromTableStart, EndOfJmp);
  Constant *Rel32 = ConstantExpr::getTrunc(Rel, Int32Ty);

  // Three int3 bytes fill the rest of the slot after the jmp.
  ConstantInt *Trap = ConstantInt::get(Int8Ty, kInt3Code);

  Constant *Fields[] = {
      JmpOpcode, Rel32, Trap, Trap, Trap,
  };
  return ConstantStruct::getAnon(Fields, /*Packed=*/true);
}
/// Return the IR type of one jump table entry for the current target: a packed
/// struct <{ i8, i32, i8, i8, i8 }>, matching the fields produced by
/// createJumpTableEntry. Any architecture other than x86-32 / x86-64 is a
/// fatal error.
Type *LowerBitSets::getJumpTableEntryType() {
  const bool IsX86 = Arch == Triple::x86 || Arch == Triple::x86_64;
  if (!IsX86)
    report_fatal_error("Unsupported architecture for jump tables");

  Type *Fields[] = {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty};
  return StructType::get(M->getContext(), Fields, /*Packed=*/true);
}
/// Given a disjoint set of bitsets and functions, build a jump table for the
/// functions, build the bit sets and lower the llvm.bitset.test calls.
///
/// \param BitSets   Identifiers of the bitsets in this disjoint set.
/// \param Functions The member functions; entry I of the jump table branches
///                  to Functions[I]. Must be non-empty.
void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
ArrayRef<Function *> Functions) {
// Unlike the global bitset builder, the function bitset builder cannot
// re-arrange functions in a particular order and base its calculations on the
// layout of the functions' entry points, as we have no idea how large a
// particular function will end up being (the size could even depend on what
// this pass does!) Instead, we build a jump table, which is a block of code
// consisting of one branch instruction for each of the functions in the bit
// set that branches to the target function, and redirect any taken function
// addresses to the corresponding jump table entry. In the object file's
// symbol table, the symbols for the target functions also refer to the jump
// table entries, so that addresses taken outside the module will pass any
// verification done inside the module.
//
// In more concrete terms, suppose we have three functions f, g, h which are
// members of a single bitset, and a function foo that returns their
// addresses:
//
// f:
// mov 0, %eax
// ret
//
// g:
// mov 1, %eax
// ret
//
// h:
// mov 2, %eax
// ret
//
// foo:
// mov f, %eax
// mov g, %edx
// mov h, %ecx
// ret
//
// To create a jump table for these functions, we instruct the LLVM code
// generator to output a jump table in the .text section. This is done by
// representing the instructions in the jump table as an LLVM constant and
// placing them in a global variable in the .text section. The end result will
// (conceptually) look like this:
//
// f:
// jmp .Ltmp0 ; 5 bytes
// int3 ; 1 byte
// int3 ; 1 byte
// int3 ; 1 byte
//
// g:
// jmp .Ltmp1 ; 5 bytes
// int3 ; 1 byte
// int3 ; 1 byte
// int3 ; 1 byte
//
// h:
// jmp .Ltmp2 ; 5 bytes
// int3 ; 1 byte
// int3 ; 1 byte
// int3 ; 1 byte
//
// .Ltmp0:
// mov 0, %eax
// ret
//
// .Ltmp1:
// mov 1, %eax
// ret
//
// .Ltmp2:
// mov 2, %eax
// ret
//
// foo:
// mov f, %eax
// mov g, %edx
// mov h, %ecx
// ret
//
// Because the addresses of f, g, h are evenly spaced at a power of 2, in the
// normal case the check can be carried out using the same kind of simple
// arithmetic that we normally use for globals.
assert(!Functions.empty());
// Build a simple layout based on the regular layout of jump tables: the entry
// for Functions[I] is at byte offset I * EntrySize from the start of the table.
DenseMap<GlobalObject *, uint64_t> GlobalLayout;
unsigned EntrySize = getJumpTableEntrySize();
for (unsigned I = 0; I != Functions.size(); ++I)
GlobalLayout[Functions[I]] = I * EntrySize;
// Create a constant to hold the jump table. It is created without an
// initializer; the initializer is set at the end of this function.
ArrayType *JumpTableType =
ArrayType::get(getJumpTableEntryType(), Functions.size());
auto JumpTable = new GlobalVariable(*M, JumpTableType,
/*isConstant=*/true,
GlobalValue::PrivateLinkage, nullptr);
// Place the table in the text section, using the Mach-O section name when the
// object format is Mach-O.
JumpTable->setSection(ObjectFormat == Triple::MachO
? "__TEXT,__text,regular,pure_instructions"
: ".text");
// Lower the llvm.bitset.test calls, using the jump table as the base address
// that the offsets in GlobalLayout are relative to.
lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);
// Build aliases pointing to offsets into the jump table, and replace
// references to the original functions with references to the aliases.
for (unsigned I = 0; I != Functions.size(); ++I) {
// Address of jump table entry I, cast to the type of the original function.
Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
ConstantExpr::getGetElementPtr(
JumpTableType, JumpTable,
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
Functions[I]->getType());
// No alias is created when LinkerSubsectionsViaSymbols is set or when the
// function is only a declaration; uses are pointed directly at the entry.
if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
// The alias takes over the function's name, linkage and visibility.
GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getType(),
Functions[I]->getLinkage(), "",
CombinedGlobalElemPtr, M);
GAlias->setVisibility(Functions[I]->getVisibility());
GAlias->takeName(Functions[I]);
Functions[I]->replaceAllUsesWith(GAlias);
}
// Functions defined in this module are given private linkage.
if (!Functions[I]->isDeclarationForLinker())
Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
}
// Build and set the jump table's initializer. Each entry is built from the
// address of JumpTable itself, the target function and the entry's index.
std::vector<Constant *> JumpTableEntries;
for (unsigned I = 0; I != Functions.size(); ++I)
JumpTableEntries.push_back(
createJumpTableEntry(JumpTable, Functions[I], I));
JumpTable->setInitializer(
ConstantArray::get(JumpTableType, JumpTableEntries));
}
/// Compute a layout for the members of one disjoint set and lower its bitsets.
///
/// \param BitSets Identifiers of the bitsets in this disjoint set.
/// \param Globals The global objects referenced by those bitsets. They must be
///                either all global variables or all functions; a mixture is
///                reported as a fatal error.
void LowerBitSets::buildBitSetsFromDisjointSet(
    ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
  // Map each bitset identifier and each global to its position in the input
  // arrays. The position is used directly instead of "Map[K] = Map.size()":
  // before C++17 it is unspecified whether operator[] (which inserts K) runs
  // before size() is read, so that form could store indices that are off by
  // one and later index BitSetMembers / Globals out of range.
  llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
  for (uint64_t I = 0, E = BitSets.size(); I != E; ++I)
    BitSetIndices[BitSets[I]] = I;

  llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
  for (uint64_t I = 0, E = Globals.size(); I != E; ++I)
    GlobalIndices[Globals[I]] = I;

  // For each bitset, build a set of indices that refer to globals referenced by
  // the bitset.
  std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
  if (BitSetNM) {
    for (MDNode *Op : BitSetNM->operands()) {
      // Op = { bitset name, global, offset }
      if (!Op->getOperand(1))
        continue;
      auto I = BitSetIndices.find(Op->getOperand(0));
      if (I == BitSetIndices.end())
        continue;

      auto OpGlobal = dyn_cast<GlobalObject>(
          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
      if (!OpGlobal)
        continue;
      BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
    }
  }

  // Order the sets of indices by size. The GlobalLayoutBuilder works best
  // when given small index sets first.
  std::stable_sort(
      BitSetMembers.begin(), BitSetMembers.end(),
      [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
        return O1.size() < O2.size();
      });

  // Create a GlobalLayoutBuilder and provide it with index sets as layout
  // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
  // close together as possible.
  GlobalLayoutBuilder GLB(Globals.size());
  for (auto &&MemSet : BitSetMembers)
    GLB.addFragment(MemSet);

  // Build the bitsets from this disjoint set. The kind of the first global
  // selects the lowering; every other member must be of the same kind.
  if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
    // Build a vector of global variables with the computed layout.
    std::vector<GlobalVariable *> OrderedGVs(Globals.size());
    auto OGI = OrderedGVs.begin();
    for (auto &&F : GLB.Fragments) {
      for (auto &&Offset : F) {
        auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
        if (!GV)
          report_fatal_error(
              "Bit set may not contain both global variables and functions");
        *OGI++ = GV;
      }
    }

    buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
  } else {
    // Build a vector of functions with the computed layout.
    std::vector<Function *> OrderedFns(Globals.size());
    auto OFI = OrderedFns.begin();
    for (auto &&F : GLB.Fragments) {
      for (auto &&Offset : F) {
        auto Fn = dyn_cast<Function>(Globals[Offset]);
        if (!Fn)
          report_fatal_error(
              "Bit set may not contain both global variables and functions");
        *OFI++ = Fn;
      }
    }

    buildBitSetsFromFunctions(BitSets, OrderedFns);
  }
}
/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets() {
Function *BitSetTestFunc =
@ -598,24 +926,36 @@ bool LowerBitSets::buildBitSets() {
// Equivalence class set containing bitsets and the globals they reference.
// This is used to partition the set of bitsets in the module into disjoint
// sets.
typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
GlobalClassesTy;
GlobalClassesTy GlobalClasses;
// Verify the bitset metadata and build a mapping from bitset identifiers to
// their last observed index in BitSetNM. This will be used later to
// deterministically order the list of bitset identifiers.
llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
if (BitSetNM) {
  for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
    MDNode *Op = BitSetNM->getOperand(I);
    // verifyBitSetMDNode rejects nodes that do not have exactly 3 operands, so
    // reading operand 0 below is safe.
    verifyBitSetMDNode(Op);
    // Key on the bitset identifier (operand 0), not on the entry node itself:
    // the sort that orders the bitsets looks this map up by identifier, and a
    // lookup that misses would default to index 0 for every bitset.
    BitSetIdIndices[Op->getOperand(0)] = I;
  }
}
for (const Use &U : BitSetTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
if (!BitSetMDVal)
report_fatal_error(
"Second argument of llvm.bitset.test must be metadata string");
auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
"Second argument of llvm.bitset.test must be metadata");
auto BitSet = BitSetMDVal->getMetadata();
// Add the call site to the list of call sites for this bit set. We also use
// BitSetTestCallSites to keep track of whether we have seen this bit set
// before. If we have, we don't need to re-add the referenced globals to the
// equivalence class.
std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
bool> Ins =
BitSetTestCallSites.insert(
std::make_pair(BitSet, std::vector<CallInst *>()));
@ -630,31 +970,16 @@ bool LowerBitSets::buildBitSets() {
if (!BitSetNM)
continue;
// Verify the bitset metadata and add the referenced globals to the bitset's
// equivalence class.
// Add the referenced globals to the bitset's equivalence class.
for (MDNode *Op : BitSetNM->operands()) {
if (Op->getNumOperands() != 3)
report_fatal_error(
"All operands of llvm.bitsets metadata must have 3 elements");
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
if (!OpConstMD)
report_fatal_error("Bit set element must be a constant");
auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
auto OpGlobal = dyn_cast<GlobalObject>(
cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
if (!OpGlobal)
continue;
auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
if (!OffsetConstMD)
report_fatal_error("Bit set element offset must be a constant");
auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
if (!OffsetInt)
report_fatal_error(
"Bit set element offset must be an integer constant");
CurSet = GlobalClasses.unionSets(
CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
}
@ -671,71 +996,25 @@ bool LowerBitSets::buildBitSets() {
++NumBitSetDisjointSets;
// Build the list of bitsets and referenced globals in this disjoint set.
std::vector<MDString *> BitSets;
std::vector<GlobalVariable *> Globals;
llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
// Build the list of bitsets in this disjoint set.
std::vector<Metadata *> BitSets;
std::vector<GlobalObject *> Globals;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
if ((*MI).is<MDString *>()) {
BitSetIndices[MI->get<MDString *>()] = BitSets.size();
BitSets.push_back(MI->get<MDString *>());
} else {
GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
Globals.push_back(MI->get<GlobalVariable *>());
}
if ((*MI).is<Metadata *>())
BitSets.push_back(MI->get<Metadata *>());
else
Globals.push_back(MI->get<GlobalObject *>());
}
// For each bitset, build a set of indices that refer to globals referenced
// by the bitset.
std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
if (BitSetNM) {
for (MDNode *Op : BitSetNM->operands()) {
// Op = { bitset name, global, offset }
if (!Op->getOperand(1))
continue;
auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
if (I == BitSetIndices.end())
continue;
auto OpGlobal = dyn_cast<GlobalVariable>(
cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
if (!OpGlobal)
continue;
BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
}
}
// Order the sets of indices by size. The GlobalLayoutBuilder works best
// when given small index sets first.
std::stable_sort(
BitSetMembers.begin(), BitSetMembers.end(),
[](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
return O1.size() < O2.size();
});
// Create a GlobalLayoutBuilder and provide it with index sets as layout
// fragments. The GlobalLayoutBuilder tries to lay out members of fragments
// as close together as possible.
GlobalLayoutBuilder GLB(Globals.size());
for (auto &&MemSet : BitSetMembers)
GLB.addFragment(MemSet);
// Build a vector of globals with the computed layout.
std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
auto OGI = OrderedGlobals.begin();
for (auto &&F : GLB.Fragments)
for (auto &&Offset : F)
*OGI++ = Globals[Offset];
// Order bitsets by name for determinism.
std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
return S1->getString() < S2->getString();
// Order bitsets by BitSetNM index for determinism. This ordering is stable
// as there is a one-to-one mapping between metadata and indices.
std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
return BitSetIdIndices[M1] < BitSetIdIndices[M2];
});
// Build the bitsets from this disjoint set.
buildBitSetsFromGlobals(BitSets, OrderedGlobals);
// Lower the bitsets in this disjoint set.
buildBitSetsFromDisjointSet(BitSets, Globals);
}
allocateByteArrays();

View File

@ -0,0 +1,22 @@
; RUN: opt -S -lowerbitsets < %s | FileCheck %s
; Tests that we correctly handle external references, including the case where
; all functions in a bitset are external references.
target triple = "x86_64-unknown-linux-gnu"
declare void @foo()
; CHECK: @[[JT:.*]] = private constant [1 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @foo to i64), i64 ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
define i1 @bar(i8* %ptr) {
; CHECK: icmp eq i64 {{.*}}, ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
%p = call i1 @llvm.bitset.test(i8* %ptr, metadata !"void")
ret i1 %p
}
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
!0 = !{!"void", void ()* @foo, i64 0}
!llvm.bitsets = !{!0}

View File

@ -0,0 +1,35 @@
; RUN: opt -S -lowerbitsets < %s | FileCheck %s
; Tests that we correctly create a jump table for bitsets containing 2 or more
; functions.
target triple = "x86_64-unknown-linux-gnu"
target datalayout = "e-p:64:64"
; CHECK: @[[JT:.*]] = private constant [2 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @[[FNAME:.*]] to i64), i64 ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>, <{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @[[GNAME:.*]] to i64), i64 ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 13) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
; CHECK: @f = alias bitcast ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to void ()*)
; CHECK: @g = alias bitcast (<{ i8, i32, i8, i8, i8 }>* getelementptr inbounds ([2 x <{ i8, i32, i8, i8, i8 }>], [2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]], i64 0, i64 1) to void ()*)
; CHECK: define private void @[[FNAME]]() {
define void @f() {
ret void
}
; CHECK: define private void @[[GNAME]]() {
define void @g() {
ret void
}
!0 = !{!"bitset1", void ()* @f, i32 0}
!1 = !{!"bitset1", void ()* @g, i32 0}
!llvm.bitsets = !{ !0, !1 }
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
define i1 @foo(i8* %p) {
; CHECK: sub i64 {{.*}}, ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
ret i1 %x
}

View File

@ -0,0 +1,34 @@
; RUN: opt -S -lowerbitsets < %s | FileCheck %s
; Tests that non-string metadata nodes may be used as bitset identifiers.
target datalayout = "e-p:32:32"
; CHECK: @[[BNAME:.*]] = private constant { [2 x i32] }
; CHECK: @[[ANAME:.*]] = private constant { i32 }
@a = constant i32 1
@b = constant [2 x i32] [i32 2, i32 3]
!0 = !{!2, i32* @a, i32 0}
!1 = !{!3, [2 x i32]* @b, i32 0}
!2 = distinct !{}
!3 = distinct !{}
!llvm.bitsets = !{ !0, !1 }
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
; CHECK-LABEL: @foo
define i1 @foo(i8* %p) {
; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ i32 }* @[[ANAME]] to i32)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !2)
ret i1 %x
}
; CHECK-LABEL: @bar
define i1 @bar(i8* %p) {
; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ [2 x i32] }* @[[BNAME]] to i32)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !3)
ret i1 %x
}

View File

@ -6,8 +6,8 @@ target datalayout = "e-p:32:32"
; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
@a = constant i32 1
@b = constant [63 x i32] zeroinitializer
@c = constant i32 3
@b = hidden constant [63 x i32] zeroinitializer
@c = protected constant i32 3
@d = constant [2 x i32] [i32 4, i32 5]
; CHECK: [[BA:@[^ ]*]] = private constant [68 x i8] c"\03\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\01"
@ -43,8 +43,8 @@ target datalayout = "e-p:32:32"
; CHECK: @bits_use.{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
; CHECK: @b = hidden alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
; CHECK: @c = protected alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
; CHECK-DARWIN: @aptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G:@[^ ]*]], i32 0, i32 0)