[AsmPrinter] Access pointers to globals via pcrel GOT entries

Front-ends can use unnamed_addr globals to hold pointers to other
symbols, like @gotequivalent below:

@foo = global i32 42
@gotequivalent = private unnamed_addr constant i32* @foo

@delta = global i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequivalent to i64),
                                    i64 ptrtoint (i32* @delta to i64))
                           to i32)

The global @delta holds a data "PC"-relative offset to @gotequivalent,
an unnamed pointer to @foo. The darwin/x86-64 assembly output for this follows:

 .globl  _foo
_foo:
 .long   42

 .globl  _gotequivalent
_gotequivalent:
 .quad   _foo

 .globl  _delta
_delta:
 .long   _gotequivalent-_delta

Since unnamed_addr indicates that the address is not significant, only
the content, we can optimize the case above by replacing pc-relative
accesses to "GOT equivalent" globals with a pc-relative access to the GOT
entry of the final symbol instead. Therefore, "delta" can contain a
pc-relative relocation to foo's GOT entry and we avoid the emission of
"gotequivalent", yielding the assembly code below:

 .globl  _foo
_foo:
 .long   42

 .globl  _delta
_delta:
 .long   _foo@GOTPCREL+4

There are a couple of advantages to doing this: (1) Front-ends that need
to emit a great deal of data to store pointers to external symbols can
save space by not emitting such "GOT equivalent" globals and (2) IR
constructs combined with this optimization open a way to represent GOT
pcrel relocations in LLVM IR, which is something we previously had
no way to express.

Differential Revision: http://reviews.llvm.org/D6922

rdar://problem/18534217

llvm-svn: 230264
This commit is contained in:
Bruno Cardoso Lopes 2015-02-23 21:26:18 +00:00
parent 4d7aae932c
commit 24492b057e
6 changed files with 346 additions and 16 deletions

View File

@ -97,6 +97,11 @@ public:
/// default, this is equal to CurrentFnSym.
MCSymbol *CurrentFnSymForSize;
/// Map the MCSymbol of each global GOT equivalent to a pair holding its
/// GlobalVariable and the number of uses of that GOT equivalent by other
/// globals.
typedef std::pair<const GlobalVariable *, unsigned> GOTEquivUsePair;
DenseMap<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
private:
// The garbage collection metadata printer table.
void *GCMetadataPrinters; // Really a DenseMap.
@ -242,6 +247,21 @@ public:
/// \brief Print a general LLVM constant to the .s file.
void EmitGlobalConstant(const Constant *CV);
/// \brief Unnamed constant global variables solely containing a pointer to
/// another global variable act like a global variable "proxy", or GOT
/// equivalent, i.e., they are only used to hold the address of the latter. One
/// optimization is to replace accesses to these proxies by using the GOT
/// entry for the final global instead. Hence, we select GOT equivalent
/// candidates among all the module global variables, avoid emitting them
/// unnecessarily and finally replace references to them by pc relative
/// accesses to GOT entries.
void computeGlobalGOTEquivs(Module &M);
/// \brief Constant expressions using GOT equivalent globals may not be
/// eligible for PC relative GOT entry conversion. In such cases we need to
/// emit the proxies whose emission was previously skipped in
/// EmitGlobalVariable.
void emitGlobalGOTEquivs();
//===------------------------------------------------------------------===//
// Overridable Hooks
//===------------------------------------------------------------------===//

View File

@ -41,10 +41,14 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
const TargetLoweringObjectFile&) = delete;
void operator=(const TargetLoweringObjectFile&) = delete;
protected:
bool SupportIndirectSymViaGOTPCRel;
public:
MCContext &getContext() const { return *Ctx; }
TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr) {}
TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr),
SupportIndirectSymViaGOTPCRel(false) {}
virtual ~TargetLoweringObjectFile();
@ -158,6 +162,18 @@ public:
return nullptr;
}
/// \brief Does the target support replacing a data "PC"-relative access to a
/// symbol through another symbol by accessing the latter via a GOT entry
/// instead? Returns the value of the SupportIndirectSymViaGOTPCRel flag.
bool supportIndirectSymViaGOTPCRel() const {
return SupportIndirectSymViaGOTPCRel;
}
/// \brief Get the target specific PC relative GOT entry relocation for
/// \p Sym with the additional constant \p Offset. This default
/// implementation returns nullptr; subclasses may override it.
virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
int64_t Offset) const {
return nullptr;
}
protected:
virtual const MCSection *
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,

View File

@ -41,6 +41,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
@ -340,6 +341,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (EmitSpecialLLVMGlobal(GV))
return;
// Skip the emission of global equivalents. The symbol can be emitted later
// on by emitGlobalGOTEquivs in case it turns out to be needed.
if (GlobalGOTEquivs.count(getSymbol(GV)))
return;
if (isVerbose()) {
GV->printAsOperand(OutStreamer.GetCommentOS(),
/*PrintType=*/false, GV->getParent());
@ -890,11 +896,95 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.AddBlankLine();
}
/// \brief Compute the number of Global Variables that uses a Constant.
static unsigned getNumGlobalVariableUses(const Constant *C) {
if (!C)
return 0;
if (isa<GlobalVariable>(C))
return 1;
unsigned NumUses = 0;
for (auto *CU : C->users())
NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
return NumUses;
}
/// \brief Only consider global GOT equivalents if at least one user is a
/// cstexpr inside an initializer of another global variables. Also, don't
/// handle cstexpr inside instructions. During global variable emission,
/// candidates are skipped and are emitted later in case at least one cstexpr
/// isn't replaced by a PC relative GOT entry access.
static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
unsigned &NumGOTEquivUsers) {
// Global GOT equivalents are unnamed private globals with a constant
// pointer initializer to another global symbol. They must point to a
// GlobalVariable or Function, i.e., as GlobalValue.
if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() ||
!GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0)))
return false;
// To be a got equivalent, at least one of its users need to be a constant
// expression used by another global variable.
for (auto *U : GV->users())
NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U));
return NumGOTEquivUsers > 0;
}
/// \brief An unnamed constant global variable solely containing a pointer to
/// another global variable is equivalent to a GOT table entry; it contains
/// the address of another symbol. Optimize it and replace accesses to these
/// "GOT equivalents" by using the GOT entry for the final global instead.
/// Compute GOT equivalent candidates among all global variables to avoid
/// emitting them if possible later on, after their uses are replaced by a GOT
/// entry access.
void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
  // Nothing to collect unless the target supports the GOT pcrel access.
  if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
    return;

  for (const auto &G : M.globals()) {
    unsigned UseCount = 0;
    if (isGOTEquivalentCandidate(&G, UseCount)) {
      // Record the candidate, keyed by its symbol, along with its use count.
      GOTEquivUsePair Entry(&G, UseCount);
      GlobalGOTEquivs[getSymbol(&G)] = Entry;
    }
  }
}
/// \brief Constant expressions using GOT equivalent globals may not be eligible
/// for PC relative GOT entry conversion, in such cases we need to emit such
/// globals we previously omitted in EmitGlobalVariable.
void AsmPrinter::emitGlobalGOTEquivs() {
if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
return;
while (!GlobalGOTEquivs.empty()) {
DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I =
GlobalGOTEquivs.begin();
const MCSymbol *S = I->first;
const GlobalVariable *GV = I->second.first;
GlobalGOTEquivs.erase(S);
EmitGlobalVariable(GV);
}
}
bool AsmPrinter::doFinalization(Module &M) {
// Gather all GOT equivalent globals in the module. We really need two
// passes over the globals: one to compute and another to avoid its emission
// in EmitGlobalVariable, otherwise we would not be able to handle cases
// where the got equivalent shows up before its use.
computeGlobalGOTEquivs(M);
// Emit global variables.
for (const auto &G : M.globals())
EmitGlobalVariable(&G);
// Emit remaining GOT equivalent globals.
emitGlobalGOTEquivs();
// Emit visibility info for declarations
for (const Function &F : M) {
if (!F.isDeclaration())
@ -1679,7 +1769,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
}
static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
const Constant *BaseCV = nullptr,
uint64_t Offset = 0);
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
@ -1808,20 +1900,22 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, AP.TM);
const DataLayout &DL = *AP.TM.getDataLayout();
if (Value != -1) {
uint64_t Bytes =
AP.TM.getDataLayout()->getTypeAllocSize(
CA->getType());
uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value);
}
else {
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
emitGlobalConstantImpl(CA->getOperand(i), AP);
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset);
Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
}
}
}
@ -1837,7 +1931,8 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
AP.OutStreamer.EmitZeros(Padding);
}
static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
// Print the fields in successive locations. Pad to align if needed!
const DataLayout *DL = AP.TM.getDataLayout();
unsigned Size = DL->getTypeAllocSize(CS->getType());
@ -1846,15 +1941,15 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
// Print the actual field value.
emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar);
// Check if padding is needed and insert one or more 0s.
uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
SizeSoFar += FieldSize + PadSize;
// Now print the actual field value.
emitGlobalConstantImpl(Field, AP);
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
@ -1970,9 +2065,100 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
}
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
/// \brief Transform a non-absolute MCExpr containing a reference to a GOT
/// equivalent global into a target specific GOT pc relative access to the
/// final symbol.
///
/// \param AP the printer holding the GlobalGOTEquivs bookkeeping.
/// \param ME in/out expression; rewritten only when every check below passes,
///        otherwise left untouched.
/// \param BaseCst the constant (expected to be a global) being emitted; may
///        be null, in which case nothing is done.
/// \param Offset offset of the emitted value from the start of \p BaseCst.
static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
                                         const Constant *BaseCst,
                                         uint64_t Offset) {
  // The global @foo below illustrates a global that uses a got equivalent.
  //
  //  @bar = global i32 42
  //  @gotequiv = private unnamed_addr constant i32* @bar
  //  @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64),
  //                             i64 ptrtoint (i32* @foo to i64))
  //                        to i32)
  //
  // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually
  // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the
  // form:
  //
  //  foo = cstexpr, where
  //    cstexpr := <gotequiv> - "." + <cst>
  //    cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst>
  //
  // After canonicalization by EvaluateAsRelocatable `ME` turns into:
  //
  //  cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
  //    gotpcrelcst := <offset from @foo base> + <cst>
  //
  MCValue MV;
  if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
    return;

  // A relocatable value is not guaranteed to have an "A" symbol; bail out
  // instead of dereferencing a null reference.
  const MCSymbolRefExpr *SymA = MV.getSymA();
  if (!SymA)
    return;

  // Check that the GOT equivalent symbol is one we are tracking. A single
  // lookup is kept and reused for the bookkeeping update at the end.
  auto GOTEquivIt = AP.GlobalGOTEquivs.find(&SymA->getSymbol());
  if (GOTEquivIt == AP.GlobalGOTEquivs.end())
    return;

  // BaseCst may be null (the caller only sets it in some cases), so use the
  // null-tolerant cast.
  const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst);
  if (!BaseGV)
    return;

  // The subtracted symbol must exist and be the base symbol itself. A plain
  // `<gotequiv> + <cst>` reference has no "B" symbol and is not pc relative.
  const MCSymbolRefExpr *SymB = MV.getSymB();
  if (!SymB || AP.getSymbol(BaseGV) != &SymB->getSymbol())
    return;

  // Make sure to match:
  //
  //    gotpcrelcst := <offset from @foo base> + <cst>
  //
  int64_t GOTPCRelCst = Offset + MV.getConstant();
  if (GOTPCRelCst < 0)
    return;

  // Emit the GOT PC relative to replace the got equivalent global, i.e.:
  //
  //  bar:
  //    .long 42
  //  gotequiv:
  //    .quad bar
  //  foo:
  //    .long gotequiv - "." + <cst>
  //
  // is replaced by the target specific equivalent to:
  //
  //  bar:
  //    .long 42
  //  foo:
  //    .long bar@GOTPCREL+<gotpcrelcst>
  //
  const GlobalVariable *GV = GOTEquivIt->second.first;
  // Candidates are only recorded when their initializer is a GlobalValue.
  const GlobalValue *FinalGV = cast<GlobalValue>(GV->getOperand(0));
  const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
  *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
                                                          GOTPCRelCst);

  // Update GOT equivalent usage information: one use has been replaced. Once
  // no uses remain, drop the entry so the proxy global is never emitted.
  unsigned &NumUses = GOTEquivIt->second.second;
  --NumUses;
  if (NumUses == 0)
    AP.GlobalGOTEquivs.erase(GOTEquivIt);
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
const DataLayout *DL = AP.TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(CV->getType());
// Globals with sub-elements such as combinations of arrays and structs
// are handled recursively by emitGlobalConstantImpl. Keep track of the
// constant symbol base and the current position with BaseCV and Offset.
if (!BaseCV && CV->hasOneUse())
BaseCV = dyn_cast<Constant>(CV->user_back());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size);
@ -2005,10 +2191,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
return emitGlobalConstantDataSequential(CDS, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
return emitGlobalConstantArray(CVA, AP);
return emitGlobalConstantArray(CVA, AP, BaseCV, Offset);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
return emitGlobalConstantStruct(CVS, AP);
return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@ -2031,7 +2217,15 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size);
const MCExpr *ME = AP.lowerConstant(CV);
// Since lowerConstant already folded and got rid of all IR pointer and
// integer casts, detect GOT equivalent accesses by looking into the MCExpr
// directly.
if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset);
AP.OutStreamer.EmitValue(ME, Size);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.

View File

@ -21,6 +21,11 @@
using namespace llvm;
using namespace dwarf;
/// Construct the x86-64 Mach-O object file lowering and set the
/// SupportIndirectSymViaGOTPCRel flag to true.
X86_64MachoTargetObjectFile::X86_64MachoTargetObjectFile()
: TargetLoweringObjectFileMachO() {
// The flag is assigned in the body rather than in the initializer list.
SupportIndirectSymViaGOTPCRel = true;
}
const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@ -46,6 +51,17 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol(
return TM.getSymbol(GV, Mang);
}
/// Build the expression `Sym@GOTPCREL + (Offset + 4)` used to reach the GOT
/// entry of \p Sym.
const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
    const MCSymbol *Sym, int64_t Offset) const {
  MCContext &Context = getContext();

  // On Darwin/X86-64, foo@GOTPCREL+4 is needed to access the got entry from a
  // data section. Any additional offset is folded into the same constant,
  // giving foo@GOTPCREL+4+<offset>.
  const MCExpr *GOTRef =
      MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
  const MCExpr *Addend = MCConstantExpr::Create(Offset + 4, Context);
  return MCBinaryExpr::CreateAdd(GOTRef, Addend, Context);
}
void
X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);

View File

@ -19,6 +19,8 @@ namespace llvm {
/// x86-64.
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
X86_64MachoTargetObjectFile();
const MCExpr *
getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
Mangler &Mang, const TargetMachine &TM,
@ -30,6 +32,10 @@ namespace llvm {
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
const MCExpr *
getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
int64_t Offset) const override;
};
/// X86LinuxTargetObjectFile - This implementation is used for linux x86

View File

@ -0,0 +1,78 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t
; RUN: FileCheck %s < %t
; RUN: FileCheck %s -check-prefix=GOT-EQUIV < %t
; GOT equivalent globals references can be replaced by the GOT entry of the
; final symbol instead.
%struct.data = type { i32, %struct.anon }
%struct.anon = type { i32, i32 }
; Check that these got equivalent symbols are never emitted or used
; GOT-EQUIV-NOT: _localgotequiv
; GOT-EQUIV-NOT: _extgotequiv
@localfoo = global i32 42
@localgotequiv = private unnamed_addr constant i32* @localfoo
@extfoo = external global i32
@extgotequiv = private unnamed_addr constant i32* @extfoo
; Don't replace GOT equivalent usage within instructions and emit the GOT
; equivalent since it can't be replaced by the GOT entry. @bargotequiv is
; used by an instruction inside @t0.
;
; CHECK: l_bargotequiv:
; CHECK-NEXT: .quad _extbar
@extbar = external global i32
@bargotequiv = private unnamed_addr constant i32* @extbar
@table = global [4 x %struct.data] [
; CHECK-LABEL: _table
%struct.data { i32 1, %struct.anon { i32 2, i32 3 } },
; Test GOT equivalent usage inside nested constant arrays.
; CHECK: .long 5
; CHECK-NOT: .long _localgotequiv-(_table+20)
; CHECK-NEXT: .long _localfoo@GOTPCREL+4
%struct.data { i32 4, %struct.anon { i32 5,
i32 trunc (i64 sub (i64 ptrtoint (i32** @localgotequiv to i64),
i64 ptrtoint (i32* getelementptr inbounds ([4 x %struct.data]* @table, i32 0, i64 1, i32 1, i32 1) to i64))
to i32)}
},
; CHECK: .long 5
; CHECK-NOT: _extgotequiv-(_table+32)
; CHECK-NEXT: .long _extfoo@GOTPCREL+4
%struct.data { i32 4, %struct.anon { i32 5,
i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64),
i64 ptrtoint (i32* getelementptr inbounds ([4 x %struct.data]* @table, i32 0, i64 2, i32 1, i32 1) to i64))
to i32)}
},
; Test support for arbitrary constants into the GOTPCREL offset
; CHECK: .long 5
; CHECK-NOT: _extgotequiv-(_table+44)
; CHECK-NEXT: .long _extfoo@GOTPCREL+28
%struct.data { i32 4, %struct.anon { i32 5,
i32 add (i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64),
i64 ptrtoint (i32* getelementptr inbounds ([4 x %struct.data]* @table, i32 0, i64 3, i32 1, i32 1) to i64))
to i32), i32 24)}
}
], align 16
; Test multiple uses of GOT equivalents.
; CHECK-LABEL: _delta
; CHECK: .long _extfoo@GOTPCREL+4
@delta = global i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64),
i64 ptrtoint (i32* @delta to i64))
to i32)
; CHECK-LABEL: _deltaplus:
; CHECK: .long _localfoo@GOTPCREL+59
@deltaplus = global i32 add (i32 trunc (i64 sub (i64 ptrtoint (i32** @localgotequiv to i64),
i64 ptrtoint (i32* @deltaplus to i64))
to i32), i32 55)
define i32 @t0(i32 %a) {
%x = add i32 trunc (i64 sub (i64 ptrtoint (i32** @bargotequiv to i64),
i64 ptrtoint (i32 (i32)* @t0 to i64))
to i32), %a
ret i32 %x
}