First pass at supporting relocations. Relocations are now written correctly to
the file; however, the relocated address is currently wrong. Fixing that will
require some deep pondering.

llvm-svn: 30207
This commit is contained in:
Nate Begeman 2006-09-08 22:42:09 +00:00
parent de33f66286
commit 69df6132d7
5 changed files with 249 additions and 118 deletions

View File

@ -16,6 +16,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include <list>
@ -26,6 +27,53 @@ namespace llvm {
class MachineCodeEmitter;
class MachOCodeEmitter;
/// MachOSym - This struct contains information about each symbol that is
/// added to logical symbol table for the module. This is eventually
/// turned into a real symbol table in the file.
///
/// The n_* members carry the per-symbol values that are later written to the
/// symbol table; GV and GVName tie the entry back to the LLVM global it was
/// created for.
struct MachOSym {
const GlobalValue *GV; // The global value this corresponds to.
std::string GVName; // The mangled name of the global value.
uint32_t n_strx; // index into the string table
// One of the N_* type constants below, optionally OR'd with N_EXT / N_PEXT.
uint8_t n_type; // type flag
uint8_t n_sect; // section number or NO_SECT
int16_t n_desc; // see <mach-o/stab.h>
uint64_t n_value; // value for this symbol (or stab offset)
// Constants for the n_sect field
// see <mach-o/nlist.h>
enum { NO_SECT = 0 }; // symbol is not in any section
// Constants for the n_type field
// see <mach-o/nlist.h>
enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
N_ABS = 0x2, // absolute, n_sect == NO_SECT
N_SECT = 0xe, // defined in section number n_sect
N_PBUD = 0xc, // prebound undefined (defined in a dylib)
N_INDR = 0xa // indirect
};
// The following bits are OR'd into the types above. For example, a type
// of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
enum { N_EXT = 0x01, // external symbol bit
N_PEXT = 0x10 // private external symbol bit
};
// Constants for the n_desc field
// see <mach-o/loader.h>
// NOTE(review): the REFERENCE_FLAG_* and N_NO_DEAD_STRIP / N_WEAK_* values
// may actually be declared in <mach-o/nlist.h> -- confirm the header name.
enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
REFERENCE_FLAG_UNDEFINED_LAZY = 1,
REFERENCE_FLAG_DEFINED = 2,
REFERENCE_FLAG_PRIVATE_DEFINED = 3,
REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
};
enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
N_WEAK_REF = 0x0040, // symbol is weak referenced
N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
};
// Creates the symbol for global value gv with mangled name `name`; sect is
// the section number the symbol lives in, or NO_SECT. Defined out of line.
MachOSym(const GlobalValue *gv, std::string name, uint8_t sect);
};
/// MachOWriter - This class implements the common target-independent code for
/// writing Mach-O files. Targets should derive a class from this to
/// parameterize the output format.
@ -55,7 +103,7 @@ namespace llvm {
/// Mang - The object used to perform name mangling for this module.
///
Mangler *Mang;
/// MCE - The MachineCodeEmitter object that we are exposing to emit machine
/// code for functions to the .o file.
MachOCodeEmitter *MCE;
@ -249,6 +297,28 @@ namespace llvm {
initprot(VM_PROT_ALL), nsects(0), flags(0) { }
};
/// MachORelocation - This struct contains information about each relocation
/// that needs to be emitted to the file.
/// see <mach-o/reloc.h>
///
/// A relocation is written to the file as two 32-bit words: r_address followed
/// by the value returned from getPackedFields().
struct MachORelocation {
  uint32_t r_address;   // offset in the section to what is being relocated
  uint32_t r_symbolnum; // symbol index if r_extern == 1 else section index
  bool     r_pcrel;     // was relocated pc-relative already
  uint8_t  r_length;    // length = 2 ^ r_length
  bool     r_extern;    // selects how r_symbolnum is read: true means it is a
                        // symbol index, false means it is a section index
  uint8_t  r_type;      // if not 0, machine-specific relocation type.

  /// getPackedFields - Combine every field except r_address into one 32-bit
  /// word, laid out from the most significant bit down as:
  ///   bits 31-8  r_symbolnum (only its low 24 bits survive the shift)
  ///   bit  7     r_pcrel
  ///   bits 6-5   r_length    (masked to 2 bits)
  ///   bit  4     r_extern
  ///   bits 3-0   r_type      (masked to 4 bits)
  /// Declared const so that it can be called on const relocations.
  uint32_t getPackedFields() const {
    return (r_symbolnum << 8) | (r_pcrel << 7) | ((r_length & 3) << 5) |
           (r_extern << 4) | (r_type & 15);
  }

  /// Build a relocation from its individual fields; the values are stored
  /// unmodified and only masked when packed by getPackedFields().
  MachORelocation(uint32_t addr, uint32_t index, bool pcrel, uint8_t len,
                  bool ext, uint8_t type)
    : r_address(addr), r_symbolnum(index), r_pcrel(pcrel), r_length(len),
      r_extern(ext), r_type(type) {}
};
/// MachOSection - This struct contains information about each section in a
/// particular segment that is emitted to the file. This is eventually
/// turned into the SectionCommand in the load command for a particular
@ -275,6 +345,11 @@ namespace llvm {
/// up for emission to the file.
DataBuffer SectionData;
/// Relocations - The relocations that we have encountered so far in this
/// section that we will need to convert to MachORelocation entries when
/// the file is written.
std::vector<MachineRelocation> Relocations;
// Constants for the section types (low 8 bits of flags field)
// see <mach-o/loader.h>
enum { S_REGULAR = 0,
@ -467,57 +542,6 @@ namespace llvm {
/// DySymTab - symbol table info for the dynamic link editor
MachODySymTab DySymTab;
/// MachOSym - This struct contains information about each symbol that is
/// added to logical symbol table for the module. This is eventually
/// turned into a real symbol table in the file.
///
/// The n_* members carry the per-symbol values that are later written to the
/// symbol table; GV and GVName tie the entry back to the LLVM global it was
/// created for.
struct MachOSym {
const GlobalValue *GV; // The global value this corresponds to.
std::string GVName; // The mangled name of the global value.
uint32_t n_strx; // index into the string table
// One of the N_* type constants below, optionally OR'd with N_EXT / N_PEXT.
uint8_t n_type; // type flag
uint8_t n_sect; // section number or NO_SECT
int16_t n_desc; // see <mach-o/stab.h>
uint64_t n_value; // value for this symbol (or stab offset)
// Constants for the n_sect field
// see <mach-o/nlist.h>
enum { NO_SECT = 0 }; // symbol is not in any section
// Constants for the n_type field
// see <mach-o/nlist.h>
enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
N_ABS = 0x2, // absolute, n_sect == NO_SECT
N_SECT = 0xe, // defined in section number n_sect
N_PBUD = 0xc, // prebound undefined (defined in a dylib)
N_INDR = 0xa // indirect
};
// The following bits are OR'd into the types above. For example, a type
// of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
enum { N_EXT = 0x01, // external symbol bit
N_PEXT = 0x10 // private external symbol bit
};
// Constants for the n_desc field
// see <mach-o/loader.h>
enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
REFERENCE_FLAG_UNDEFINED_LAZY = 1,
REFERENCE_FLAG_DEFINED = 2,
REFERENCE_FLAG_PRIVATE_DEFINED = 3,
REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
};
enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
N_WEAK_REF = 0x0040, // symbol is weak referenced
N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
};
// Records gv and its mangled name, zeroes n_strx / n_desc / n_value, and
// picks the type from the section: N_UNDF when sect is NO_SECT, otherwise
// N_SECT. No linkage bits (N_EXT / N_PEXT) are set here.
MachOSym(const GlobalValue *gv, std::string name, uint8_t sect) : GV(gv),
GVName(name), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT),
n_sect(sect), n_desc(0), n_value(0) {
// FIXME: names aren't getting the proper global/local prefix
}
};
struct MachOSymCmp {
bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
return LHS.GVName < RHS.GVName;
@ -530,13 +554,19 @@ namespace llvm {
/// PartitionByDefined - Simple boolean predicate that returns true if Sym
/// is defined in this module.
static bool PartitionByDefined(const MachOWriter::MachOSym &Sym);
static bool PartitionByDefined(const MachOSym &Sym);
protected:
/// SymbolTable - This is the list of symbols we have emitted to the file.
/// This actually gets rearranged before emission to the file (to put the
/// local symbols first in the list).
std::vector<MachOSym> SymbolTable;
/// RelocBuffer - A buffer to hold the mach-o relocations before we write
/// them out at the appropriate location in the file.
DataBuffer RelocBuffer;
/// SymT - A buffer to hold the symbol table before we write it out at the
/// appropriate location in the file.
DataBuffer SymT;
@ -641,8 +671,10 @@ namespace llvm {
void EmitGlobal(GlobalVariable *GV);
void EmitHeaderAndLoadCommands();
void EmitSections();
void EmitRelocations();
void BufferSymbolAndStringTable();
virtual void GetTargetRelocation(MachOSection &MOS, MachineRelocation &MR,
uint64_t Addr) = 0;
};
}

View File

@ -25,8 +25,8 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/CodeGen/MachOWriter.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Target/TargetJITInfo.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MathExtras.h"
@ -130,19 +130,10 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &F) {
// Get a symbol for the function to add to the symbol table
const GlobalValue *FuncV = F.getFunction();
MachOWriter::MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index);
// Figure out the binding (linkage) of the symbol.
switch (FuncV->getLinkage()) {
default:
// appending linkage is illegal for functions.
assert(0 && "Unknown linkage type!");
case GlobalValue::ExternalLinkage:
FnSym.n_type |= MachOWriter::MachOSym::N_EXT;
break;
case GlobalValue::InternalLinkage:
break;
}
MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index);
// FIXME: emit constant pool to appropriate section(s)
// FIXME: emit jump table to appropriate section
// Resolve the function's relocations either to concrete pointers in the case
// of branches from one block to another, or to target relocation entries.
@ -152,15 +143,14 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &F) {
void *MBBAddr = (void *)getMachineBasicBlockAddress(MR.getBasicBlock());
MR.setResultPointer(MBBAddr);
MOW.TM.getJITInfo()->relocate(BufferBegin, &MR, 1, 0);
// FIXME: we basically want the JITInfo relocate() function to rewrite
// this guy right now, so we just write the correct displacement
// to the file.
} else if (MR.isConstantPoolIndex() || MR.isJumpTableIndex()) {
// Get the address of the index.
uint64_t Addr = 0;
// Generate the relocation(s) for the index.
MOW.GetTargetRelocation(*MOS, MR, Addr);
} else {
// isString | isGV | isCPI | isJTI
// FIXME: do something smart here. We won't be able to relocate these
// until the sections are all layed out, but we still need to
// record them. Maybe emit TargetRelocations and then resolve
// those at file writing time?
// Handle other types later once we've finalized the sections in the file.
MOS->Relocations.push_back(MR);
}
}
Relocations.clear();
@ -175,7 +165,6 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &F) {
//===----------------------------------------------------------------------===//
MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm) : O(o), TM(tm) {
// FIXME: set cpu type and cpu subtype somehow from TM
is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
isLittleEndian = TM.getTargetData()->isLittleEndian();
@ -228,9 +217,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
// part of the common block if they are zero initialized and allowed to be
// merged with other symbols.
if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) {
MachOWriter::MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV),
MachOSym::NO_SECT);
ExtOrCommonSym.n_type |= MachOSym::N_EXT;
MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT);
// For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
// bytes of the symbol.
ExtOrCommonSym.n_value = Size;
@ -254,9 +241,20 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
MachOSection &Sec = GV->isConstant() ? getConstSection(Ty) : getDataSection();
AddSymbolToSection(Sec, GV);
// FIXME: actually write out the initializer to the section. This will
// require ExecutionEngine's InitializeMemory() function, which will need to
// be enhanced to support relocations.
// FIXME: A couple significant changes are required for this to work, even for
// trivial cases such as a constant integer:
// 0. InitializeMemory needs to be split out of ExecutionEngine. We don't
// want to have to create an ExecutionEngine such as JIT just to write
// some bytes into a buffer. The only thing necessary for
// InitializeMemory to function properly should be TargetData.
//
// 1. InitializeMemory needs to be enhanced to return MachineRelocations
// rather than accessing the address of objects such as basic blocks,
// constant pools, and jump tables. The client of InitializeMemory such
// as an object writer or jit emitter should then handle these relocs
// appropriately.
//
// FIXME: need to allocate memory for the global initializer.
}
@ -292,7 +290,7 @@ bool MachOWriter::doFinalization(Module &M) {
// Emit the symbol table to temporary buffers, so that we know the size of
// the string table when we write the load commands in the next phase.
BufferSymbolAndStringTable();
// Emit the header and load commands.
EmitHeaderAndLoadCommands();
@ -300,9 +298,7 @@ bool MachOWriter::doFinalization(Module &M) {
EmitSections();
// Emit the relocation entry data for each section.
// FIXME: presumably this should be a virtual method, since different targets
// have different relocation types.
EmitRelocations();
O.write((char*)&RelocBuffer[0], RelocBuffer.size());
// Write the symbol table and the string table to the end of the file.
O.write((char*)&SymT[0], SymT.size());
@ -368,10 +364,32 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
outword(FH, SEG.nsects);
outword(FH, SEG.flags);
// Step #5: Write out the section commands for each section
// Step #5: Finish filling in the fields of the MachOSections
uint64_t currentAddr = 0;
for (std::list<MachOSection>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) {
I->offset = SEG.fileoff; // FIXME: separate offset
I->addr = currentAddr;
I->offset = currentAddr + SEG.fileoff;
// FIXME: do we need to do something with alignment here?
currentAddr += I->size;
}
// Step #6: Calculate the number of relocations for each section and write out
// the section commands for each section
currentAddr += SEG.fileoff;
for (std::list<MachOSection>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) {
// calculate the relocation info for this section command
// FIXME: this could get complicated calculating the address argument, we
// should probably split this out into its own function.
for (unsigned i = 0, e = I->Relocations.size(); i != e; ++i)
GetTargetRelocation(*I, I->Relocations[i], 0);
if (I->nreloc != 0) {
I->reloff = currentAddr;
currentAddr += I->nreloc * 8;
}
// write the finalized section command to the output buffer
outstring(FH, I->sectname, 16);
outstring(FH, I->segname, 16);
outaddr(FH, I->addr);
@ -387,9 +405,9 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
outword(FH, I->reserved3);
}
// Step #6: Emit LC_SYMTAB/LC_DYSYMTAB load commands
// Step #7: Emit LC_SYMTAB/LC_DYSYMTAB load commands
// FIXME: add size of relocs
SymTab.symoff = SEG.fileoff + SEG.filesize;
SymTab.symoff = currentAddr;
SymTab.nsyms = SymbolTable.size();
SymTab.stroff = SymTab.symoff + SymT.size();
SymTab.strsize = StrT.size();
@ -436,12 +454,6 @@ void MachOWriter::EmitSections() {
}
}
/// EmitRelocations - Placeholder for emitting relocation entries. The body is
/// empty, so calling it currently has no effect.
void MachOWriter::EmitRelocations() {
// FIXME: this should probably be a pure virtual function, since the
// relocation types and layout of the relocations themselves are target
// specific.
}
/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
/// a local symbol rather than an external symbol.
bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
@ -513,3 +525,23 @@ void MachOWriter::BufferSymbolAndStringTable() {
outaddr(SymT, I->n_value);
}
}
/// MachOSym constructor - Record the global value and its mangled name, start
/// with zeroed string-table index, descriptor and value, and classify the
/// symbol as undefined (N_UNDF) when sect is NO_SECT or as section-defined
/// (N_SECT) otherwise. The external bit is then derived from gv's linkage.
MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect)
  : GV(gv), GVName(name), n_strx(0),
    n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), n_desc(0),
    n_value(0) {
  // FIXME: take a target machine, and then add the appropriate prefix for
  // the linkage type based on the TargetAsmInfo
  switch (gv->getLinkage()) {
  case GlobalValue::InternalLinkage:
    // Internal symbols keep the external bit clear.
    break;
  case GlobalValue::WeakLinkage:
  case GlobalValue::LinkOnceLinkage:
    assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
    // Deliberate fall-through: weak and linkonce globals are marked external
    // exactly like ExternalLinkage ones.
  case GlobalValue::ExternalLinkage:
    n_type |= N_EXT;
    break;
  default:
    assert(0 && "Unexpected linkage type!");
    break;
  }
}

View File

@ -312,15 +312,12 @@ void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
"Relocation out of range!");
*RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
break;
case PPC::reloc_absolute_ptr_high: // Pointer relocations.
case PPC::reloc_absolute_ptr_low:
case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
ResultPtr += MR->getConstantVal();
// If this is a high-part access, get the high-part.
if (MR->getRelocationType() == PPC::reloc_absolute_high ||
MR->getRelocationType() == PPC::reloc_absolute_ptr_high) {
if (MR->getRelocationType() == PPC::reloc_absolute_high) {
// If the low part will have a carry (really a borrow) from the low
// 16-bits into the high 16, add a bit to borrow from.
if (((int)ResultPtr << 16) < 0)

View File

@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachOWriter.h"
@ -22,11 +23,28 @@ namespace {
/// PPCMachOWriter - PowerPC flavour of MachOWriter. It fills in the CPU type
/// and subtype of the Mach-O header and provides the PowerPC implementation of
/// GetTargetRelocation.
class VISIBILITY_HIDDEN PPCMachOWriter : public MachOWriter {
public:
/// Forward the stream and target machine to MachOWriter, then set the header
/// CPU type from the target's pointer width and the CPU subtype to
/// CPU_SUBTYPE_POWERPC_ALL.
PPCMachOWriter(std::ostream &O, PPCTargetMachine &TM) : MachOWriter(O, TM) {
// FIXME: choose ppc64 when appropriate
Header.cputype = MachOHeader::CPU_TYPE_POWERPC;
// 64-bit pointers select the ppc64 CPU type; any other width selects ppc.
if (TM.getTargetData()->getPointerSizeInBits() == 64) {
Header.cputype = MachOHeader::CPU_TYPE_POWERPC64;
} else {
Header.cputype = MachOHeader::CPU_TYPE_POWERPC;
}
Header.cpusubtype = MachOHeader::CPU_SUBTYPE_POWERPC_ALL;
}
/// Convert MR into PowerPC Mach-O relocation entries for section MOS; Addr
/// is the address used when rewriting the instruction. Defined out of line.
virtual void GetTargetRelocation(MachOSection &MOS, MachineRelocation &MR,
uint64_t Addr);
// Constants for the relocation r_type field.
// see <mach-o/ppc/reloc.h>
// The enumerators rely on their implicit values (0 through 7, in declaration
// order), so the order below must not be changed.
enum { PPC_RELOC_VANILLA, // generic relocation
PPC_RELOC_PAIR, // the second relocation entry of a pair
PPC_RELOC_BR14, // 14 bit branch displacement to word address
PPC_RELOC_BR24, // 24 bit branch displacement to word address
PPC_RELOC_HI16, // a PAIR follows with the low 16 bits
PPC_RELOC_LO16, // a PAIR follows with the high 16 bits
PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b
PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero
};
};
}
@ -39,3 +57,69 @@ void llvm::addPPCMachOObjectWriterPass(FunctionPassManager &FPM,
FPM.add(EW);
FPM.add(createPPCCodeEmitterPass(TM, EW->getMachineCodeEmitter()));
}
/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or
/// more PowerPC MachORelocation(s), add the new relocations to the
/// MachOSection, and rewrite the instruction at the section offset if required
/// by that relocation type.
///
/// MOS  - section holding the instruction; MOS.nreloc is incremented for every
///        entry appended to RelocBuffer.
/// MR   - the relocation to convert. Only reloc_absolute_high and
///        reloc_absolute_low are implemented; every other type asserts.
/// Addr - address used both for the PAIR entry and for the 16-bit immediate
///        written into the instruction.
void PPCMachOWriter::GetTargetRelocation(MachOSection &MOS,
                                         MachineRelocation &MR,
                                         uint64_t Addr) {
  // Keep track of whether or not this is an externally defined relocation.
  uint32_t index = MOS.Index;
  bool isExtern = false;

  // Get the address of the instruction to rewrite
  unsigned char *RelocPos = &MOS.SectionData[0] + MR.getMachineCodeOffset();

  // Get the address of whatever it is we're relocating, if possible.
  if (MR.isGlobalValue()) {
    // determine whether or not its external and then figure out what section
    // we put it in if it's a locally defined symbol.
  } else if (MR.isString()) {
    // lookup in global values?
  } else {
    assert((MR.isConstantPoolIndex() || MR.isJumpTableIndex()) &&
           "Unhandled MachineRelocation type!");
  }

  // The 16-bit immediate is patched with explicit byte stores into the last
  // two bytes of the instruction word. Writing through an unsigned* instead
  // would require RelocPos to be 4-byte aligned and would modify the wrong
  // half of the word when the host is little-endian.
  // NOTE(review): this assumes SectionData holds PowerPC instruction words in
  // big-endian byte order -- confirm against the code emitter.
  switch ((PPC::RelocationType)MR.getRelocationType()) {
  default: assert(0 && "Unknown PPC relocation type!");
  case PPC::reloc_pcrel_bx:
  case PPC::reloc_pcrel_bcx:
  case PPC::reloc_absolute_low_ix:
    assert(0 && "Unhandled PPC relocation type!");
    break;
  case PPC::reloc_absolute_high:
    {
      // The PAIR entry carries the low half of the address, taken before the
      // rounding adjustment below.
      MachORelocation HA16(MR.getMachineCodeOffset(), index, false, 2, isExtern,
                           PPC_RELOC_HA16);
      MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern,
                           PPC_RELOC_PAIR);
      outword(RelocBuffer, HA16.r_address);
      outword(RelocBuffer, HA16.getPackedFields());
      outword(RelocBuffer, PAIR.r_address);
      outword(RelocBuffer, PAIR.getPackedFields());
    }
    MOS.nreloc += 2;
    // Adding 0x8000 carries into the high half whenever bit 15 of the low
    // half is set.
    Addr += 0x8000;
    RelocPos[2] = static_cast<unsigned char>((Addr >> 24) & 0xFF);
    RelocPos[3] = static_cast<unsigned char>((Addr >> 16) & 0xFF);
    break;
  case PPC::reloc_absolute_low:
    {
      // The PAIR entry carries the high half of the address.
      MachORelocation LO16(MR.getMachineCodeOffset(), index, false, 2, isExtern,
                           PPC_RELOC_LO16);
      MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern,
                           PPC_RELOC_PAIR);
      outword(RelocBuffer, LO16.r_address);
      outword(RelocBuffer, LO16.getPackedFields());
      outword(RelocBuffer, PAIR.r_address);
      outword(RelocBuffer, PAIR.getPackedFields());
    }
    MOS.nreloc += 2;
    RelocPos[2] = static_cast<unsigned char>((Addr >> 8) & 0xFF);
    RelocPos[3] = static_cast<unsigned char>(Addr & 0xFF);
    break;
  }
}

View File

@ -44,21 +44,7 @@ namespace llvm {
// reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
// instruction which have two implicit zero bits.
reloc_absolute_low_ix,
// reloc_absolute_ptr_high - Absolute relocation for references to lazy
// pointer stubs. In this case, the relocated instruction should be
// relocated to point to a POINTER to the indicated global. The low-16
// bits of the instruction are rewritten with the high 16-bits of the
// address of the pointer.
reloc_absolute_ptr_high,
// reloc_absolute_ptr_low - Absolute relocation for references to lazy
// pointer stubs. In this case, the relocated instruction should be
// relocated to point to a POINTER to the indicated global. The low-16
// bits of the instruction are rewritten with the low 16-bits of the
// address of the pointer.
reloc_absolute_ptr_low
reloc_absolute_low_ix
};
}
}