[llvm-objdump] Switch between ARM/Thumb based on mapping symbols.

The ARMDisassembler changes allow changing between ARM and Thumb mode
based on the MCSubtargetInfo, rather than the Target, which simplifies
the other changes a bit.

I'm not really happy with adding more target-specific logic to
tools/llvm-objdump/, but there isn't any easy way around it: the logic
in question specifically applies to disassembling an object file, and
that code simply isn't located in lib/Target, at least at the moment.

Differential Revision: https://reviews.llvm.org/D60927

llvm-svn: 363903
This commit is contained in:
Eli Friedman 2019-06-20 00:29:40 +00:00
parent e4c2e9b016
commit d88e28d13e
5 changed files with 120 additions and 78 deletions

View File

@ -54,7 +54,6 @@ class ELFObjectFileBase : public ObjectFile {
protected:
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
virtual uint16_t getEMachine() const = 0;
virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0;
virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
@ -91,6 +90,8 @@ public:
virtual uint16_t getEType() const = 0;
virtual uint16_t getEMachine() const = 0;
std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
};

View File

@ -135,27 +135,22 @@ public:
~ARMDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
/// Thumb disassembler for all Thumb platforms.
class ThumbDisassembler : public MCDisassembler {
public:
ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
}
~ThumbDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
private:
DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const;
DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const;
mutable ITStatus ITBlock;
mutable VPTStatus VPTBlock;
@ -519,12 +514,6 @@ static MCDisassembler *createARMDisassembler(const Target &T,
return new ARMDisassembler(STI, Ctx);
}
/// Factory callback that returns a newly allocated ThumbDisassembler built
/// from the given subtarget info \p STI and MC context \p Ctx.
///
/// The Target argument \p T is part of the callback signature but is not
/// read by the body.
static MCDisassembler *createThumbDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ThumbDisassembler(STI, Ctx);
}
// Post-decoding checks
static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address, raw_ostream &OS,
@ -562,6 +551,16 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &OS,
raw_ostream &CS) const {
if (STI.getFeatureBits()[ARM::ModeThumb])
return getThumbInstruction(MI, Size, Bytes, Address, OS, CS);
return getARMInstruction(MI, Size, Bytes, Address, OS, CS);
}
DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;
assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
@ -698,7 +697,7 @@ static bool isVectorPredicable(unsigned Opcode) {
// to fix up the predicate operands using this context information as a
// post-pass.
MCDisassembler::DecodeStatus
ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
MCDisassembler::DecodeStatus S = Success;
const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
@ -813,7 +812,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// mode, the auto-generated decoder will give them an (incorrect)
// predicate operand. We need to rewrite these operands based on the IT
// context as a post-pass.
void ThumbDisassembler::UpdateThumbVFPPredicate(
void ARMDisassembler::UpdateThumbVFPPredicate(
DecodeStatus &S, MCInst &MI) const {
unsigned CC;
CC = ITBlock.getITCC();
@ -844,11 +843,11 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(
}
}
DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &OS,
raw_ostream &CS) const {
DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;
assert(STI.getFeatureBits()[ARM::ModeThumb] &&
@ -1046,9 +1045,9 @@ extern "C" void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
createThumbDisassembler);
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
createThumbDisassembler);
createARMDisassembler);
}
static const uint16_t GPRDecoderTable[] = {

View File

@ -1,7 +1,4 @@
;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj > %t
; Two pass decoding needed because llvm-objdump does not respect mapping symbols
;RUN: llvm-objdump -triple=armv7 -d %t | FileCheck %s --check-prefix=ARM
;RUN: llvm-objdump -triple=thumbv7 -d %t | FileCheck %s --check-prefix=THUMB
;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj | llvm-objdump -d - | FileCheck %s
define hidden i32 @bah(i8* %start) #0 align 2 {
%1 = ptrtoint i8* %start to i32
@ -10,13 +7,7 @@ define hidden i32 @bah(i8* %start) #0 align 2 {
ret i32 %3
}
; ARM: $a
; ARM-NEXT: 04 70 2d e5 str r7, [sp, #-4]!
; ARM: $t
; ARM-NEXT: 48 1c
; THUMB: $a{{.*}}:
; THUMB-NEXT: 04 70
; THUMB-NEXT: 2d e5
; THUMB: $t{{.*}}:
; THUMB-NEXT: 48 1c adds r0, r1, #1
; CHECK: $a{{.*}}:
; CHECK-NEXT: 04 70 2d e5 str r7, [sp, #-4]!
; CHECK: $t{{.*}}:
; CHECK-NEXT: 48 1c adds r0, r1, #1

View File

@ -1,5 +1,6 @@
@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=thumb -d - | FileCheck %s
@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=arm -d - | FileCheck %s --check-prefix=CHECK-ARM
@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -d - | FileCheck %s
@ v7r implies Thumb hwdiv, but ARM hwdiv is optional
@ FIXME: Does that imply we should actually refuse to disassemble it?
.eabi_attribute Tag_CPU_arch, 10 // v7
.eabi_attribute Tag_CPU_arch_profile, 0x52 // 'R' profile
@ -9,8 +10,7 @@ div_arm:
udiv r0, r1, r2
@CHECK-LABEL: div_arm
@CHECK-NOT: udiv r0, r1, r2
@CHECK-ARM-NOT: udiv r0, r1, r2
@CHECK: 11 f2 30 e7 <unknown>
.thumb
div_thumb:

View File

@ -603,13 +603,18 @@ void SourcePrinter::printSourceLine(raw_ostream &OS,
OldLineInfo = LineInfo;
}
/// Returns true when \p Obj is an ELF object file whose machine type, as
/// reported by getEMachine(), is ELF::EM_AARCH64.
static bool isAArch64Elf(const ObjectFile *Obj) {
  // dyn_cast yields null for anything that is not an ELFObjectFileBase, in
  // which case the object cannot be an AArch64 ELF file.
  if (const auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj))
    return ElfObj->getEMachine() == ELF::EM_AARCH64;
  return false;
}
/// Returns true when \p Obj is an ELF object file whose machine type, as
/// reported by getEMachine(), is ELF::EM_ARM.
///
/// AArch64 objects are deliberately NOT matched here; they are identified by
/// isAArch64Elf(), and hasMappingSymbols() combines the two predicates. Callers
/// use this function to decide whether ARM/Thumb mode switching applies, which
/// is meaningless for AArch64.
static bool isArmElf(const ObjectFile *Obj) {
  // A null result means Obj is not an ELF object at all.
  const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
  return Elf && Elf->getEMachine() == ELF::EM_ARM;
}
/// Returns true when \p Obj is either an ARM ELF or an AArch64 ELF object,
/// i.e. one of the two object kinds for which this file collects mapping
/// symbols.
static bool hasMappingSymbols(const ObjectFile *Obj) {
  if (isArmElf(Obj))
    return true;
  return isAArch64Elf(Obj);
}
static void printRelocation(const RelocationRef &Rel, uint64_t Address,
@ -954,10 +959,24 @@ static bool shouldAdjustVA(const SectionRef &Section) {
return false;
}
/// A mapping symbol record: `first` is the symbol's address (callers in this
/// file store it relative to the section start), `second` is a one-character
/// kind tag.
using MappingSymbolPair = std::pair<uint64_t, char>;

/// Finds the kind of the mapping symbol that governs \p Address.
///
/// \param MappingSymbols  Records to search. The lookup is a binary search, so
///                        the list is assumed to be ordered by address (the
///                        caller in this file sorts it before use).
/// \param Address         Address to classify, in the same units as the
///                        `first` members of \p MappingSymbols.
/// \returns The kind tag of the last record whose address is <= \p Address,
///          or '\x00' when no such record exists. A zero result tells the
///          caller to fall back to the target's default disassembly mode.
static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
                                 uint64_t Address) {
  // Predicate that becomes true once a record lies strictly past Address.
  const auto IsPastAddress = [Address](const MappingSymbolPair &Entry) {
    return Entry.first > Address;
  };

  // NOTE(review): bsearch here is the range/predicate helper, not C's
  // bsearch; it is expected to return the first record for which the
  // predicate holds (end() if none) -- confirm against its declaration.
  auto FirstAfter = bsearch(MappingSymbols, IsPastAddress);

  // The governing record, if any, is the one immediately before FirstAfter.
  const bool HasPreceding = FirstAfter != MappingSymbols.begin();
  return HasPreceding ? (FirstAfter - 1)->second : '\x00';
}
static uint64_t
dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
const std::vector<uint64_t> &TextMappingSymsAddr) {
ArrayRef<MappingSymbolPair> MappingSymbols) {
support::endianness Endian =
Obj->isLittleEndian() ? support::little : support::big;
while (Index < End) {
@ -981,8 +1000,7 @@ dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
++Index;
}
outs() << "\n";
if (std::binary_search(TextMappingSymsAddr.begin(),
TextMappingSymsAddr.end(), Index))
if (getMappingSymbolKind(MappingSymbols, Index) != 'd')
break;
}
return Index;
@ -1023,10 +1041,19 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
}
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *DisAsm,
MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
MCDisassembler *SecondaryDisAsm,
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
const MCSubtargetInfo *STI, PrettyPrinter &PIP,
const MCSubtargetInfo *PrimarySTI,
const MCSubtargetInfo *SecondarySTI,
PrettyPrinter &PIP,
SourcePrinter &SP, bool InlineRelocs) {
const MCSubtargetInfo *STI = PrimarySTI;
MCDisassembler *DisAsm = PrimaryDisAsm;
bool PrimaryIsThumb = false;
if (isArmElf(Obj))
PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
if (InlineRelocs)
RelocMap = getRelocsMap(*Obj);
@ -1113,25 +1140,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<uint64_t> DataMappingSymsAddr;
std::vector<uint64_t> TextMappingSymsAddr;
if (isArmElf(Obj)) {
std::vector<MappingSymbolPair> MappingSymbols;
if (hasMappingSymbols(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = std::get<0>(Symb);
StringRef Name = std::get<1>(Symb);
if (Name.startswith("$d"))
DataMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 'd');
if (Name.startswith("$x"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 'x');
if (Name.startswith("$a"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 'a');
if (Name.startswith("$t"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 't');
}
}
llvm::sort(DataMappingSymsAddr);
llvm::sort(TextMappingSymsAddr);
llvm::sort(MappingSymbols);
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
@ -1269,19 +1294,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
Index = End;
}
bool CheckARMELFData = isArmElf(Obj) &&
bool CheckARMELFData = hasMappingSymbols(Obj) &&
std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
!DisassembleAll;
while (Index < End) {
// AArch64 ELF binaries can interleave data and text in the same
// section. We rely on the markers introduced to understand what we
// need to dump. If the data marker is within a function, it is
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to understand what
// we need to dump. If the data marker is within a function, it is
// denoted as a word/short etc.
if (CheckARMELFData &&
std::binary_search(DataMappingSymsAddr.begin(),
DataMappingSymsAddr.end(), Index)) {
getMappingSymbolKind(MappingSymbols, Index) == 'd') {
Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
TextMappingSymsAddr);
MappingSymbols);
continue;
}
@ -1302,6 +1326,16 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
}
if (SecondarySTI) {
if (getMappingSymbolKind(MappingSymbols, Index) == 'a') {
STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
} else if (getMappingSymbolKind(MappingSymbols, Index) == 't') {
STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
}
}
// Disassemble a real instruction or a data when disassemble all is
// provided
MCInst Inst;
@ -1459,6 +1493,22 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
report_error(Obj->getFileName(),
"no disassembler for target " + TripleName);
// If we have an ARM object file, we need a second disassembler, because
// ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
// We use mapping symbols to switch between the two disassemblers, where
// appropriate.
std::unique_ptr<MCDisassembler> SecondaryDisAsm;
std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
if (STI->checkFeatures("+thumb-mode"))
Features.AddFeature("-thumb-mode");
else
Features.AddFeature("+thumb-mode");
SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
Features.getString()));
SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
}
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
@ -1477,8 +1527,9 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (!IP->applyTargetSpecificCLOption(Opt))
error("Unrecognized disassembler option: " + Opt);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), MIA.get(), IP.get(),
STI.get(), PIP, SP, InlineRelocs);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
SP, InlineRelocs);
}
void printRelocations(const ObjectFile *Obj) {