Updated the llvm-mc disassembler C API to add support for the X86 target.

rdar://10873652

As part of this I updated the llvm-mc disassembler C API to always call the
SymbolLookUp call back even if there is no getOpInfo call back.  If there is a
getOpInfo call back, it is tried first, and if it returns no information
then SymbolLookUp is called.  I also made the code more robust by
memset(3)'ing the LLVMOpInfo1 struct to zero before setting
SymbolicOp.Value for the call to getOpInfo.  The code also no longer uses any
values from the LLVMOpInfo1 struct if getOpInfo returns 0, and no longer
uses any of the ReferenceType or ReferenceName values from SymbolLookUp if it
returns NULL. rdar://10873563 and rdar://10873683

For the X86 target also fixed bugs so the annotations get printed. 

Also fixed a few places in the ARM target that were not producing symbolic
operands for some instructions.  rdar://10878166

llvm-svn: 151267
This commit is contained in:
Kevin Enderby 2012-02-23 18:18:17 +00:00
parent d5d166d4d4
commit 6fbcd8d439
8 changed files with 243 additions and 66 deletions

View File

@ -440,40 +440,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MCInst &MI, const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
if (!getOpInfo)
return false;
struct LLVMOpInfo1 SymbolicOp;
memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
void *DisInfo = Dis->getDisInfoBlock();
if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
if (isBranch) {
LLVMSymbolLookupCallback SymbolLookUp =
Dis->getLLVMSymbolLookupCallback();
if (SymbolLookUp) {
uint64_t ReferenceType;
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
const char *ReferenceName;
const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
&ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = true;
SymbolicOp.Value = 0;
}
else {
SymbolicOp.Value = Value;
}
if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
(*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
}
else {
return false;
}
}
else {
if (!getOpInfo ||
!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
// Clear SymbolicOp.Value from above and also all other fields.
memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
if (!SymbolLookUp)
return false;
uint64_t ReferenceType;
if (isBranch)
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
else
ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
const char *ReferenceName;
const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
&ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = true;
}
// For branches always create an MCExpr so it gets printed as hex address.
else if (isBranch) {
SymbolicOp.Value = Value;
}
if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
(*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
if (!Name && !isBranch)
return false;
}
MCContext *Ctx = Dis->getMCContext();
@ -548,7 +546,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
const void *Decoder) {
const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
if (SymbolLookUp) {
@ -1910,12 +1908,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
if (pred == 0xF) {
Inst.setOpcode(ARM::BLXi);
imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
return S;
}
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
4, Inst, Decoder))
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
@ -3127,7 +3127,9 @@ DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 8,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}

View File

@ -390,7 +390,11 @@ struct X86Operand : public MCParsedAsmOperand {
void addAbsMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
else
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {

View File

@ -18,7 +18,8 @@
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@ -71,7 +72,8 @@ extern Target TheX86_32Target, TheX86_64Target;
}
static bool translateInstruction(MCInst &target,
InternalInstruction &source);
InternalInstruction &source,
const MCDisassembler *Dis);
X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
DisassemblerMode mode,
@ -123,6 +125,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const {
CommentStream = &cStream;
InternalInstruction internalInstr;
dlog_t loggerFn = logger;
@ -144,7 +148,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
}
else {
size = internalInstr.length;
return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
return (!translateInstruction(instr, internalInstr, this)) ?
Success : Fail;
}
}
@ -169,6 +174,119 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
}
/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
/// immediate Value in the MCInst.
///
/// @param Value - The immediate Value, has had any PC adjustment made by
/// the caller.
/// @param isBranch - If the instruction is a branch instruction
/// @param Address - The starting address of the instruction
/// @param Offset - The byte offset to this immediate in the instruction
/// @param Width - The byte width of this immediate in the instruction
///
/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
/// called then that function is called to get any symbolic information for the
/// immediate in the instruction using the Address, Offset and Width. If that
/// returns non-zero then the symbolic information it returns is used to create
/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
/// returns zero and isBranch is true then a symbol look up for immediate Value
/// is done and if a symbol is found an MCExpr is created with that, else
/// an MCExpr with the immediate Value is created. This function returns true
/// if it adds an operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
uint64_t Address, uint64_t Offset,
uint64_t Width, MCInst &MI,
const MCDisassembler *Dis) {
LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
struct LLVMOpInfo1 SymbolicOp;
memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
void *DisInfo = Dis->getDisInfoBlock();
if (!getOpInfo ||
!getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
// Clear SymbolicOp.Value from above and also all other fields.
memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
if (!SymbolLookUp)
return false;
uint64_t ReferenceType;
if (isBranch)
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
else
ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
const char *ReferenceName;
const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
&ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = true;
}
// For branches always create an MCExpr so it gets printed as hex address.
else if (isBranch) {
SymbolicOp.Value = Value;
}
if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
(*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
if (!Name && !isBranch)
return false;
}
MCContext *Ctx = Dis->getMCContext();
const MCExpr *Add = NULL;
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
Add = MCSymbolRefExpr::Create(Sym, *Ctx);
} else {
Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
}
}
const MCExpr *Sub = NULL;
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
} else {
Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
}
}
const MCExpr *Off = NULL;
if (SymbolicOp.Value != 0)
Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
const MCExpr *Expr;
if (Sub) {
const MCExpr *LHS;
if (Add)
LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
else
LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
if (Off != 0)
Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
else
Expr = LHS;
} else if (Add) {
if (Off != 0)
Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
else
Expr = Add;
} else {
if (Off != 0)
Expr = Off;
else
Expr = MCConstantExpr::Create(0, *Ctx);
}
MI.addOperand(MCOperand::CreateExpr(Expr));
return true;
}
/// translateImmediate - Appends an immediate operand to an MCInst.
///
/// @param mcInst - The MCInst to append to.
@ -177,7 +295,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// @param insn - The internal instruction.
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
const OperandSpecifier &operand,
InternalInstruction &insn) {
InternalInstruction &insn,
const MCDisassembler *Dis) {
// Sign-extend the immediate if necessary.
OperandType type = operand.type;
@ -233,6 +352,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
}
bool isBranch = false;
uint64_t pcrel = 0;
switch (type) {
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
@ -240,8 +361,11 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_XMM256:
mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
return;
case TYPE_MOFFS8:
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
// fall through to sign extend the immediate if needed.
case TYPE_MOFFS8:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
@ -249,9 +373,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
if(immediate & 0x8000)
immediate |= ~(0xffffull);
break;
case TYPE_MOFFS32:
case TYPE_REL32:
case TYPE_REL64:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
// fall through to sign extend the immediate if needed.
case TYPE_MOFFS32:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
@ -261,7 +388,10 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
mcInst.addOperand(MCOperand::CreateImm(immediate));
if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
insn.immediateOffset, insn.immediateSize,
mcInst, Dis))
mcInst.addOperand(MCOperand::CreateImm(immediate));
}
/// translateRMRegister - Translates a register stored in the R/M field of the
@ -308,7 +438,8 @@ static bool translateRMRegister(MCInst &mcInst,
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
const MCDisassembler *Dis) {
// Addresses in an MCInst are represented as five operands:
// 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
@ -326,6 +457,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
MCOperand indexReg;
MCOperand displacement;
MCOperand segmentReg;
uint64_t pcrel = 0;
if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
if (insn.sibBase != SIB_BASE_NONE) {
@ -367,8 +499,11 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
return true;
}
if (insn.mode == MODE_64BIT)
if (insn.mode == MODE_64BIT){
pcrel = insn.startLocation +
insn.displacementOffset + insn.displacementSize;
baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
}
else
baseReg = MCOperand::CreateReg(0);
@ -434,7 +569,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
mcInst.addOperand(displacement);
if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
insn.startLocation, insn.displacementOffset,
insn.displacementSize, mcInst, Dis))
mcInst.addOperand(displacement);
mcInst.addOperand(segmentReg);
return false;
}
@ -448,7 +586,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
InternalInstruction &insn) {
InternalInstruction &insn, const MCDisassembler *Dis) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
@ -488,7 +626,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M1632:
case TYPE_M1664:
case TYPE_LEA:
return translateRMMemory(mcInst, insn);
return translateRMMemory(mcInst, insn, Dis);
}
}
@ -518,7 +656,8 @@ static bool translateFPRegister(MCInst &mcInst,
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
InternalInstruction &insn) {
InternalInstruction &insn,
const MCDisassembler *Dis) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
@ -527,7 +666,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateRegister(mcInst, insn.reg);
return false;
case ENCODING_RM:
return translateRM(mcInst, operand, insn);
return translateRM(mcInst, operand, insn, Dis);
case ENCODING_CB:
case ENCODING_CW:
case ENCODING_CD:
@ -545,7 +684,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateImmediate(mcInst,
insn.immediates[insn.numImmediatesTranslated++],
operand,
insn);
insn,
Dis);
return false;
case ENCODING_RB:
case ENCODING_RW:
@ -564,7 +704,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_DUP:
return translateOperand(mcInst,
insn.spec->operands[operand.type - TYPE_DUP0],
insn);
insn, Dis);
}
}
@ -575,7 +715,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
InternalInstruction &insn) {
InternalInstruction &insn,
const MCDisassembler *Dis) {
if (!insn.spec) {
debug("Instruction has no specification");
return true;
@ -589,7 +730,7 @@ static bool translateInstruction(MCInst &mcInst,
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
if (insn.spec->operands[index].encoding != ENCODING_NONE) {
if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
return true;
}
}

View File

@ -1014,6 +1014,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
return 0;
insn->consumedDisplacement = TRUE;
insn->displacementOffset = insn->readerCursor - insn->startLocation;
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
@ -1410,6 +1411,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
size = insn->immediateSize;
else
insn->immediateSize = size;
insn->immediateOffset = insn->readerCursor - insn->startLocation;
switch (size) {
case 1:

View File

@ -459,6 +459,11 @@ struct InternalInstruction {
uint8_t addressSize;
uint8_t displacementSize;
uint8_t immediateSize;
/* Offsets from the start of the instruction to the pieces of data, which is
needed to find relocation entries for adding symbolic operands */
uint8_t displacementOffset;
uint8_t immediateOffset;
/* opcode state */

View File

@ -45,11 +45,12 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
// If verbose assembly is enabled, we can print some informative comments.
if (CommentStream) {
printAnnotation(OS, Annot);
if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
}
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
@ -103,11 +104,21 @@ void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
// Print this as a signed 32-bit value.
O << (int)Op.getImm();
O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
O << *Op.getExpr();
// If a symbolic branch target was added as a constant expression then print
// that address in hex.
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
O << "0x";
O.write_hex(Address);
}
else {
// Otherwise, just print the expression.
O << *Op.getExpr();
}
}
}

View File

@ -35,12 +35,13 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
printInstruction(MI, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
// If verbose assembly is enabled, we can print some informative comments.
if (CommentStream) {
printAnnotation(OS, Annot);
if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
}
StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
@ -95,7 +96,18 @@ void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
O << *Op.getExpr();
// If a symbolic branch target was added as a constant expression then print
// that address in hex.
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
O << "0x";
O.write_hex(Address);
}
else {
// Otherwise, just print the expression.
O << *Op.getExpr();
}
}
}

View File

@ -97,9 +97,9 @@
sal $1, %eax
// moffset forms of moves, rdar://7947184
movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A]
movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A]
movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A]
movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,0x00,0x00,0x00,0x00]
movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,0x00,0x00,0x00,0x00]
movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,0x00,0x00,0x00,0x00]
// rdar://7973775
into