The current version of the LLVM X86 disassembler incorrectly interprets some possible combinations of x86 prefixes. This patch is the first step toward closing PR7709 and PR17697. Follow-up patch(es) will close the related PRs.

Differential Revision: https://reviews.llvm.org/D36788

M    lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
M    lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
A    test/MC/Disassembler/X86/prefixes-i386.s
A    test/MC/Disassembler/X86/prefixes-x86_64.s
M    test/MC/Disassembler/X86/prefixes.txt

llvm-svn: 311882
This commit is contained in:
Andrew V. Tischenko 2017-08-28 10:43:14 +00:00
parent d0604acd8e
commit 574962a3b3
5 changed files with 204 additions and 18 deletions

View File

@ -277,6 +277,12 @@ static void dbgprintf(struct InternalInstruction* insn,
insn->dlog(insn->dlogArg, buffer);
}
/*
 * isREX - Reports whether a byte should be treated as a REX prefix.
 *
 * @param insn   - The instruction being decoded; only its mode is consulted.
 * @param prefix - The candidate byte.
 * @return       - true only when decoding in 64-bit mode and the byte lies in
 *                 the range 0x40..0x4f; false in every other case.
 */
static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
  /* A high nibble of 0x4 is exactly the 0x40..0x4f range for a uint8_t. */
  const bool inRexRange = (prefix & 0xf0) == 0x40;
  return insn->mode == MODE_64BIT && inRexRange;
}
/*
* setPrefixPresent - Marks that a particular prefix is present at a particular
* location.
@ -290,6 +296,38 @@ static void setPrefixPresent(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
// Besides recording that the prefix byte is present at the given location,
// this decides whether 0xf2, 0xf3 or 0x66 acts as the mandatory prefix by
// peeking at the byte that follows it.
uint8_t nextByte;
switch (prefix) {
case 0xf2:
case 0xf3:
// NOTE(review): a non-zero result from lookAtByte() appears to mean that the
// next byte could not be read -- confirm against its definition. In that case
// only the generic bookkeeping after the switch is performed.
if (lookAtByte(insn, &nextByte))
break;
// TODO:
// 1. There could be several 0x66 prefixes.
// 2. If nextByte == 0x66 and the byte after it is not 0x0f, then this is
//    not a mandatory prefix.
// 3. If nextByte >= 0x40 && nextByte <= 0x4f it is a REX prefix, and 0x0f
//    must follow immediately after it for this to be a mandatory prefix.
if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) {
// The last 0xf2/0xf3 seen is the mandatory prefix: the assignment is
// unconditional, so it replaces any previously recorded mandatory prefix.
insn->mandatory_prefix = prefix;
insn->necessaryPrefixLocation = location;
break;
}
break;
case 0x66:
// Same look-ahead as above; bail out to the generic bookkeeping on failure.
if (lookAtByte(insn, &nextByte))
break;
// 0x66 can't overwrite an existing mandatory prefix; in that case it is
// ignored as a mandatory-prefix candidate. Otherwise it becomes the mandatory
// prefix only when directly followed by 0x0f or by a REX prefix.
if (!insn->mandatory_prefix &&
(nextByte == 0x0f || isREX(insn, nextByte))) {
insn->mandatory_prefix = prefix;
insn->necessaryPrefixLocation = location;
}
break;
}
// While no mandatory prefix has been chosen, keep necessaryPrefixLocation
// pointing at the most recently seen prefix.
if (!insn->mandatory_prefix)
insn->necessaryPrefixLocation = location;
// Record presence and location, indexed by the prefix byte value.
insn->prefixPresent[prefix] = 1;
insn->prefixLocations[prefix] = location;
}
@ -343,13 +381,10 @@ static int readPrefixes(struct InternalInstruction* insn) {
* If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
* break and let it be disassembled as a normal "instruction".
*/
if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
break;
if (insn->readerCursor - 1 == insn->startLocation
&& (byte == 0xf2 || byte == 0xf3)
&& !lookAtByte(insn, &nextByte))
{
if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
/*
* If the byte is 0xf2 or 0xf3, and any of the following conditions are
* met:
@ -357,9 +392,8 @@ static int readPrefixes(struct InternalInstruction* insn) {
* - it is followed by an xchg instruction
* then it should be disassembled as a xacquire/xrelease not repne/rep.
*/
if ((byte == 0xf2 || byte == 0xf3) &&
((nextByte == 0xf0) ||
((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
if (((nextByte == 0xf0) ||
((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
insn->xAcquireRelease = true;
/*
* Also if the byte is 0xf3, and the following condition is met:
@ -378,7 +412,13 @@ static int readPrefixes(struct InternalInstruction* insn) {
return -1;
unconsumeByte(insn);
}
if (nextByte != 0x0f && nextByte != 0x90)
// If the current byte can't be a mandatory prefix then it's a simple repeat
// prefix and should be disassembled as a separate instruction
if (nextByte != 0x0f &&
// We can have f2 f3 f2 66 f2 0f (in any order) as a valid set
// of prefixes with the last one as a mandatory prefix (SSE/SSE2...)
nextByte != 0xf2 && nextByte != 0xf3 && nextByte != 0x66 &&
nextByte != 0x90 && !isREX(insn, nextByte))
break;
}
@ -426,11 +466,13 @@ static int readPrefixes(struct InternalInstruction* insn) {
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x66: /* Operand-size override */
if (prefixGroups[2])
dbgprintf(insn, "Redundant Group 3 prefix");
prefixGroups[2] = true;
hasOpSize = true;
setPrefixPresent(insn, byte, prefixLocation);
if (!insn->mandatory_prefix) {
if (prefixGroups[2])
dbgprintf(insn, "Redundant Group 3 prefix");
prefixGroups[2] = true;
}
hasOpSize = true;
break;
case 0x67: /* Address-size override */
if (prefixGroups[3])
@ -624,10 +666,8 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->necessaryPrefixLocation = insn->readerCursor - 2;
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
} else {
} else
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
} else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
@ -950,8 +990,19 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
} else {
return -1;
}
} else if (insn->mode != MODE_16BIT && !insn->mandatory_prefix) {
// If we don't have mandatory prefix we should use "standard" prefixes here
if (insn->prefixPresent[0x66])
attrMask |= ATTR_OPSIZE;
if (insn->prefixPresent[0x67])
attrMask |= ATTR_ADSIZE;
if (insn->prefixPresent[0xf2])
attrMask |= ATTR_XD;
if (insn->prefixPresent[0xf3])
attrMask |= ATTR_XS;
} else {
if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
if (insn->mode != MODE_16BIT &&
isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
attrMask |= ATTR_ADSIZE;

View File

@ -546,10 +546,13 @@ struct InternalInstruction {
// Prefix state
// TODO: we should be able to get rid of the following 2 arrays
// 1 if the prefix byte corresponding to the entry is present; 0 if not
uint8_t prefixPresent[0x100];
// contains the location (for use with the reader) of the prefix byte
uint64_t prefixLocations[0x100];
// The possible mandatory prefix
uint8_t mandatory_prefix;
// The value of the vector extension prefix(EVEX/VEX/XOP), if present
uint8_t vectorExtensionPrefix[4];
// The type of the vector extension prefix

View File

@ -0,0 +1,68 @@
// RUN: llvm-mc -disassemble %s -triple=i386-apple-darwin9
// CHECK: movl %fs:24, %eax
0x64 0xa1 0x18 0x00 0x00 0x00 # mov eax, dword ptr fs:[18h]
# CHECK: rep
# CHECK-NEXT: insb %dx, %es:(%rdi)
0xf3 0x6c #rep ins
# CHECK: rep
# CHECK-NEXT: insl %dx, %es:(%rdi)
0xf3 0x6d #rep ins
# CHECK: rep
# CHECK-NEXT: movsb (%rsi), %es:(%rdi)
0xf3 0xa4 #rep movs
# CHECK: rep
# CHECK-NEXT: movsl (%rsi), %es:(%rdi)
0xf3 0xa5 #rep movs
# CHECK: rep
# CHECK-NEXT: outsb (%rsi), %dx
0xf3 0x6e #rep outs
# CHECK: rep
# CHECK-NEXT: outsl (%rsi), %dx
0xf3 0x6f #rep outs
# CHECK: rep
# CHECK-NEXT: lodsb (%rsi), %al
0xf3 0xac #rep lods
# CHECK: rep
# CHECK-NEXT: lodsl (%rsi), %eax
0xf3 0xad #rep lods
# CHECK: rep
# CHECK-NEXT: stosb %al, %es:(%rdi)
0xf3 0xaa #rep stos
# CHECK: rep
# CHECK-NEXT: stosl %eax, %es:(%rdi)
0xf3 0xab #rep stos
# CHECK: rep
# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi)
0xf3 0xa6 #repe cmps
# CHECK: rep
# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi)
0xf3 0xa7 #repe cmps
# CHECK: rep
# CHECK-NEXT: scasb %es:(%rdi), %al
0xf3 0xae #repe scas
# CHECK: rep
# CHECK-NEXT: scasl %es:(%rdi), %eax
0xf3 0xaf #repe scas
# CHECK: repne
# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi)
0xf2 0xa6 #repne cmps
# CHECK: repne
# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi)
0xf2 0xa7 #repne cmps
# CHECK: repne
# CHECK-NEXT: scasb %es:(%rdi), %al
0xf2 0xae #repne scas
# CHECK: repne
# CHECK-NEXT: scasl %es:(%rdi), %eax
0xf2 0xaf #repne scas
// CHECK: mulsd %xmm7, %xmm7
0x66 0xF3 0xF2 0x0F 0x59 0xFF
// CHECK: mulss %xmm7, %xmm7
0x66 0xF2 0xF3 0x0F 0x59 0xFF
// CHECK: mulpd %xmm7, %xmm7
0x66 0x0F 0x59 0xFF
// CHECK: mulsd %xmm7, %xmm7
0xf2 0x66 0x0f 0x59 0xff

View File

@ -0,0 +1,10 @@
// RUN: llvm-mc -disassemble %s -triple=x86_64-apple-darwin9
// CHECK: mulsd %xmm7, %xmm7
0x66 0xF3 0xF2 0x0F 0x59 0xFF
// CHECK: mulss %xmm7, %xmm7
0x66 0xF2 0xF3 0x0F 0x59 0xFF
// CHECK: mulpd %xmm7, %xmm7
0x66 0x0F 0x59 0xFF
// CHECK: mulsd %xmm7, %xmm7
0xf2 0x66 0x0f 0x59 0xff

View File

@ -1,5 +1,60 @@
# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
# CHECK: rep
# CHECK-NEXT: insb %dx, %es:(%rdi)
0xf3 0x6c #rep ins
# CHECK: rep
# CHECK-NEXT: insl %dx, %es:(%rdi)
0xf3 0x6d #rep ins
# CHECK: rep
# CHECK-NEXT: movsb (%rsi), %es:(%rdi)
0xf3 0xa4 #rep movs
# CHECK: rep
# CHECK-NEXT: movsl (%rsi), %es:(%rdi)
0xf3 0xa5 #rep movs
# CHECK: rep
# CHECK-NEXT: outsb (%rsi), %dx
0xf3 0x6e #rep outs
# CHECK: rep
# CHECK-NEXT: outsl (%rsi), %dx
0xf3 0x6f #rep outs
# CHECK: rep
# CHECK-NEXT: lodsb (%rsi), %al
0xf3 0xac #rep lods
# CHECK: rep
# CHECK-NEXT: lodsl (%rsi), %eax
0xf3 0xad #rep lods
# CHECK: rep
# CHECK-NEXT: stosb %al, %es:(%rdi)
0xf3 0xaa #rep stos
# CHECK: rep
# CHECK-NEXT: stosl %eax, %es:(%rdi)
0xf3 0xab #rep stos
# CHECK: rep
# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi)
0xf3 0xa6 #repe cmps
# CHECK: rep
# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi)
0xf3 0xa7 #repe cmps
# CHECK: rep
# CHECK-NEXT: scasb %es:(%rdi), %al
0xf3 0xae #repe scas
# CHECK: rep
# CHECK-NEXT: scasl %es:(%rdi), %eax
0xf3 0xaf #repe scas
# CHECK: repne
# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi)
0xf2 0xa6 #repne cmps
# CHECK: repne
# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi)
0xf2 0xa7 #repne cmps
# CHECK: repne
# CHECK-NEXT: scasb %es:(%rdi), %al
0xf2 0xae #repne scas
# CHECK: repne
# CHECK-NEXT: scasl %es:(%rdi), %eax
0xf2 0xaf #repne scas
# CHECK: lock
# CHECK-NEXT: orl $16, %fs:776
0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10
@ -50,7 +105,6 @@
# Test that multiple redundant prefixes work (redundant, but valid x86).
# CHECK: rep
# CHECK-NEXT: rep
# CHECK-NEXT: stosq
0xf3 0xf3 0x48 0xab