From 574962a3b3e517f58d4a5caba752ef7f9f454568 Mon Sep 17 00:00:00 2001 From: "Andrew V. Tischenko" Date: Mon, 28 Aug 2017 10:43:14 +0000 Subject: [PATCH] The current version of LLVM X86 disassembler incorrectly interprets some possible sets of x86 prefixes. This patch is the first step to close PR7709 and PR17697. There will be follow-up patch(es) to close related PRs. Differential Revision: https://reviews.llvm.org/D36788 M lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp M lib/Target/X86/Disassembler/X86DisassemblerDecoder.h A test/MC/Disassembler/X86/prefixes-i386.s A test/MC/Disassembler/X86/prefixes-x86_64.s M test/MC/Disassembler/X86/prefixes.txt llvm-svn: 311882 --- .../Disassembler/X86DisassemblerDecoder.cpp | 85 +++++++++++++++---- .../X86/Disassembler/X86DisassemblerDecoder.h | 3 + llvm/test/MC/Disassembler/X86/prefixes-i386.s | 68 +++++++++++++++ .../MC/Disassembler/X86/prefixes-x86_64.s | 10 +++ llvm/test/MC/Disassembler/X86/prefixes.txt | 56 +++++++++++- 5 files changed, 204 insertions(+), 18 deletions(-) create mode 100644 llvm/test/MC/Disassembler/X86/prefixes-i386.s create mode 100644 llvm/test/MC/Disassembler/X86/prefixes-x86_64.s diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 577b7a776c6d..74779c77cad0 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -277,6 +277,12 @@ static void dbgprintf(struct InternalInstruction* insn, insn->dlog(insn->dlogArg, buffer); } +static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { + if (insn->mode == MODE_64BIT) + return prefix >= 0x40 && prefix <= 0x4f; + return false; +} + /* * setPrefixPresent - Marks that a particular prefix is present at a particular * location. 
@@ -290,6 +296,38 @@ static void setPrefixPresent(struct InternalInstruction* insn, uint8_t prefix, uint64_t location) { + uint8_t nextByte; + switch (prefix) { + case 0xf2: + case 0xf3: + if (lookAtByte(insn, &nextByte)) + break; + // TODO: + // 1. There could be several 0x66 + // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then + // it's not mandatory prefix + // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need + // 0x0f exactly after it to be mandatory prefix + if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) { + // The last of 0xf2 /0xf3 is mandatory prefix + insn->mandatory_prefix = prefix; + insn->necessaryPrefixLocation = location; + break; + } + break; + case 0x66: + if (lookAtByte(insn, &nextByte)) + break; + // 0x66 can't overwrite existing mandatory prefix and should be ignored + if (!insn->mandatory_prefix && + (nextByte == 0x0f || isREX(insn, nextByte))) { + insn->mandatory_prefix = prefix; + insn->necessaryPrefixLocation = location; + } + break; + } + if (!insn->mandatory_prefix) + insn->necessaryPrefixLocation = location; insn->prefixPresent[prefix] = 1; insn->prefixLocations[prefix] = location; } @@ -343,13 +381,10 @@ static int readPrefixes(struct InternalInstruction* insn) { * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then * break and let it be disassembled as a normal "instruction". 
*/ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK break; - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { + if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -357,9 +392,8 @@ static int readPrefixes(struct InternalInstruction* insn) { * - it is followed by an xchg instruction * then it should be disassembled as a xacquire/xrelease not repne/rep. */ - if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) || - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + if (((nextByte == 0xf0) || + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) insn->xAcquireRelease = true; /* * Also if the byte is 0xf3, and the following condition is met: @@ -378,7 +412,13 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; unconsumeByte(insn); } - if (nextByte != 0x0f && nextByte != 0x90) + // If the current byte can't be a mandatory prefix then it's a simple repeat + // prefix and should be treated as a separate instruction + if (nextByte != 0x0f && + // We can have f2 f3 f2 66 f2 0f (in any order) as a valid set + // of prefixes with the last one as a mandatory prefix (SSE/SSE2...) 
+ nextByte != 0xf2 && nextByte != 0xf3 && nextByte != 0x66 && + nextByte != 0x90 && !isREX(insn, nextByte)) break; } @@ -426,11 +466,13 @@ static int readPrefixes(struct InternalInstruction* insn) { setPrefixPresent(insn, byte, prefixLocation); break; case 0x66: /* Operand-size override */ - if (prefixGroups[2]) - dbgprintf(insn, "Redundant Group 3 prefix"); - prefixGroups[2] = true; - hasOpSize = true; setPrefixPresent(insn, byte, prefixLocation); + if (!insn->mandatory_prefix) { + if (prefixGroups[2]) + dbgprintf(insn, "Redundant Group 3 prefix"); + prefixGroups[2] = true; + } + hasOpSize = true; break; case 0x67: /* Address-size override */ if (prefixGroups[3]) @@ -624,10 +666,8 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->necessaryPrefixLocation = insn->readerCursor - 2; dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + } else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; @@ -950,8 +990,19 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { } else { return -1; } + } else if (insn->mode != MODE_16BIT && !insn->mandatory_prefix) { + // If we don't have mandatory prefix we should use "standard" prefixes here + if (insn->prefixPresent[0x66]) + attrMask |= ATTR_OPSIZE; + if (insn->prefixPresent[0x67]) + attrMask |= ATTR_ADSIZE; + if (insn->prefixPresent[0xf2]) + attrMask |= ATTR_XD; + if (insn->prefixPresent[0xf3]) + attrMask |= ATTR_XS; } else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + if (insn->mode != MODE_16BIT && + isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) attrMask |= ATTR_ADSIZE; diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h 
b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index b07fd0b17d35..56565b6f0e6a 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -546,10 +546,13 @@ struct InternalInstruction { // Prefix state + // TODO: we're able to get rid of the following 2 arrays // 1 if the prefix byte corresponding to the entry is present; 0 if not uint8_t prefixPresent[0x100]; // contains the location (for use with the reader) of the prefix byte uint64_t prefixLocations[0x100]; + // The possible mandatory prefix + uint8_t mandatory_prefix; // The value of the vector extension prefix(EVEX/VEX/XOP), if present uint8_t vectorExtensionPrefix[4]; // The type of the vector extension prefix diff --git a/llvm/test/MC/Disassembler/X86/prefixes-i386.s b/llvm/test/MC/Disassembler/X86/prefixes-i386.s new file mode 100644 index 000000000000..43d7748495b1 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/prefixes-i386.s @@ -0,0 +1,68 @@ +// RUN: llvm-mc -disassemble %s -triple=i386-apple-darwin9 + +// CHECK: movl %fs:24, %eax +0x64 0xa1 0x18 0x00 0x00 0x00 # mov eax, dword ptr fs:[18h] + +# CHECK: rep +# CHECK-NEXT: insb %dx, %es:(%rdi) +0xf3 0x6c #rep ins +# CHECK: rep +# CHECK-NEXT: insl %dx, %es:(%rdi) +0xf3 0x6d #rep ins +# CHECK: rep +# CHECK-NEXT: movsb (%rsi), %es:(%rdi) +0xf3 0xa4 #rep movs +# CHECK: rep +# CHECK-NEXT: movsl (%rsi), %es:(%rdi) +0xf3 0xa5 #rep movs +# CHECK: rep +# CHECK-NEXT: outsb (%rsi), %dx +0xf3 0x6e #rep outs +# CHECK: rep +# CHECK-NEXT: outsl (%rsi), %dx +0xf3 0x6f #rep outs +# CHECK: rep +# CHECK-NEXT: lodsb (%rsi), %al +0xf3 0xac #rep lods +# CHECK: rep +# CHECK-NEXT: lodsl (%rsi), %eax +0xf3 0xad #rep lods +# CHECK: rep +# CHECK-NEXT: stosb %al, %es:(%rdi) +0xf3 0xaa #rep stos +# CHECK: rep +# CHECK-NEXT: stosl %eax, %es:(%rdi) +0xf3 0xab #rep stos +# CHECK: rep +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf3 0xa6 #rep cmps +# CHECK: rep +# CHECK-NEXT: cmpsl %es:(%rdi), 
(%rsi) +0xf3 0xa7 #repe cmps +# CHECK: rep +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf3 0xae #repe scas +# CHECK: rep +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf3 0xaf #repe scas +# CHECK: repne +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf2 0xa6 #repne cmps +# CHECK: repne +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf2 0xa7 #repne cmps +# CHECK: repne +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf2 0xae #repne scas +# CHECK: repne +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf2 0xaf #repne scas + +// CHECK: mulsd %xmm7, %xmm7 +0x66 0xF3 0xF2 0x0F 0x59 0xFF +// CHECK: mulss %xmm7, %xmm7 +0x66 0xF2 0xF3 0x0F 0x59 0xFF +// CHECK: mulpd %xmm7, %xmm7 +0x66 0x0F 0x59 0xFF +// CHECK: mulsd %xmm7, %xmm7 +0xf2 0x66 0x0f 0x59 0xff diff --git a/llvm/test/MC/Disassembler/X86/prefixes-x86_64.s b/llvm/test/MC/Disassembler/X86/prefixes-x86_64.s new file mode 100644 index 000000000000..397ea4104568 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/prefixes-x86_64.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -disassemble %s -triple=x86_64-apple-darwin9 + +// CHECK: mulsd %xmm7, %xmm7 +0x66 0xF3 0xF2 0x0F 0x59 0xFF +// CHECK: mulss %xmm7, %xmm7 +0x66 0xF2 0xF3 0x0F 0x59 0xFF +// CHECK: mulpd %xmm7, %xmm7 +0x66 0x0F 0x59 0xFF +// CHECK: mulsd %xmm7, %xmm7 +0xf2 0x66 0x0f 0x59 0xff diff --git a/llvm/test/MC/Disassembler/X86/prefixes.txt b/llvm/test/MC/Disassembler/X86/prefixes.txt index 9e002fab4656..983e09670d68 100644 --- a/llvm/test/MC/Disassembler/X86/prefixes.txt +++ b/llvm/test/MC/Disassembler/X86/prefixes.txt @@ -1,5 +1,60 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s +# CHECK: rep +# CHECK-NEXT: insb %dx, %es:(%rdi) +0xf3 0x6c #rep ins +# CHECK: rep +# CHECK-NEXT: insl %dx, %es:(%rdi) +0xf3 0x6d #rep ins +# CHECK: rep +# CHECK-NEXT: movsb (%rsi), %es:(%rdi) +0xf3 0xa4 #rep movs +# CHECK: rep +# CHECK-NEXT: movsl (%rsi), %es:(%rdi) +0xf3 0xa5 #rep movs +# CHECK: rep +# CHECK-NEXT: outsb (%rsi), %dx +0xf3 0x6e #rep outs +# CHECK: rep +# CHECK-NEXT: outsl (%rsi), %dx +0xf3 0x6f #rep 
outs +# CHECK: rep +# CHECK-NEXT: lodsb (%rsi), %al +0xf3 0xac #rep lods +# CHECK: rep +# CHECK-NEXT: lodsl (%rsi), %eax +0xf3 0xad #rep lods +# CHECK: rep +# CHECK-NEXT: stosb %al, %es:(%rdi) +0xf3 0xaa #rep stos +# CHECK: rep +# CHECK-NEXT: stosl %eax, %es:(%rdi) +0xf3 0xab #rep stos +# CHECK: rep +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf3 0xa6 #rep cmps +# CHECK: rep +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf3 0xa7 #repe cmps +# CHECK: rep +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf3 0xae #repe scas +# CHECK: rep +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf3 0xaf #repe scas +# CHECK: repne +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf2 0xa6 #repne cmps +# CHECK: repne +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf2 0xa7 #repne cmps +# CHECK: repne +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf2 0xae #repne scas +# CHECK: repne +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf2 0xaf #repne scas + # CHECK: lock # CHECK-NEXT: orl $16, %fs:776 0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10 @@ -50,7 +105,6 @@ # Test that multiple redundant prefixes work (redundant, but valid x86). # CHECK: rep -# CHECK-NEXT: rep # CHECK-NEXT: stosq 0xf3 0xf3 0x48 0xab