[WebAssembly] Made disassembler only use stack instructions.

Summary:
The disassembler now uses the StackBased bit from the tablegen defs to identify
stack instructions (and ignores register-based or non-wasm instructions).

Also changed how we store operands, since we now have up to 16 of them
per instruction. To not cause static data bloat, these are compressed
into a tiny table.

+ a few other cleanups.

Tested:
- MCTest
- llvm-lit -v `find test -name WebAssembly`

Reviewers: dschuff, jgravelle-google, sunfish, tlively

Subscribers: sbc100, aheejin, llvm-commits

Differential Revision: https://reviews.llvm.org/D51320

llvm-svn: 341081
This commit is contained in:
Wouter van Oortmerssen 2018-08-30 15:40:53 +00:00
parent 2305c049a3
commit a733d08db2
3 changed files with 55 additions and 27 deletions

View File

@ -140,7 +140,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MI.setOpcode(WasmInst->Opcode);
// Parse any operands.
for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
switch (WasmInst->Operands[OPI]) {
switch (OperandTable[WasmInst->OperandStart + OPI]) {
// ULEB operands:
case WebAssembly::OPERAND_BASIC_BLOCK:
case WebAssembly::OPERAND_LOCAL:
@ -194,15 +194,12 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
return MCDisassembler::Fail;
break;
}
case MCOI::OPERAND_REGISTER: {
// These are NOT actually in the instruction stream, but MC is going to
// expect operands to be present for them!
// FIXME: can MC re-generate register assignments or do we have to
// do this? Since this function decodes a single instruction, we don't
// have the proper context for tracking an operand stack here.
MI.addOperand(MCOperand::createReg(0));
break;
}
case MCOI::OPERAND_REGISTER:
// The tablegen header currently does not have any register operands since
// we use only the stack (_S) instructions.
// If you hit this, that probably means a bad instruction definition in
// tablegen.
llvm_unreachable("Register operand in WebAssemblyDisassembler");
default:
llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
}

View File

@ -31,6 +31,6 @@
# CHECK: i64.trunc_u:sat/f64
0xFC 0x07
# v128.const is arbitrarily disassembled as v2f64
# CHECK: v128.const 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783
# v128.const is arbitrarily disassembled as v16i8
# CHECK: v128.const 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
0xFD 0x00 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F

View File

@ -42,15 +42,16 @@ void emitWebAssemblyDisassemblerTables(
auto Prefix = Opc >> 8;
Opc = Opc & 0xFF;
auto &CGIP = OpcodeTable[Prefix][Opc];
if (!CGIP.second ||
// Make sure we store the variant with the least amount of operands,
// which is the one without explicit registers. Only few instructions
// have these currently, would be good to have for all of them.
// FIXME: this picks the first of many typed variants, which is
// currently the except_ref one, though this shouldn't matter for
// disassembly purposes.
CGIP.second->Operands.OperandList.size() >
CGI.Operands.OperandList.size()) {
// All wasm instructions have a StackBased field of type bit; we only want
// the instructions for which this is 1.
auto Bit = Def.getValue("StackBased")->getValue()->
getCastTo(BitRecTy::get());
auto IsStackBased = Bit && reinterpret_cast<const BitInit *>(Bit)
->getValue();
if (IsStackBased && !CGIP.second) {
// this picks the first of many typed variants, which is
// currently the except_ref one, though this shouldn't matter for
// disassembly purposes.
CGIP = std::make_pair(I, &CGI);
}
}
@ -63,8 +64,9 @@ void emitWebAssemblyDisassemblerTables(
OS << " uint16_t Opcode;\n";
OS << " EntryType ET;\n";
OS << " uint8_t NumOperands;\n";
OS << " uint8_t Operands[4];\n";
OS << " uint16_t OperandStart;\n";
OS << "};\n\n";
std::vector<std::string> OperandTable, CurOperandList;
// Output one table per prefix.
for (auto &PrefixPair : OpcodeTable) {
if (PrefixPair.second.empty())
@ -81,24 +83,53 @@ void emitWebAssemblyDisassemblerTables(
OS.write_hex(static_cast<unsigned long long>(I));
OS << ": " << CGI.AsmString << "\n";
OS << " { " << InstIt->second.first << ", ET_Instruction, ";
OS << CGI.Operands.OperandList.size() << ", {\n";
OS << CGI.Operands.OperandList.size() << ", ";
// Collect operand types for storage in a shared list.
CurOperandList.clear();
for (auto &Op : CGI.Operands.OperandList) {
OS << " " << Op.OperandType << ",\n";
CurOperandList.push_back(Op.OperandType);
}
OS << " }\n";
// See if we already have stored this sequence before. This is not
// strictly necessary but makes the table really small.
size_t OperandStart = OperandTable.size();
if (CurOperandList.size() <= OperandTable.size()) {
for (size_t J = 0; J <= OperandTable.size() - CurOperandList.size();
++J) {
size_t K = 0;
for (; K < CurOperandList.size(); ++K) {
if (OperandTable[J + K] != CurOperandList[K]) break;
}
if (K == CurOperandList.size()) {
OperandStart = J;
break;
}
}
}
// Store operands if no prior occurrence.
if (OperandStart == OperandTable.size()) {
OperandTable.insert(OperandTable.end(), CurOperandList.begin(),
CurOperandList.end());
}
OS << OperandStart;
} else {
auto PrefixIt = OpcodeTable.find(I);
// If we have a non-empty table for it that's not 0, this is a prefix.
if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) {
OS << " { 0, ET_Prefix, 0, {}";
OS << " { 0, ET_Prefix, 0, 0";
} else {
OS << " { 0, ET_Unused, 0, {}";
OS << " { 0, ET_Unused, 0, 0";
}
}
OS << " },\n";
}
OS << "};\n\n";
}
// Create a table of all operands:
OS << "const uint8_t OperandTable[] = {\n";
for (auto &Op : OperandTable) {
OS << " " << Op << ",\n";
}
OS << "};\n\n";
// Create a table of all extension tables:
OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n";
OS << "PrefixTable[] = {\n";