use bytecode_info instead of bytecode vector

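The run-time generated bytecode table in the parser is replaced by the static
bytecode_info table.  Parsed instructions now record their opcode instead of
the mnemonic, so the linear search for the mnemonic is replaced by an array
access into bytecode_info.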
This commit is contained in:
Daniel Kroening 2019-05-19 14:49:04 +01:00
parent b107258ee8
commit b149012d96
9 changed files with 71 additions and 102 deletions

View File

@ -271,10 +271,9 @@ struct bytecode_infot const bytecode_info[]=
{ nullptr, 0xfd, '\0',0, 0, '\0'}, // zero-initialized NOLINT (*)
{ "impdep1", 0xfe, ' ', 0, 0, ' ' }, // ; reserved for implementation-dependent operations within debuggers; should not appear in any class file NOLINT(*)
{ "impdep2", 0xff, ' ', 0, 0, ' ' }, // ; reserved for implementation-dependent operations within debuggers; should not appear in any class file NOLINT(*)
{ nullptr, 0x00, '\0',0, 0, '\0'}, // zero-initialized NOLINT (*)
};
// clang-format on
static_assert(
sizeof(bytecode_info) == sizeof(bytecode_infot) * 257,
sizeof(bytecode_info) == sizeof(bytecode_infot) * 256,
"bytecode table has right size");

View File

@ -590,18 +590,6 @@ void java_bytecode_convert_methodt::convert(
to_java_class_type(class_symbol.type).lambda_method_handles());
}
/// Find the entry of the static `bytecode_info` table whose mnemonic equals
/// \p statement.
///
/// The table is walked from its first element; the walk ends at the first
/// entry whose mnemonic is a null pointer, so entries located after such an
/// entry are never considered.
/// \param statement: mnemonic to look up
/// \return reference to the matching table entry
/// If no entry matches, an error message is emitted and the integer 0 is
/// thrown.
const bytecode_infot &java_bytecode_convert_methodt::get_bytecode_info(
  const irep_idt &statement)
{
  const bytecode_infot *entry = bytecode_info;

  while(entry->mnemonic != nullptr)
  {
    if(statement == entry->mnemonic)
      return *entry;

    ++entry;
  }

  // Reached the null-mnemonic terminator without a match.
  error() << "failed to find bytecode mnemonic `"
          << statement << '\'' << eom;
  throw 0;
}
static irep_idt get_if_cmp_operator(const irep_idt &stmt)
{
if(stmt==patternt("if_?cmplt"))
@ -1007,14 +995,16 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
// a new maximal key
assert(a_entry.first==--address_map.end());
const std::string statement = bytecode_info[i_it->bytecode].mnemonic;
// clang-format off
if(i_it->statement != "goto" &&
i_it->statement != "return" &&
i_it->statement != patternt("?return") &&
i_it->statement != "athrow" &&
i_it->statement != "jsr" &&
i_it->statement != "jsr_w" &&
i_it->statement != "ret")
if(statement != "goto" &&
statement != "return" &&
statement != patternt("?return") &&
statement != "athrow" &&
statement != "jsr" &&
statement != "jsr_w" &&
statement != "ret")
{
// clang-format on
instructionst::const_iterator next=i_it;
@ -1022,25 +1012,27 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
a_entry.first->second.successors.push_back(next->address);
}
if(i_it->statement=="athrow" ||
i_it->statement=="putfield" ||
i_it->statement=="getfield" ||
i_it->statement=="checkcast" ||
i_it->statement=="newarray" ||
i_it->statement=="anewarray" ||
i_it->statement=="idiv" ||
i_it->statement=="ldiv" ||
i_it->statement=="irem" ||
i_it->statement=="lrem" ||
i_it->statement==patternt("?astore") ||
i_it->statement==patternt("?aload") ||
i_it->statement=="invokestatic" ||
i_it->statement=="invokevirtual" ||
i_it->statement=="invokespecial" ||
i_it->statement=="invokeinterface" ||
(threading_support && (i_it->statement=="monitorenter" ||
i_it->statement=="monitorexit")))
// clang-format off
if(statement == "athrow" ||
statement == "putfield" ||
statement == "getfield" ||
statement == "checkcast" ||
statement == "newarray" ||
statement == "anewarray" ||
statement == "idiv" ||
statement == "ldiv" ||
statement == "irem" ||
statement == "lrem" ||
statement == patternt("?astore") ||
statement == patternt("?aload") ||
statement == "invokestatic" ||
statement == "invokevirtual" ||
statement == "invokespecial" ||
statement == "invokeinterface" ||
(threading_support &&
(statement == "monitorenter" || statement == "monitorexit")))
{
// clang-format on
const std::vector<method_offsett> handler =
try_catch_handler(i_it->address, method.exception_table);
std::list<method_offsett> &successors = a_entry.first->second.successors;
@ -1048,14 +1040,16 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
targets.insert(handler.begin(), handler.end());
}
if(i_it->statement=="goto" ||
i_it->statement==patternt("if_?cmp??") ||
i_it->statement==patternt("if??") ||
i_it->statement=="ifnonnull" ||
i_it->statement=="ifnull" ||
i_it->statement=="jsr" ||
i_it->statement=="jsr_w")
// clang-format off
if(statement == "goto" ||
statement == patternt("if_?cmp??") ||
statement == patternt("if??") ||
statement == "ifnonnull" ||
statement == "ifnull" ||
statement == "jsr" ||
statement == "jsr_w")
{
// clang-format on
PRECONDITION(!i_it->args.empty());
auto target = numeric_cast_v<unsigned>(to_constant_expr(i_it->args[0]));
@ -1063,8 +1057,7 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
a_entry.first->second.successors.push_back(target);
if(i_it->statement=="jsr" ||
i_it->statement=="jsr_w")
if(statement == "jsr" || statement == "jsr_w")
{
auto next = std::next(i_it);
DATA_INVARIANT(
@ -1073,8 +1066,7 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
jsr_ret_targets.push_back(next->address);
}
}
else if(i_it->statement=="tableswitch" ||
i_it->statement=="lookupswitch")
else if(statement == "tableswitch" || statement == "lookupswitch")
{
bool is_label=true;
for(const auto &arg : i_it->args)
@ -1088,7 +1080,7 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
is_label=!is_label;
}
}
else if(i_it->statement=="ret")
else if(statement == "ret")
{
// Finish these later, once we've seen all jsr instructions.
ret_instructions.push_back(i_it);
@ -1144,11 +1136,11 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
stack.empty() || instruction.predecessors.size() <= 1 ||
has_prefix(stack.front().get_string(ID_C_base_name), "$stack"));
irep_idt statement=i_it->statement;
exprt arg0=i_it->args.size()>=1?i_it->args[0]:nil_exprt();
exprt arg1=i_it->args.size()>=2?i_it->args[1]:nil_exprt();
const bytecode_infot &stmt_bytecode_info = get_bytecode_info(statement);
const bytecode_infot &stmt_bytecode_info = bytecode_info[i_it->bytecode];
std::string statement = stmt_bytecode_info.mnemonic;
// deal with _idx suffixes
if(statement.size()>=2 &&

View File

@ -304,8 +304,6 @@ protected:
const methodt &,
const java_class_typet::java_lambda_method_handlest &);
const bytecode_infot &get_bytecode_info(const irep_idt &statement);
codet get_clinit_call(const irep_idt &classname);
bool is_method_inherited(

View File

@ -316,8 +316,9 @@ static void infer_opaque_type_fields(
for(const java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
if(instruction.statement == "getfield" ||
instruction.statement == "putfield")
const std::string statement =
bytecode_info[instruction.bytecode].mnemonic;
if(statement == "getfield" || statement == "putfield")
{
const fieldref_exprt &fieldref =
expr_dynamic_cast<fieldref_exprt>(instruction.args[0]);
@ -461,11 +462,15 @@ static void generate_constant_global_variables(
{
// ldc* instructions are Java bytecode "load constant" ops, which can
// retrieve a numeric constant, String literal, or Class literal.
if(instruction.statement == "ldc" ||
instruction.statement == "ldc2" ||
instruction.statement == "ldc_w" ||
instruction.statement == "ldc2_w")
const std::string statement =
bytecode_info[instruction.bytecode].mnemonic;
// clang-format off
if(statement == "ldc" ||
statement == "ldc2" ||
statement == "ldc_w" ||
statement == "ldc2_w")
{
// clang-format on
INVARIANT(
instruction.args.size() != 0,
"ldc instructions should have an argument");
@ -589,8 +594,9 @@ static void create_stub_global_symbols(
for(const java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
if(instruction.statement == "getstatic" ||
instruction.statement == "putstatic")
const std::string statement =
bytecode_info[instruction.bytecode].mnemonic;
if(statement == "getstatic" || statement == "putstatic")
{
INVARIANT(
instruction.args.size() > 0,

View File

@ -161,7 +161,7 @@ void java_bytecode_parse_treet::methodt::output(std::ostream &out) const
out << " // " << i.source_location << '\n';
out << " " << i.address << ": ";
out << i.statement;
out << bytecode_info[i.bytecode].mnemonic;
bool first = true;
for(const auto &arg : i.args)

View File

@ -56,7 +56,7 @@ struct java_bytecode_parse_treet
{
source_locationt source_location;
unsigned address;
irep_idt statement;
u8 bytecode;
typedef std::vector<exprt> argst;
argst args;
};

View File

@ -36,7 +36,6 @@ public:
/// Construct a parser and set up its opcode lookup table.
/// \param skip_instructions: stored in the member of the same name; when
///   true, populate_bytecode_mnemonics_table() returns without filling the
///   table.
explicit java_bytecode_parsert(bool skip_instructions)
: skip_instructions(skip_instructions)
{
populate_bytecode_mnemonics_table();
}
virtual bool parse();
@ -72,14 +71,6 @@ public:
constant_poolt constant_pool;
protected:
/// One entry of the parser's per-opcode table (`bytecodes`), copied from the
/// corresponding `bytecode_info` entry by populate_bytecode_mnemonics_table().
class bytecodet
{
public:
// Mnemonic of the opcode, held as an irep_idt.
irep_idt mnemonic;
// Format character of the opcode; rbytecode switches on it to decide how to
// read the instruction's operands (e.g. ' ' means no further bytes).
char format;
};
std::vector<bytecodet> bytecodes;
const bool skip_instructions = false;
pool_entryt &pool_entry(u2 index)
@ -104,23 +95,6 @@ protected:
return *java_type_from_string(id2string(pool_entry(index).s));
}
void populate_bytecode_mnemonics_table()
{
// This is only useful for rbytecodes, which in turn is only useful to
// parse method instructions.
if(skip_instructions)
return;
// pre-hash the mnemonics, so we do this only once
bytecodes.resize(256);
for(const bytecode_infot *p=bytecode_info; p->mnemonic!=nullptr; p++)
{
assert(p->opcode<bytecodes.size());
bytecodes[p->opcode].mnemonic=p->mnemonic;
bytecodes[p->opcode].format=p->format;
}
}
void rClassFile();
void rconstant_pool();
void rinterfaces(classt &parsed_class);
@ -954,9 +928,6 @@ void java_bytecode_parsert::rfields(classt &parsed_class)
void java_bytecode_parsert::rbytecode(
methodt::instructionst &instructions)
{
INVARIANT(
bytecodes.size() == 256, "bytecode mnemonics should have been populated");
u4 code_length=read_u4();
u4 address;
@ -978,19 +949,20 @@ void java_bytecode_parsert::rbytecode(
// [ifald]load, [ifald]store, ret and iinc
// All of these have either format of v, or V
INVARIANT(
bytecodes[bytecode].format == 'v' || bytecodes[bytecode].format == 'V',
"Unexpected wide instruction: " +
id2string(bytecodes[bytecode].mnemonic));
bytecode_info[bytecode].format == 'v' ||
bytecode_info[bytecode].format == 'V',
std::string("Unexpected wide instruction: ") +
bytecode_info[bytecode].mnemonic);
}
instructions.push_back(instructiont());
instructiont &instruction=instructions.back();
instruction.statement=bytecodes[bytecode].mnemonic;
instruction.bytecode = bytecode;
instruction.address=start_of_instruction;
instruction.source_location
.set_java_bytecode_index(std::to_string(bytecode_index));
switch(bytecodes[bytecode].format)
switch(bytecode_info[bytecode].format)
{
case ' ': // no further bytes
break;

View File

@ -184,7 +184,8 @@ static bool is_store_to_slot(
const java_bytecode_convert_methodt::instructiont &inst,
unsigned slotidx)
{
const std::string prevstatement=id2string(inst.statement);
const std::string prevstatement = bytecode_info[inst.bytecode].mnemonic;
if(!(prevstatement.size()>=1 && prevstatement.substr(1, 5)=="store"))
return false;

View File

@ -95,8 +95,9 @@ void require_parse_tree::require_instructions_match_expectation(
void require_parse_tree::expected_instructiont::require_instructions_equal(
java_bytecode_parse_treet::instructiont actual_instruction) const
{
REQUIRE(actual_instruction.statement == instruction_mnemoic);
REQUIRE(actual_instruction.args.size() == instruction_arguments.size());
REQUIRE(
instruction_mnemoic == bytecode_info[actual_instruction.bytecode].mnemonic);
REQUIRE(instruction_arguments.size() == actual_instruction.args.size());
auto actual_arg_it = actual_instruction.args.begin();
for(const exprt &expected_arg : actual_instruction.args)
{