Merge pull request #2350 from thk123/feature/TG-3813/load-specified-methods

[TG-3813] Allow specifying specific methods by regex to be loaded by lazy methods
This commit is contained in:
Thomas Kiley 2018-06-22 07:44:27 +01:00 committed by GitHub
commit 262affba78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 337 additions and 70 deletions

View File

@ -1,11 +1,9 @@
CORE symex-driven-lazy-loading-expected-failure
test.class
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point test.sety
^EXIT=6$
^SIGNAL=0$
entry point 'test\.sety' is ambiguous between:
test\.sety:\(I\)V
test\.sety:\(F\)V
CI lazy methods: elaborate java::test\.sety:\(I\)V
CI lazy methods: elaborate java::test\.sety:\(F\)V
--
--
This doesn't work under symex-driven lazy loading because it is incompatible with --lazy-methods

View File

@ -1,6 +1,6 @@
CORE symex-driven-lazy-loading-expected-failure
test.class
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V' --lazy-methods-extra-entry-point 'test.sety:(F)V'
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V' --lazy-methods-extra-entry-point 'test.sety:\(F\)V'
^EXIT=0$
^SIGNAL=0$
VERIFICATION SUCCESSFUL

View File

@ -1,6 +1,6 @@
CORE symex-driven-lazy-loading-expected-failure
test.class
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V'
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V'
^EXIT=0$
^SIGNAL=0$
VERIFICATION SUCCESSFUL

View File

@ -1,6 +1,6 @@
CORE symex-driven-lazy-loading-expected-failure
test.class
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(F)V'
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(F\)V'
^EXIT=0$
^SIGNAL=0$
VERIFICATION SUCCESSFUL

View File

@ -32,6 +32,7 @@ SRC = bytecode_info.cpp \
java_string_literals.cpp \
java_types.cpp \
java_utils.cpp \
load_method_by_regex.cpp \
mz_zip_archive.cpp \
remove_exceptions.cpp \
remove_instanceof.cpp \

View File

@ -34,7 +34,7 @@ ci_lazy_methodst::ci_lazy_methodst(
const symbol_tablet &symbol_table,
const irep_idt &main_class,
const std::vector<irep_idt> &main_jar_classes,
const std::vector<irep_idt> &lazy_methods_extra_entry_points,
const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
java_class_loadert &java_class_loader,
const std::vector<irep_idt> &extra_instantiated_classes,
const select_pointer_typet &pointer_type_selector,
@ -103,8 +103,13 @@ bool ci_lazy_methodst::operator()(
// Add any extra entry points specified; we should elaborate these in the
// same way as the main function.
std::vector<irep_idt> extra_entry_points=lazy_methods_extra_entry_points;
resolve_method_names(extra_entry_points, symbol_table);
std::vector<irep_idt> extra_entry_points;
for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
{
const auto &extra_methods = extra_function_generator(symbol_table);
extra_entry_points.insert(
extra_entry_points.end(), extra_methods.begin(), extra_methods.end());
}
methods_to_convert_later.insert(
extra_entry_points.begin(), extra_entry_points.end());
@ -355,54 +360,6 @@ ci_lazy_methodst::entry_point_methods(const symbol_tablet &symbol_table)
return methods_to_convert_later;
}
/// Translates the given list of method names from human-readable to
/// internal syntax.
/// Expands any wildcards (entries ending in '.*') in the given method
/// list to include all non-static methods defined on the given class.
/// \param [in, out] methods: List of methods to expand. Any wildcard entries
/// will be deleted and the expanded entries appended to the end.
/// \param symbol_table: global symbol table
void ci_lazy_methodst::resolve_method_names(
std::vector<irep_idt> &methods,
const symbol_tablet &symbol_table)
{
std::vector<irep_idt> new_methods;
for(const irep_idt &method : methods)
{
const std::string &method_str=id2string(method);
if(!has_suffix(method_str, ".*"))
{
std::string error_message;
irep_idt internal_name=
resolve_friendly_method_name(
method_str,
symbol_table,
error_message);
if(internal_name==irep_idt())
throw "entry point "+error_message;
new_methods.push_back(internal_name);
}
else
{
irep_idt classname="java::"+method_str.substr(0, method_str.length()-2);
if(!symbol_table.has_symbol(classname))
throw "wildcard entry point '"+method_str+"': unknown class";
for(const auto &name_symbol : symbol_table.symbols)
{
if(name_symbol.second.type.id()!=ID_code)
continue;
if(!to_code_type(name_symbol.second.type).has_this())
continue;
if(has_prefix(id2string(name_symbol.first), id2string(classname)))
new_methods.push_back(name_symbol.first);
}
}
}
methods=std::move(new_methods);
}
/// Build up a list of methods whose type may be passed around reachable
/// from the entry point.
/// \param entry_points: list of fully-qualified function names that

View File

@ -91,6 +91,9 @@ typedef std::function<
bool(const irep_idt &function_id, ci_lazy_methods_neededt)>
method_convertert;
typedef std::function<std::vector<irep_idt>(const symbol_tablet &)>
load_extra_methodst;
class ci_lazy_methodst:public messaget
{
public:
@ -98,7 +101,7 @@ public:
const symbol_tablet &symbol_table,
const irep_idt &main_class,
const std::vector<irep_idt> &main_jar_classes,
const std::vector<irep_idt> &lazy_methods_extra_entry_points,
const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
java_class_loadert &java_class_loader,
const std::vector<irep_idt> &extra_instantiated_classes,
const select_pointer_typet &pointer_type_selector,
@ -112,10 +115,6 @@ public:
const method_convertert &method_converter);
private:
void resolve_method_names(
std::vector<irep_idt> &methods,
const symbol_tablet &symbol_table);
void initialize_instantiated_classes(
const std::unordered_set<irep_idt> &entry_points,
const namespacet &ns,
@ -149,7 +148,7 @@ private:
class_hierarchyt class_hierarchy;
irep_idt main_class;
std::vector<irep_idt> main_jar_classes;
std::vector<irep_idt> lazy_methods_extra_entry_points;
const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points;
java_class_loadert &java_class_loader;
const std::vector<irep_idt> &extra_instantiated_classes;
const select_pointer_typet &pointer_type_selector;

View File

@ -37,6 +37,7 @@ Author: Daniel Kroening, kroening@kroening.com
#include "ci_lazy_methods.h"
#include "expr2java.h"
#include "load_method_by_regex.h"
/// Consume options that are java bytecode specific.
/// \param Command:line options
@ -94,10 +95,11 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
const std::list<std::string> &extra_entry_points=
cmd.get_values("lazy-methods-extra-entry-point");
lazy_methods_extra_entry_points.insert(
lazy_methods_extra_entry_points.end(),
std::transform(
extra_entry_points.begin(),
extra_entry_points.end());
extra_entry_points.end(),
std::back_inserter(extra_methods),
build_load_method_by_regex);
if(cmd.isset("java-cp-include-files"))
{
@ -815,7 +817,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion(
symbol_table,
main_class,
main_jar_classes,
lazy_methods_extra_entry_points,
extra_methods,
java_class_loader,
java_load_classes,
get_pointer_type_selector(),

View File

@ -62,7 +62,10 @@ Author: Daniel Kroening, kroening@kroening.com
" --lazy-methods-extra-entry-point METHODNAME\n" /* NOLINT(*) */ \
" treat METHODNAME as a possible program entry point for\n" /* NOLINT(*) */ \
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
" A '.*' wildcard is allowed to specify all class members\n"
" METHODNAME can be a regex that will be matched against\n" /* NOLINT(*) */ \
" all symbols. If missing a java:: prefix will be added\n" /* NOLINT(*) */ \
" If no descriptor is found, all overloads of a method will\n"/* NOLINT(*) */ \
" also be added." /* NOLINT(*) */
// clang-format on
class symbolt;
@ -173,7 +176,6 @@ protected:
size_t max_user_array_length; // max size for user code created arrays
method_bytecodet method_bytecode;
lazy_methods_modet lazy_methods_mode;
std::vector<irep_idt> lazy_methods_extra_entry_points;
bool string_refinement_enabled;
bool throw_runtime_exceptions;
bool assert_uncaught_exceptions;
@ -195,6 +197,8 @@ private:
class_hierarchyt class_hierarchy;
// List of classes to never load
std::unordered_set<std::string> no_load_classes;
std::vector<load_extra_methodst> extra_methods;
};
std::unique_ptr<languaget> new_java_bytecode_language();

View File

@ -0,0 +1,74 @@
/*******************************************************************\
Module: Java Bytecode
Author: Diffblue Ltd.
\*******************************************************************/
#include "load_method_by_regex.h"
#include <regex>
#include <util/symbol_table.h>
/// For a given user provided pattern, return a regex, having dealt with the
/// cases where the user has not prefixed with java:: or suffixed with the
/// descriptor
/// \param pattern: The user provided pattern
/// \return The regex to match with
static std::regex build_regex_from_pattern(const std::string &pattern)
{
std::string modified_pattern = pattern;
if(does_pattern_miss_descriptor(pattern))
modified_pattern += R"(:\(.*\).*)";
if(!has_prefix(pattern, "java::"))
modified_pattern = "java::" + modified_pattern;
return std::regex{modified_pattern};
}
/// Identify if a parameter includes a part that will match a descriptor. That
/// is, does it have a colon separtor.
/// \param pattern: The user provided pattern
/// \return True if no descriptor is found (that is, the only : relates to the
/// java:: prefix.
bool does_pattern_miss_descriptor(const std::string &pattern)
{
const size_t descriptor_index = pattern.rfind(':');
if(descriptor_index == std::string::npos)
return true;
static const std::string java_prefix = "java::";
return descriptor_index == java_prefix.length() - 1 &&
has_prefix(pattern, java_prefix);
}
/// Create a lambda that returns the symbols that the given pattern should be
/// loaded.If the pattern doesn't include a colon for matching the descriptor,
/// append a `:\(.*\).*` to the regex. Note this will mean all overloaded
/// methods will be marked as extra entry points for CI lazy loading.
/// If the pattern doesn't include the java:: prefix, prefix that
/// \param pattern: The user provided pattern
/// \return The lambda to execute.
std::function<std::vector<irep_idt>(const symbol_tablet &symbol_table)>
build_load_method_by_regex(const std::string &pattern)
{
std::regex regex = build_regex_from_pattern(pattern);
return [=](const symbol_tablet &symbol_table) {
std::vector<irep_idt> matched_methods;
for(const auto &symbol : symbol_table.symbols)
{
if(
symbol.second.is_function() &&
std::regex_match(id2string(symbol.first), regex))
{
matched_methods.push_back(symbol.first);
}
}
return matched_methods;
};
}

View File

@ -0,0 +1,25 @@
/*******************************************************************\
Module: Java Bytecode
Author: Diffblue Ltd.
\*******************************************************************/
/// \file
/// Process a pattern to use as a regex for selecting extra entry points for
/// ci_lazy_methodst
#ifndef CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H
#define CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H
#include <java_bytecode/ci_lazy_methods.h>
class symbol_tablet;
std::function<std::vector<irep_idt>(const symbol_tablet &symbol_table)>
build_load_method_by_regex(const std::string &pattern);
bool does_pattern_miss_descriptor(const std::string &pattern);
#endif // CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H

View File

@ -21,6 +21,7 @@ SRC += java_bytecode/goto-programs/class_hierarchy_output.cpp \
java_bytecode/java_types/java_generic_symbol_type.cpp \
java_bytecode/java_types/java_type_from_string.cpp \
java_bytecode/java_utils_test.cpp \
java_bytecode/load_method_by_regex.cpp \
java_bytecode/inherited_static_fields/inherited_static_fields.cpp \
pointer-analysis/custom_value_set_analysis.cpp \
solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \

View File

@ -0,0 +1,178 @@
/*******************************************************************\
Module: Unit tests for parsing generic classes
Author: Diffblue Limited
\*******************************************************************/
#include <java_bytecode/load_method_by_regex.h>
#include <testing-utils/catch.hpp>
#include <testing-utils/require_vectors_equal_unordered.h>
SCENARIO(
"load_method_by_regex::does_pattern_miss_descriptor",
"[core][java_bytecode][load_method_by_regex]")
{
GIVEN("A string with a java prefix and no descriptor")
{
const std::string pattern = "java::com.diffblue.ClassName.methodName";
WHEN("When calling does_pattern_miss_descriptor")
{
const bool result = does_pattern_miss_descriptor(pattern);
THEN("It should miss discriptor")
{
REQUIRE(result);
}
}
}
GIVEN("A string with a java prefix and a descriptor")
{
const std::string pattern = "java::com.diffblue.ClassName.methodName:()V";
WHEN("When calling does_pattern_miss_descriptor")
{
const bool result = does_pattern_miss_descriptor(pattern);
THEN("It should have the discriptor")
{
REQUIRE_FALSE(result);
}
}
}
GIVEN("A string without a java prefix and without a descriptor")
{
const std::string pattern = "com.diffblue.ClassName.methodName";
WHEN("When calling does_pattern_miss_descriptor")
{
const bool result = does_pattern_miss_descriptor(pattern);
THEN("It should miss discriptor")
{
REQUIRE(result);
}
}
}
GIVEN("A string without a java prefix and with a descriptor")
{
const std::string pattern = "com.diffblue.ClassName.methodName:()V";
WHEN("When calling does_pattern_miss_descriptor")
{
const bool result = does_pattern_miss_descriptor(pattern);
THEN("It should have the discriptor")
{
REQUIRE_FALSE(result);
}
}
}
GIVEN("A string with an almost java prefix and no descriptor")
{
const std::string pattern = "java:com.diffblue.ClassName.methodName";
WHEN("When calling does_pattern_miss_descriptor")
{
const bool result = does_pattern_miss_descriptor(pattern);
THEN("It should classify the last : as a descriptor")
{
REQUIRE_FALSE(result);
}
}
}
GIVEN("A string with an almost java prefix and with a descriptor")
{
const std::string pattern = "java:com.diffblue.ClassName.methodName:()V";
WHEN("When calling does_pattern_miss_descriptor")
{
const bool result = does_pattern_miss_descriptor(pattern);
THEN("It should have the discriptor")
{
REQUIRE_FALSE(result);
}
}
}
}
static symbolt create_method_symbol(const std::string &method_name)
{
symbolt new_symbol;
new_symbol.name = method_name;
new_symbol.type = code_typet{{}, nil_typet{}};
return new_symbol;
}
static void require_result_for_pattern(
const std::string &pattern,
const std::vector<irep_idt> &expected,
const symbol_tablet &symbol_table)
{
WHEN("Constructing a load_method_by_regex")
{
const auto matcher = build_load_method_by_regex(pattern);
const auto &results = matcher(symbol_table);
if(expected.size() == 1)
{
THEN("Expect " + id2string(expected[0]))
{
require_vectors_equal_unordered(results, expected);
}
}
else
{
THEN("Expect " + std::to_string(expected.size()) + " symbols")
{
require_vectors_equal_unordered(results, expected);
}
}
}
}
SCENARIO("load_method_by_regex", "[core][java_bytecode][load_method_by_regex]")
{
symbol_tablet symbol_table;
symbol_table.add(create_method_symbol("java::pack.Class.methodName:()V"));
symbol_table.add(create_method_symbol("java::pack.Class.anotherMethod:()V"));
symbol_table.add(create_method_symbol("java::pack.Different.methodName:()V"));
symbol_table.add(create_method_symbol("java::another.Class.methodName:()V"));
GIVEN("A pattern without java prefix, without descriptor, no regex")
{
const std::string pattern = "pack.Class.methodName";
const std::vector<irep_idt> expected = {"java::pack.Class.methodName:()V"};
require_result_for_pattern(pattern, expected, symbol_table);
}
GIVEN("A pattern with java prefix, without descriptor, no regex")
{
const std::string pattern = "java::pack.Class.methodName";
const std::vector<irep_idt> expected = {"java::pack.Class.methodName:()V"};
require_result_for_pattern(pattern, expected, symbol_table);
}
GIVEN("A pattern with java prefix, with descriptor, no regex")
{
const std::string pattern = R"(java::pack.Class.methodName:\(\)V)";
const std::vector<irep_idt> expected = {"java::pack.Class.methodName:()V"};
require_result_for_pattern(pattern, expected, symbol_table);
}
GIVEN("A pattern with java prefix, with wrong descriptor, no regex")
{
const std::string pattern = R"(java::pack.Class.methodName:\(I\)V)";
const std::vector<irep_idt> expected = {};
require_result_for_pattern(pattern, expected, symbol_table);
}
GIVEN("A pattern with java prefix, without descriptor, with regex")
{
const std::string pattern = "java::pack.Class..*";
const std::vector<irep_idt> expected = {
"java::pack.Class.methodName:()V", "java::pack.Class.anotherMethod:()V"};
require_result_for_pattern(pattern, expected, symbol_table);
}
GIVEN("A pattern without java prefix, without descriptor, with regex")
{
const std::string pattern = "pack.Class..*";
const std::vector<irep_idt> expected = {
"java::pack.Class.methodName:()V", "java::pack.Class.anotherMethod:()V"};
require_result_for_pattern(pattern, expected, symbol_table);
}
GIVEN("A pattern without java prefix, with descriptor, with regex in package")
{
const std::string pattern = R"(\w+.Class.methodName:\(\)V)";
const std::vector<irep_idt> expected = {
"java::pack.Class.methodName:()V", "java::another.Class.methodName:()V"};
require_result_for_pattern(pattern, expected, symbol_table);
}
}

View File

@ -0,0 +1,28 @@
/*******************************************************************\
Module: Unit test utilities
Author: Diffblue Limited.
\*******************************************************************/
#ifndef CPROVER_TESTING_UTILS_REQUIRE_VECTORS_EQUAL_UNORDERED_H
#define CPROVER_TESTING_UTILS_REQUIRE_VECTORS_EQUAL_UNORDERED_H
#include <testing-utils/catch.hpp>
#include <vector>
/// Checks whether two vectors are equal, ignoring ordering
/// \tparam T: The type of the vector contents
/// \param actual: The vector to check
/// \param expected: The vector to check against
template <class T>
void require_vectors_equal_unordered(
const std::vector<T> &actual,
const std::vector<T> &expected)
{
REQUIRE(actual.size() == expected.size());
REQUIRE_THAT(actual, Catch::Matchers::Vector::ContainsMatcher<T>{expected});
}
#endif // CPROVER_TESTING_UTILS_REQUIRE_VECTORS_EQUAL_UNORDERED_H