Fix process_array_expr to take into account pointer offset

This commit is contained in:
Michael Tautschnig 2018-02-05 18:15:26 +00:00
parent 379705fbd2
commit f4fb09948a
5 changed files with 176 additions and 233 deletions

View File

@ -0,0 +1,18 @@
#include <assert.h>
#include <string.h>
int main()
{
char A[3] = {'a', 'b', 'c'};
char *p = A;
char v1, v2;
memcpy(&v1, p, 1);
++p;
memcpy(&v2, p, 1);
assert(v1 == 'a');
assert(v2 == 'b');
return 0;
}

View File

@ -0,0 +1,8 @@
CORE
main.c
^EXIT=0$
^SIGNAL=0$
^VERIFICATION SUCCESSFUL$
--
^warning: ignoring

View File

@ -252,7 +252,6 @@ protected:
void trigger_auto_object(const exprt &, statet &);
void initialize_auto_object(const exprt &, statet &);
void process_array_expr(exprt &);
void process_array_expr_rec(exprt &, const typet &) const;
exprt make_auto_object(const typet &, statet &);
virtual void dereference(exprt &, statet &, const bool write);

View File

@ -15,77 +15,13 @@ Author: Daniel Kroening, kroening@kroening.com
#include <util/std_expr.h>
#include <util/cprover_prefix.h>
#include <util/base_type.h>
#include <util/pointer_offset_size.h>
#include <util/c_types.h>
void goto_symext::process_array_expr_rec(
exprt &expr,
const typet &type) const
{
if(expr.id()==ID_if)
{
if_exprt &if_expr=to_if_expr(expr);
process_array_expr_rec(if_expr.true_case(), type);
process_array_expr_rec(if_expr.false_case(), type);
}
else if(expr.id()==ID_index)
{
// strip index
index_exprt &index_expr=to_index_expr(expr);
exprt tmp=index_expr.array();
expr.swap(tmp);
}
else if(expr.id()==ID_typecast)
{
// strip
exprt tmp=to_typecast_expr(expr).op0();
expr.swap(tmp);
process_array_expr_rec(expr, type);
}
else if(expr.id()==ID_address_of)
{
// strip
exprt tmp=to_address_of_expr(expr).op0();
expr.swap(tmp);
process_array_expr_rec(expr, type);
}
else if(expr.id()==ID_byte_extract_little_endian ||
expr.id()==ID_byte_extract_big_endian)
{
// pick the root object
exprt tmp=to_byte_extract_expr(expr).op();
expr.swap(tmp);
process_array_expr_rec(expr, type);
}
else if(expr.id()==ID_symbol &&
expr.get_bool(ID_C_SSA_symbol) &&
to_ssa_expr(expr).get_original_expr().id()==ID_index)
{
const ssa_exprt &ssa=to_ssa_expr(expr);
const index_exprt &index_expr=to_index_expr(ssa.get_original_expr());
exprt tmp=index_expr.array();
expr.swap(tmp);
}
else
{
Forall_operands(it, expr)
{
typet t=it->type();
process_array_expr_rec(*it, t);
}
}
if(!base_type_eq(expr.type(), type, ns))
{
byte_extract_exprt be(byte_extract_id());
be.type()=type;
be.op()=expr;
be.offset()=from_integer(0, index_type());
expr.swap(be);
}
}
/// Given an expression, find the root object and the offset into it.
///
/// The extra complication to be considered here is that the expression may
/// have any number of ternary expressions mixed with type casts.
void goto_symext::process_array_expr(exprt &expr)
{
// This may change the type of the expression!
@ -94,26 +30,21 @@ void goto_symext::process_array_expr(exprt &expr)
{
if_exprt &if_expr=to_if_expr(expr);
process_array_expr(if_expr.true_case());
process_array_expr(if_expr.false_case());
process_array_expr_rec(if_expr.false_case(),
if_expr.true_case().type());
if(!base_type_eq(if_expr.true_case(), if_expr.false_case(), ns))
{
byte_extract_exprt be(
byte_extract_id(),
if_expr.false_case(),
from_integer(0, index_type()),
if_expr.true_case().type());
if_expr.false_case().swap(be);
}
if_expr.type()=if_expr.true_case().type();
}
else if(expr.id()==ID_index)
{
// strip index
index_exprt &index_expr=to_index_expr(expr);
exprt tmp=index_expr.array();
expr.swap(tmp);
}
else if(expr.id()==ID_typecast)
{
// strip
exprt tmp=to_typecast_expr(expr).op0();
expr.swap(tmp);
process_array_expr(expr);
}
else if(expr.id()==ID_address_of)
{
// strip
@ -121,14 +52,6 @@ void goto_symext::process_array_expr(exprt &expr)
expr.swap(tmp);
process_array_expr(expr);
}
else if(expr.id()==ID_byte_extract_little_endian ||
expr.id()==ID_byte_extract_big_endian)
{
// pick the root object
exprt tmp=to_byte_extract_expr(expr).op();
expr.swap(tmp);
process_array_expr(expr);
}
else if(expr.id()==ID_symbol &&
expr.get_bool(ID_C_SSA_symbol) &&
to_ssa_expr(expr).get_original_expr().id()==ID_index)
@ -137,10 +60,58 @@ void goto_symext::process_array_expr(exprt &expr)
const index_exprt &index_expr=to_index_expr(ssa.get_original_expr());
exprt tmp=index_expr.array();
expr.swap(tmp);
process_array_expr(expr);
}
else if(expr.id() != ID_symbol)
{
object_descriptor_exprt ode;
ode.build(expr, ns);
do_simplify(ode.offset());
expr = ode.root_object();
if(!ode.offset().is_zero())
{
if(expr.type().id() != ID_array)
{
exprt array_size = size_of_expr(expr.type(), ns);
do_simplify(array_size);
expr =
byte_extract_exprt(
byte_extract_id(),
expr,
from_integer(0, index_type()),
array_typet(char_type(), array_size));
}
// given an array type T[N], i.e., an array of N elements of type T, and a
// byte offset B, compute the array offset B/sizeof(T) and build a new
// type T[N-(B/sizeof(T))]
const array_typet &prev_array_type = to_array_type(expr.type());
const typet &array_size_type = prev_array_type.size().type();
const typet &subtype = prev_array_type.subtype();
exprt new_offset(ode.offset());
if(new_offset.type() != array_size_type)
new_offset.make_typecast(array_size_type);
exprt subtype_size = size_of_expr(subtype, ns);
if(subtype_size.type() != array_size_type)
subtype_size.make_typecast(array_size_type);
new_offset = div_exprt(new_offset, subtype_size);
minus_exprt new_size(prev_array_type.size(), new_offset);
do_simplify(new_size);
array_typet new_array_type(subtype, new_size);
expr =
byte_extract_exprt(
byte_extract_id(),
expr,
ode.offset(),
new_array_type);
}
}
else
Forall_operands(it, expr)
process_array_expr(*it);
}
/// Rewrite index/member expressions in byte_extract to offset

View File

@ -11,8 +11,6 @@ Author: Daniel Kroening, kroening@kroening.com
#include "goto_symex.h"
#include <cassert>
#include <util/arith_tools.h>
#include <util/invariant.h>
#include <util/rename.h>
@ -20,7 +18,7 @@ Author: Daniel Kroening, kroening@kroening.com
#include <util/std_expr.h>
#include <util/std_code.h>
#include <util/byte_operators.h>
#include <util/pointer_offset_size.h>
#include <util/c_types.h>
void goto_symext::havoc_rec(
@ -136,179 +134,128 @@ void goto_symext::symex_other(
else if(statement==ID_array_copy ||
statement==ID_array_replace)
{
assert(code.operands().size()==2);
codet clean_code(code);
// array_copy and array_replace take two pointers (to arrays); we need to:
// 1. dereference the pointers (via clean_expr)
// 2. find the actual array objects/candidates for objects (via
// process_array_expr)
// 3. build an assignment where the type on lhs and rhs is:
// - array_copy: the type of the first array (even if the second is smaller)
// - array_replace: the type of the second array (even if it is smaller)
DATA_INVARIANT(
code.operands().size() == 2,
"array_copy/array_replace takes two operands");
// we need to add dereferencing for both operands
dereference_exprt d0, d1;
d0.op0()=code.op0();
d0.type()=empty_typet();
d1.op0()=code.op1();
d1.type()=empty_typet();
dereference_exprt dest_array(code.op0());
clean_expr(dest_array, state, true);
dereference_exprt src_array(code.op1());
clean_expr(src_array, state, false);
clean_code.op0()=d0;
clean_code.op1()=d1;
// obtain the actual arrays
process_array_expr(dest_array);
process_array_expr(src_array);
clean_expr(clean_code.op0(), state, true);
exprt op0_offset=from_integer(0, index_type());
if(clean_code.op0().id()==byte_extract_id() &&
clean_code.op0().type().id()==ID_empty)
{
op0_offset=to_byte_extract_expr(clean_code.op0()).offset();
clean_code.op0()=clean_code.op0().op0();
}
clean_expr(clean_code.op1(), state, false);
exprt op1_offset=from_integer(0, index_type());
if(clean_code.op1().id()==byte_extract_id() &&
clean_code.op1().type().id()==ID_empty)
{
op1_offset=to_byte_extract_expr(clean_code.op1()).offset();
clean_code.op1()=clean_code.op1().op0();
}
process_array_expr(clean_code.op0());
clean_expr(clean_code.op0(), state, true);
process_array_expr(clean_code.op1());
clean_expr(clean_code.op1(), state, false);
if(!base_type_eq(clean_code.op0().type(),
clean_code.op1().type(), ns) ||
!op0_offset.is_zero() || !op1_offset.is_zero())
// check for size (or type) mismatch and adjust
if(!base_type_eq(dest_array.type(), src_array.type(), ns))
{
byte_extract_exprt be(byte_extract_id());
if(statement==ID_array_copy)
{
be.op()=clean_code.op1();
be.offset()=op1_offset;
be.type()=clean_code.op0().type();
clean_code.op1()=be;
if(!op0_offset.is_zero())
{
byte_extract_exprt op0(
byte_extract_id(),
clean_code.op0(),
op0_offset,
clean_code.op0().type());
clean_code.op0()=op0;
}
be.op()=src_array;
be.offset()=from_integer(0, index_type());
be.type()=dest_array.type();
src_array.swap(be);
do_simplify(src_array);
}
else
{
// ID_array_replace
be.op()=clean_code.op0();
be.offset()=op0_offset;
be.type()=clean_code.op1().type();
clean_code.op0()=be;
if(!op1_offset.is_zero())
{
byte_extract_exprt op1(
byte_extract_id(),
clean_code.op1(),
op1_offset,
clean_code.op1().type());
clean_code.op1()=op1;
}
be.op()=dest_array;
be.offset()=from_integer(0, index_type());
be.type()=src_array.type();
dest_array.swap(be);
do_simplify(dest_array);
}
}
code_assignt assignment;
assignment.lhs()=clean_code.op0();
assignment.rhs()=clean_code.op1();
code_assignt assignment(dest_array, src_array);
symex_assign(state, assignment);
}
else if(statement==ID_array_set)
{
assert(code.operands().size()==2);
codet clean_code(code);
// array_set takes a pointer (to an array) and a value that each element
// should be set to; we need to:
// 1. dereference the pointer (via clean_expr)
// 2. find the actual array object/candidates for objects (via
// process_array_expr)
// 3. use the type of the resulting array to construct an array_of
// expression
DATA_INVARIANT(code.operands().size() == 2, "array_set takes two operands");
// we need to add dereferencing for the first operand
dereference_exprt d0;
d0.op0()=code.op0();
d0.type()=empty_typet();
exprt array_expr = dereference_exprt(code.op0());
clean_expr(array_expr, state, true);
clean_code.op0()=d0;
// obtain the actual array(s)
process_array_expr(array_expr);
clean_expr(clean_code.op0(), state, true);
if(clean_code.op0().id()==byte_extract_id() &&
clean_code.op0().type().id()==ID_empty)
clean_code.op0()=clean_code.op0().op0();
clean_expr(clean_code.op1(), state, false);
// prepare to build the array_of
exprt value = code.op1();
clean_expr(value, state, false);
process_array_expr(clean_code.op0());
clean_expr(clean_code.op0(), state, true);
// we might have a memset-style update of a non-array type - convert to a
// byte array
if(array_expr.type().id() != ID_array)
{
exprt array_size = size_of_expr(array_expr.type(), ns);
do_simplify(array_size);
array_expr =
byte_extract_exprt(
byte_extract_id(),
array_expr,
from_integer(0, index_type()),
array_typet(char_type(), array_size));
}
const typet &op0_type=ns.follow(clean_code.op0().type());
const array_typet &array_type = to_array_type(array_expr.type());
if(op0_type.id()!=ID_array)
throw "array_set expects array operand";
if(!base_type_eq(array_type.subtype(), value.type(), ns))
value.make_typecast(array_type.subtype());
const array_typet &array_type=
to_array_type(op0_type);
if(!base_type_eq(array_type.subtype(),
clean_code.op1().type(), ns))
clean_code.op1().make_typecast(array_type.subtype());
code_assignt assignment;
assignment.lhs()=clean_code.op0();
assignment.rhs()=array_of_exprt(clean_code.op1(), array_type);
code_assignt assignment(array_expr, array_of_exprt(value, array_type));
symex_assign(state, assignment);
}
else if(statement==ID_array_equal)
{
// array_equal takes two pointers (to arrays) and the symbol that the result
// should get assigned to; we need to:
// 1. dereference the pointers (via clean_expr)
// 2. find the actual array objects/candidates for objects (via
// process_array_expr)
// 3. build an assignment where the lhs is the previous third argument, and
// the right-hand side is an equality over the arrays, if their types match;
// if the types don't match the result trivially is false
DATA_INVARIANT(
code.operands().size() == 3,
"array_equal expected to take three arguments");
codet clean_code(code);
// we need to add dereferencing for the first two
dereference_exprt array1(code.op0());
clean_expr(array1, state, false);
dereference_exprt array2(code.op1());
clean_expr(array2, state, false);
// we need to add dereferencing for the first two operands
dereference_exprt d0, d1;
d0.op0()=code.op0();
d0.type()=empty_typet();
d1.op0()=code.op1();
d1.type()=empty_typet();
// obtain the actual arrays
process_array_expr(array1);
process_array_expr(array2);
clean_code.op0()=d0;
clean_code.op1()=d1;
code_assignt assignment(code.op2(), equal_exprt(array1, array2));
clean_expr(clean_code.op0(), state, false);
exprt op0_offset=from_integer(0, index_type());
if(clean_code.op0().id()==byte_extract_id() &&
clean_code.op0().type().id()==ID_empty)
{
op0_offset=to_byte_extract_expr(clean_code.op0()).offset();
clean_code.op0()=clean_code.op0().op0();
}
clean_expr(clean_code.op1(), state, false);
exprt op1_offset=from_integer(0, index_type());
if(clean_code.op1().id()==byte_extract_id() &&
clean_code.op1().type().id()==ID_empty)
{
op1_offset=to_byte_extract_expr(clean_code.op1()).offset();
clean_code.op1()=clean_code.op1().op0();
}
// check for size (or type) mismatch
if(!base_type_eq(array1.type(), array2.type(), ns))
assignment.lhs() = false_exprt();
process_array_expr(clean_code.op0());
clean_expr(clean_code.op0(), state, false);
process_array_expr(clean_code.op1());
clean_expr(clean_code.op1(), state, false);
if(!base_type_eq(clean_code.op0().type(),
clean_code.op1().type(), ns))
{
clean_code.op2() = false_exprt();
}
code_assignt assignment(
clean_code.op2(),
equal_exprt(clean_code.op0(), clean_code.op1()));
symex_assign(state, assignment);
}
else if(statement==ID_user_specified_predicate ||