Merge pull request #4261 from danpoe/feature/memory-analyzer

Memory analyzer to take memory snapshots [blocks: #2649, #4438]
This commit is contained in:
Michael Tautschnig 2019-05-16 15:10:45 +02:00 committed by GitHub
commit 09875c350c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
77 changed files with 2286 additions and 141 deletions

View File

@ -108,6 +108,7 @@ jobs:
env:
- COMPILER="ccache /usr/bin/g++-5"
- EXTRA_CXXFLAGS="-D_GLIBCXX_DEBUG"
- WITH_MEMORY_ANALYZER=1
# OS X using clang++
- stage: Test different OS/CXX/Flags
@ -118,7 +119,9 @@ jobs:
before_install:
- HOMEBREW_NO_AUTO_UPDATE=1 brew install ccache parallel gdb
- export PATH=$PATH:/usr/local/opt/ccache/libexec
env: COMPILER="ccache clang++"
env:
- COMPILER="ccache clang++"
- WITH_MEMORY_ANALYZER=0
# Ubuntu Linux with glibc using g++-5, debug mode
- stage: Test different OS/CXX/Flags
@ -144,6 +147,7 @@ jobs:
env:
- COMPILER="ccache /usr/bin/g++-5"
- EXTRA_CXXFLAGS="-DDEBUG"
- WITH_MEMORY_ANALYZER=1
script: echo "Not running any tests for a debug build."
# Ubuntu Linux with glibc using clang++-7, debug mode, disable USE_DSTRING
@ -175,6 +179,7 @@ jobs:
- COMPILER="ccache /usr/bin/clang++-7"
- EXTRA_CXXFLAGS="-Qunused-arguments -fcolor-diagnostics -DDEBUG -DUSE_STD_STRING"
- CCACHE_CPP2=yes
- WITH_MEMORY_ANALYZER=1
script: echo "Not running any tests for a debug build."
# cmake build using g++-7, enable NAMED_SUB_IS_FORWARD_LIST
@ -200,7 +205,7 @@ jobs:
install:
- ccache -z
- ccache --max-size=1G
- cmake -H. -Bbuild '-DCMAKE_BUILD_TYPE=Release' '-DCMAKE_CXX_COMPILER=/usr/bin/g++-7' '-DCMAKE_CXX_FLAGS=-DNAMED_SUB_IS_FORWARD_LIST'
- cmake -H. -Bbuild '-DCMAKE_BUILD_TYPE=Release' '-DCMAKE_CXX_COMPILER=/usr/bin/g++-7' '-DCMAKE_CXX_FLAGS=-DNAMED_SUB_IS_FORWARD_LIST' '-DWITH_MEMORY_ANALYZER=On'
- git submodule update --init --recursive
- cmake --build build -- -j4
script: (cd build; bin/unit "[core][irept]")
@ -228,7 +233,7 @@ jobs:
install:
- ccache -z
- ccache --max-size=1G
- cmake -H. -Bbuild '-DCMAKE_BUILD_TYPE=Release' '-DCMAKE_CXX_COMPILER=/usr/bin/g++-7' '-DCMAKE_USE_CUDD=true' -DCMAKE_CXX_FLAGS="-DBDD_GUARDS"
- cmake -H. -Bbuild '-DCMAKE_BUILD_TYPE=Release' '-DCMAKE_CXX_COMPILER=/usr/bin/g++-7' '-DCMAKE_USE_CUDD=true' -DCMAKE_CXX_FLAGS="-DBDD_GUARDS" '-DWITH_MEMORY_ANALYZER=On'
- git submodule update --init --recursive
- cmake --build build -- -j4
script: (cd build; ctest -V -L CORE -j2)
@ -264,7 +269,7 @@ jobs:
install:
- ccache -z
- ccache --max-size=1G
- cmake -H. -Bbuild '-DCMAKE_BUILD_TYPE=Release' '-DCMAKE_CXX_COMPILER=/usr/bin/clang++-7' '-DCMAKE_CXX_FLAGS=-Qunused-arguments'
- cmake -H. -Bbuild '-DCMAKE_BUILD_TYPE=Release' '-DCMAKE_CXX_COMPILER=/usr/bin/clang++-7' '-DCMAKE_CXX_FLAGS=-Qunused-arguments' '-DWITH_MEMORY_ANALYZER=On'
- git submodule update --init --recursive
- cmake --build build -- -j4
script: (cd build; ctest -V -L CORE -j2)
@ -321,6 +326,7 @@ jobs:
env:
- NAME="COVERITY_SCAN"
- COMPILER="ccache g++"
- WITH_MEMORY_ANALYZER=1
script: echo "This is coverity build. No need for tests."
install:

View File

@ -104,16 +104,37 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR
endif()
endif()
function(cprover_default_properties)
set(CBMC_CXX_STANDARD 11)
set(CBMC_CXX_STANDARD_REQUIRED true)
set(CBMC_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY
"Developer ID Application: Daniel Kroening")
set_target_properties(
${ARGN}
PROPERTIES
CXX_STANDARD ${CBMC_CXX_STANDARD}
CXX_STANDARD_REQUIRED ${CBMC_CXX_STANDARD_REQUIRED}
XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY ${CBMC_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY})
endfunction()
option(WITH_MEMORY_ANALYZER OFF
"build the memory analyzer")
if(CMAKE_SYSTEM_NAME STREQUAL Linux)
set(WITH_MEMORY_ANALYZER_DEFAULT ON)
else()
set(WITH_MEMORY_ANALYZER_DEFAULT OFF)
endif()
option(WITH_MEMORY_ANALYZER ${WITH_MEMORY_ANALYZER_DEFAULT}
"build the memory analyzer")
add_subdirectory(src)
add_subdirectory(regression)
add_subdirectory(unit)
set(CBMC_CXX_STANDARD 11)
set(CBMC_CXX_STANDARD_REQUIRED true)
set(CBMC_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY
"Developer ID Application: Daniel Kroening")
set_target_properties(
cprover_default_properties(
analyses
ansi-c
assembler
@ -145,13 +166,7 @@ set_target_properties(
testing-utils
unit
util
xml
PROPERTIES
CXX_STANDARD ${CBMC_CXX_STANDARD}
CXX_STANDARD_REQUIRED ${CBMC_CXX_STANDARD_REQUIRED}
XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY ${CBMC_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY}
)
xml)
option(WITH_JBMC "Build the JBMC Java front-end" ON)
if(WITH_JBMC)

View File

@ -8,7 +8,9 @@
[A Short Tutorial](cbmc/tutorial/),
[Loop Unwinding](cbmc/unwinding/),
[Assertion Checking](cbmc/assertions/)
[Assertion Checking](cbmc/assertions/),
[Memory Analyzer](cbmc/memory-analyzer/),
[Program Harness](cbmc/goto-harness/)
## 4. [Test Suite Generation](test-suite/)

View File

@ -0,0 +1,89 @@
[CPROVER Manual TOC](../../)
## Memory Analyzer
The memory analyzer is a front-end for running and querying GDB in order to
obtain a state of the input program. The GDB is not needed to be executed
directly but is rather used as a back-end for the memory analysis. A common
application would be to obtain a snapshot of a program under analysis at a
particular state of execution. Such a snapshot could be useful on its own: to
query about values of particular variables. Furthermore, since that snapshot is
a state of a valid concrete execution, it can also be used for subsequent
analyses.
## Usage
We assume that the user wants to inspect a binary executable compiled with
debugging symbols and a symbol table information understandable by CBMC, e.g.
(having `goto-gcc` on the `PATH`):
```sh
$ goto-gcc -g input_program.c -o input_program_exe
```
Calling `goto-gcc` instead of simply compiling with `gcc` produces an ELF binary
with a goto section that contains the goto model (goto program + symbol table)
[goto-cc-variants](../goto-cc/variants/).
The memory analyzer supports two workflows to initiate the GDB with user code:
either to run the code from a core-file or up to a break-point. If the user
already has a core-file, they can specify it with the option `--core-file cf`.
If the user knows the point of their program from where they want to run the
analysis, they can specify it with the option `--breakpoint bp`. Only one of
core-file/break-point option can be used.
The tool also expects a comma-separated list of symbols to be analysed
`--symbols s1, s2, ..`. Given its dependence on GDB, `memory-analyzer` is a
Unix-only tool. The tool calls `gdb` to obtain the snapshot which is why the
`-g` option is necessary for the program symbols to be visible.
Take for example the following program:
```C
// main.c
void checkpoint() {}
int array[] = {1, 2, 3};
int main()
{
array[1] = 4;
checkpoint();
return 0;
}
```
Say we are interested in the evaluation of `array` at the call-site of
`checkpoint`. We compile the program with
```sh
$ goto-gcc -g main.c -o main_exe
```
And then we call `memory-analyzer` with:
```sh
$ memory-analyzer --breakpoint checkpoint --symbols array main_exe
```
to obtain as output the human readable list of values for each requested symbol:
```
{
array = { 1, 4, 3 };
}
```
The above option is useful for the user and their preliminary analysis but does
not contain enough information for further computer-based analyses. To that end,
memory analyzer has an option to request the result to be a snapshot of the
whole symbol table `--symtab-snapshot`. Finally, to obtain an output in JSON
format, e.g. for further analyses by `goto-harness` pass the additional option
`--json-ui`.
```sh
$ memory-analyzer --symtab-snapshot --json-ui \
--breakpoint checkpoint --symbols array main_exe > snapshot.json
```

View File

@ -8,7 +8,7 @@ add_custom_target(java-models-library ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib/java-models-library
)
set_target_properties(
cprover_default_properties(
java_bytecode
java-models-library
jbmc
@ -20,9 +20,4 @@ set_target_properties(
java-testing-utils
java-unit
miniz
PROPERTIES
CXX_STANDARD ${CBMC_CXX_STANDARD}
CXX_STANDARD_REQUIRED ${CBMC_CXX_STANDARD_REQUIRED}
XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY ${CBMC_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY}
)

View File

@ -53,3 +53,7 @@ add_subdirectory(goto-harness)
add_subdirectory(goto-cc-file-local)
add_subdirectory(linking-goto-binaries)
add_subdirectory(symtab2gb)
if(WITH_MEMORY_ANALYZER)
add_subdirectory(memory-analyzer)
endif()

View File

@ -30,6 +30,23 @@ DIRS = cbmc \
symtab2gb \
# Empty last line
ifeq ($(OS),Windows_NT)
detected_OS := Windows
else
detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown')
endif
ifeq ($(detected_OS),Linux)
ifneq ($(WITH_MEMORY_ANALYZER),0)
# only set if it wasn't explicitly unset
WITH_MEMORY_ANALYZER=1
endif
endif
ifeq ($(WITH_MEMORY_ANALYZER),1)
DIRS += memory-analyzer
endif
# Run all test directories in sequence
.PHONY: test
test:

View File

@ -0,0 +1,3 @@
add_test_pl_tests(
"../chain.sh \
$<TARGET_FILE:memory-analyzer> $<TARGET_FILE_DIR:goto-cc>/goto-gcc")

View File

@ -0,0 +1,22 @@
default: clean tests.log
GOTO_GCC_EXE=../../../src/goto-cc/goto-gcc
MEMORY_ANALYZER_EXE=../../../src/memory-analyzer/memory-analyzer
clean:
find -name '*.exe' -execdir $(RM) '{}' \;
find -name '*.out' -execdir $(RM) '{}' \;
$(RM) tests.log
test:
@../test.pl -e -p -c "../chain.sh $(MEMORY_ANALYZER_EXE) $(GOTO_GCC_EXE)"
tests.log: ../test.pl
@../test.pl -e -p -c "../chain.sh $(MEMORY_ANALYZER_EXE) $(GOTO_GCC_EXE)"
show:
@for dir in *; do \
if [ -d "$$dir" ]; then \
vim -o "$$dir/*.c" "$$dir/*.out"; \
fi; \
done;

View File

@ -0,0 +1,82 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// This file test array support in the memory-analyzer.
/// A more detailed description of the test idea is in example3.h.
/// setup() prepares the data structure.
/// manipulate_data() is the hook used for the test,
/// where gdb reaches the breakpoint.
/// main() is just the required boilerplate around this methods,
/// to make the compiled result executable.
#include "arrays.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setup()
{
test_struct = malloc(sizeof(struct a_typet));
for(int i = 0; i < 10; i++)
{
test_struct->config[i] = i + 10;
}
for(int i = 0; i < 10; i++)
{
test_struct->values[i] = 0xf3;
}
for(int i = 10; i < 20; i++)
{
test_struct->values[i] = 0x3f;
}
for(int i = 20; i < 30; i++)
{
test_struct->values[i] = 0x01;
}
for(int i = 30; i < 40; i++)
{
test_struct->values[i] = 0x01;
}
for(int i = 40; i < 50; i++)
{
test_struct->values[i] = 0xff;
}
for(int i = 50; i < 60; i++)
{
test_struct->values[i] = 0x00;
}
for(int i = 60; i < 70; i++)
{
test_struct->values[i] = 0xab;
}
messaget option1 = {.text = "accept"};
for(int i = 0; i < 4; i++)
{
char *value = malloc(sizeof(char *));
sprintf(value, "unique%i", i);
entryt your_options = {
.id = 1, .options[0] = option1, .options[1].text = value};
your_options.id = i + 12;
test_struct->childs[i].id = your_options.id;
test_struct->childs[i].options[0] = your_options.options[0];
test_struct->childs[i].options[1].text = your_options.options[1].text;
}
test_struct->initialized = true;
}
int manipulate_data()
{
for(int i = 0; i < 4; i++)
{
free(test_struct->childs[i].options[1].text);
test_struct->childs[i].options[1].text = "decline";
}
}
int main()
{
setup();
manipulate_data();
return 0;
}

View File

@ -0,0 +1,32 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// Example3 test explicitly the array support.
/// It ensures, that arrays are handled right.
/// The different typedefs have been used, to make sure the memory-analyzer
/// is aware of the different appeareances in the gdb responses.
#include <stdbool.h>
struct a_sub_sub_typet
{
char *text;
};
typedef struct a_sub_sub_typet messaget;
struct a_sub_typet
{
int id;
messaget options[2];
};
struct a_typet
{
int config[10];
bool initialized;
unsigned char values[70];
struct a_sub_typet childs[4];
} * test_struct;
typedef struct a_sub_typet entryt;

View File

@ -0,0 +1,27 @@
FUTURE
arrays.gb
--breakpoint manipulate_data --symbols test_struct
analyzing symbol: test_struct
GENERATED CODE:
\{
char _test_struct_childs0_options0_text_1\[\]="accept";
char _test_struct_childs0_options1_text_2\[\]="unique0";
char _test_struct_childs1_options1_text_3\[\]="unique1";
char _test_struct_childs2_options1_text_4\[\]="unique2";
char _test_struct_childs3_options1_text_5\[\]="unique3";
struct a_typet test_struct_0=\{ .config=\{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 \}, .initalized=0,
.values=\{ 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171 \}, .childs=\{ \{ .id=12, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \},
\{ .text=\(char \*\)&_test_struct_childs0_options1_text_2 \} \} \},
\{ .id=13, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \},
\{ .text=\(char \*\)&_test_struct_childs1_options1_text_3 \} \} \},
\{ .id=14, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \},
\{ .text=\(char \*\)&_test_struct_childs2_options1_text_4 \} \} \},
\{ .id=15, .options=\{ \{ .text=\(char \*\)&_test_struct_childs0_options0_text_1 \},
\{ .text=\(char \*\)&_test_struct_childs3_options1_text_5 \} \} \} \} \};
test_struct = &test_struct_0;
\}
^EXIT=0$
^SIGNAL=0$
--
--
`values` is an array of unsigned chars which we currently cannot extract from GDB

View File

@ -0,0 +1,14 @@
void checkpoint()
{
}
int array[] = {1, 2, 3};
int main()
{
array[1] = 4;
checkpoint();
return 0;
}

View File

@ -0,0 +1,6 @@
CORE
main.gb
--breakpoint checkpoint --symbols array
array = \{ 1, 4, 3 \};
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,12 @@
#!/bin/bash
set -e
memory_analyzer=$1
goto_gcc=$2
name=${*:$#}
name=${name%.gb}
args=${*:3:$#-3}
$goto_gcc -g -std=c11 -o "${name}.gb" "${name}.c"
$memory_analyzer $args "${name}.gb"

View File

@ -0,0 +1,61 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// This example deals with cyclic data structures,
/// see comment in example2.h explaining why this is necessary.
/// add_element is just declared as a helper method for cycle construction.
/// process_buffer isn't supposed to do meaningfull stuff.
/// It is the hook for the gdb breakpoint used in the test.
/// free_buffer just does clean up, if you run this.
#include "cycles.h"
void add_element(char *content)
{
cycle_buffer_entry_t *new_entry = malloc(sizeof(cycle_buffer_entry_t));
new_entry->data = content;
if(buffer.end && buffer.start)
{
new_entry->next = buffer.start;
buffer.end->next = new_entry;
buffer.end = new_entry;
}
else
{
new_entry->next = new_entry;
buffer.end = new_entry;
buffer.start = new_entry;
}
}
int process_buffer()
{
return 0;
}
void free_buffer()
{
cycle_buffer_entry_t *current;
cycle_buffer_entry_t *free_next;
if(buffer.start)
{
current = buffer.start->next;
while(current != buffer.start)
{
free_next = current;
current = current->next;
free(free_next);
}
free(current);
}
}
int main()
{
add_element("snow");
add_element("sun");
add_element("rain");
add_element("thunder storm");
process_buffer();
free_buffer();
}

View File

@ -0,0 +1,27 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// Example2 deals with cycles in datastructures.
///
/// While it is common that some datastructures contain cylces,
/// it is necessary that the memory-analyzer does recognize them.
/// Otherwise it would follow the datastructures pointer establishing
/// the cycle for ever and never terminate execution.
/// The cycle_buffer described below contains a cycle.
/// As long as this regression test works, cycle introduced by using pointers
/// are handle the correct way.
#include <stdlib.h>
typedef struct cycle_buffer_entry
{
char *data;
struct cycle_buffer_entry *next;
} cycle_buffer_entry_t;
struct cycle_buffer
{
cycle_buffer_entry_t *start;
struct cycle_buffer_entry *end;
};
struct cycle_buffer buffer;

View File

@ -0,0 +1,23 @@
CORE
cycles.gb
--breakpoint process_buffer --symbols buffer
cycle_buffer_entry_t tmp;
char tmp\$0\[\];
struct cycle_buffer_entry tmp\$1;
char tmp\$2\[\];
struct cycle_buffer_entry tmp\$3;
char tmp\$4\[\];
struct cycle_buffer_entry tmp\$5;
char tmp\$6\[\];
tmp\$0 = \"snow\";
tmp\$2 = \"sun\";
tmp\$4 = \"rain\";
tmp\$6 = \"thunder storm\";
tmp\$5 = \{ .data=tmp\$6, .next=\(\(struct cycle_buffer_entry \*\)0\) \};
tmp\$3 = \{ .data=tmp\$4, .next=&tmp\$5 \};
tmp\$1 = \{ .data=tmp\$2, .next=&tmp\$3 \};
tmp = \{ .data=tmp\$0, .next=&tmp\$1 \};
buffer = \{ .start=&tmp, .end=&tmp\$5 \};
buffer.start->next->next->next->next = &tmp;
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,23 @@
void checkpoint()
{
}
int x = 3;
int *p = &x;
struct S
{
int c1;
int c2;
};
struct S st = {.c1 = 1, .c2 = 2};
int a[] = {1, 2, 3};
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,5 @@
CORE
main.gb
--breakpoint checkpoint --symbols x,p,st,a --symtab-snapshot --json-ui
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,42 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// This is just a basic example.
/// Pointer references are tested and ensured, that for example f and f_1 are
/// pointing to the same int value location after running memory-analyzer.
#include "plain_old_datatypes.h"
int my_function(char *s)
{
int a = 10;
g->a = a;
g->b = s;
return 0;
}
int main(int argc, char **argv)
{
char *test;
e = 17;
f = malloc(sizeof(int));
f = &e;
f_1 = f;
h = "test";
g = malloc(sizeof(struct mapping_things));
d.a = 4;
d.c = -32;
test = "test2";
d.b = test;
my_function(test);
free(g);
free(f);
return 0;
}

View File

@ -0,0 +1,28 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// Example1 is just demonstrating, that the tool works in general.
/// A small struct and a few primitive pointers are declared in the global
/// namespace. These are assigned with values before calling my_function
/// and then, it is tested that this global state can be reconstructed before
/// calling my_function. As long as this example work basic functionallity is
/// provided.
#include <stdlib.h>
struct mapping_things
{
int a;
char *b;
int c;
};
typedef struct mapping_things other_things;
other_things d;
int e;
int *f;
int *f_1;
struct mapping_things *g;
char *h;
int my_function(char *s);

View File

@ -0,0 +1,17 @@
CORE
plain_old_datatypes.gb
--breakpoint my_function --symbols e,f,f_1,d,g,h
^EXIT=0$
^SIGNAL=0$
char tmp\[\];
struct mapping_things tmp\$0;
char tmp\$1\[\];
e = 17;
f = &e;
f_1 = &e;
tmp = \"test2\";
d = \{ .a=4, .b=tmp, .c=-32 \};
tmp\$0 = \{ .a=0, .b=\(\(char \*\)0\), .c=0 \};
g = &tmp\$0;
tmp\$1 = \"test\";
h = tmp\$1;

View File

@ -0,0 +1,13 @@
void checkpoint()
{
}
int x = 3;
int *p = &x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,7 @@
CORE
main.gb
--breakpoint checkpoint --symbols x,p
x = 3;
p = &x;
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,13 @@
void checkpoint()
{
}
int x = 3;
int *p = &x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,5 @@
CORE
main.gb
--breakpoint checkpoint --symbols p,x
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,13 @@
void checkpoint()
{
}
int x = 3;
void *p = (void *)&x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,7 @@
CORE
main.gb
--breakpoint checkpoint --symbols x,p
x = 3;
p = &x;
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,14 @@
void checkpoint()
{
}
int x = 3;
int *p1 = &x;
int **p2 = &p1;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,5 @@
CORE
main.gb
--breakpoint checkpoint --symbols x,p1,p2
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,14 @@
void checkpoint()
{
}
int x = 3;
int *p1 = &x;
int *p2 = &x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,5 @@
CORE
main.gb
--breakpoint checkpoint --symbols p1,x,p2
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,13 @@
void checkpoint()
{
}
int x = 3;
void *p = (void *)&x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,5 @@
CORE
main.gb
--breakpoint checkpoint --symbols p,x
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,13 @@
void checkpoint()
{
}
int x = 3;
int *p = &x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,5 @@
CORE
main.gb
--breakpoint checkpoint --symbols p
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,13 @@
void checkpoint()
{
}
int x = 3;
void *p = (void *)&x;
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,8 @@
KNOWNBUG
main.gb
--breakpoint checkpoint --symbols p
^EXIT=0$
^SIGNAL=0$
--
--
p has type void*, the value it points to has type void; right now we can't initialize expression of that type

View File

@ -0,0 +1,21 @@
void checkpoint()
{
}
struct S
{
struct S *next;
};
struct S st;
struct S *p;
int main()
{
st.next = &st;
p = &st;
checkpoint();
return 0;
}

View File

@ -0,0 +1,9 @@
CORE
main.gb
--breakpoint checkpoint --symbols p
struct S tmp;
tmp = \{ \.next=\(\(struct S \*\)(NULL|0)\) \};
p = &tmp;
p->next = &tmp;
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,12 @@
void checkpoint()
{
}
const char *s = "abc";
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,8 @@
CORE
main.gb
--breakpoint checkpoint --symbols s
char tmp\[\];
tmp = \"abc\";
s = tmp;
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,20 @@
void checkpoint()
{
}
struct S
{
int c1;
int c2;
};
struct S st = {1, 2};
int main()
{
st.c2 = 3;
checkpoint();
return 0;
}

View File

@ -0,0 +1,6 @@
CORE
main.gb
--breakpoint checkpoint --symbols st
st = \{ .c1=1, .c2=3 \};
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,21 @@
void checkpoint()
{
}
struct S
{
int c1;
int *c2;
};
int i = 0;
struct S st = {1, &i};
int main()
{
i = 3;
checkpoint();
return 0;
}

View File

@ -0,0 +1,8 @@
CORE
main.gb
--breakpoint checkpoint --symbols st
signed int tmp;
tmp = 3;
st = \{ .c1=1, .c2=&tmp \};
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,18 @@
void checkpoint()
{
}
struct S
{
int c1;
char *c2;
};
struct S st = {1, "abc"};
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,8 @@
CORE
main.gb
--breakpoint checkpoint --symbols st
char tmp\[\];
tmp = \"abc\";
st = \{ .c1=1, .c2=tmp \};
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,18 @@
void checkpoint()
{
}
struct S
{
int c1;
char c2;
};
struct S st = {1, 'x'};
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,9 @@
KNOWNBUG
main.gb
--breakpoint checkpoint --symbols st
st = \{ .c1=1, .c2='x' \};
^EXIT=0$
^SIGNAL=0$
--
--
cannot extract single characters from GDB for now

View File

@ -0,0 +1,18 @@
void checkpoint()
{
}
struct S
{
int c1;
int a[2];
};
struct S st = {1, {2, 3}};
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,6 @@
CORE
main.gb
--breakpoint checkpoint --symbols st
st = \{ .c1=1, .a=\{ 2, 3 \} \};
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,23 @@
void checkpoint()
{
}
struct T
{
int i;
};
struct S
{
int c;
struct T t;
};
struct S st = {1, {2}};
int main()
{
checkpoint();
return 0;
}

View File

@ -0,0 +1,6 @@
CORE
main.gb
--breakpoint checkpoint --symbols st
st = \{ \.c=1, \.t=\{ \.i=2 \} \};
^EXIT=0$
^SIGNAL=0$

View File

@ -0,0 +1,8 @@
KNOWNBUG
void_pointer.gb
--breakpoint void_pointer.c:17 --symbols a_void_pointer,a_second_void_pointer,a_third_void_pointer,blob
^EXIT=0$
^SIGNAL=0$
--
--
void pointer unsupported for now

View File

@ -0,0 +1,18 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// This file initializes some void pointer in different styles.
/// Later the memory-analyzer tries to reconstruct the content.
#include "void_pointer.h"
int main()
{
char *char_pointer = "test_string";
a_second_void_pointer = char_pointer;
char bytes[] = {0xf3, 0xf3, 0x48, 0xff, 0x5c, 0x5c, 0xff};
a_third_void_pointer = &bytes;
blob.data = bytes;
blob.size = sizeof(bytes);
return 0;
}

View File

@ -0,0 +1,15 @@
//Copyright 2018 Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// Example4 test the handling of void pointer.
/// The memory-analyzer tries to cast them to char and read some of the data.
struct a_struct_with_void
{
int size;
void *data;
} blob;
void *a_void_pointer;
void *a_second_void_pointer;
void *a_third_void_pointer;

View File

@ -105,3 +105,7 @@ add_subdirectory(goto-analyzer)
add_subdirectory(goto-diff)
add_subdirectory(goto-harness)
add_subdirectory(symtab2gb)
if(UNIX OR WITH_MEMORY_ANALYZER)
add_subdirectory(memory-analyzer)
endif()

View File

@ -17,6 +17,7 @@ DIRS = analyses \
json-symtab-language \
langapi \
linking \
memory-analyzer \
pointer-analysis \
solvers \
symtab2gb \
@ -33,6 +34,18 @@ all: cbmc.dir \
symtab2gb.dir \
# Empty last line
ifeq ($(OS),Windows_NT)
detected_OS := Windows
else
detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown')
endif
ifeq ($(detected_OS),Linux)
all: memory-analyzer.dir
else ifeq ($(WITH_MEMORY_ANALYZER),1)
all: memory-analyzer.dir
endif
###############################################################################
util.dir: big-int.dir
@ -72,6 +85,8 @@ goto-diff.dir: languages goto-programs.dir pointer-analysis.dir \
goto-cc.dir: languages goto-programs.dir linking.dir
memory-analyzer.dir: util.dir goto-programs.dir ansi-c.dir
symtab2gb.dir: util.dir goto-programs.dir json-symtab-language.dir
# building for a particular directory

View File

@ -55,14 +55,14 @@ bool c_implicit_typecast_arithmetic(
return !c_typecast.errors.empty();
}
bool is_void_pointer(const typet &type)
bool has_a_void_pointer(const typet &type)
{
if(type.id()==ID_pointer)
{
if(type.subtype().id()==ID_empty)
return true;
return is_void_pointer(type.subtype());
return has_a_void_pointer(type.subtype());
}
else
return false;
@ -212,11 +212,16 @@ bool check_c_implicit_typecast(
const irept &dest_subtype=dest_type.subtype();
const irept &src_subtype =src_type.subtype();
if(src_subtype==dest_subtype)
if(src_subtype == dest_subtype)
{
return false;
else if(is_void_pointer(src_type) || // from void to anything
is_void_pointer(dest_type)) // to void from anything
}
else if(
has_a_void_pointer(src_type) || // from void to anything
has_a_void_pointer(dest_type)) // to void from anything
{
return false;
}
}
if(dest_type.id()==ID_array &&
@ -544,8 +549,7 @@ void c_typecastt::implicit_typecast_followed(
const typet &src_sub = src_type.subtype();
const typet &dest_sub = dest_type.subtype();
if(is_void_pointer(src_type) ||
is_void_pointer(dest_type))
if(has_a_void_pointer(src_type) || has_a_void_pointer(dest_type))
{
// from/to void is always good
}

View File

@ -0,0 +1,21 @@
# Library
file(GLOB_RECURSE sources "*.cpp" "*.h")
list(REMOVE_ITEM sources
${CMAKE_CURRENT_SOURCE_DIR}/memory-analyzer_main.cpp
)
add_library(memory-analyzer-lib ${sources})
generic_includes(memory-analyzer-lib)
target_link_libraries(memory-analyzer-lib
ansi-c
goto-programs
util
)
# Executable
add_executable(memory-analyzer memory_analyzer_main.cpp)
target_link_libraries(memory-analyzer memory-analyzer-lib)
cprover_default_properties(memory-analyzer memory-analyzer-lib)

View File

@ -0,0 +1,37 @@
SRC = \
analyze_symbol.cpp \
gdb_api.cpp \
memory_analyzer_main.cpp \
memory_analyzer_parse_options.cpp
# Empty last line
INCLUDES= -I ..
LIBS = \
../ansi-c/ansi-c$(LIBEXT) \
../goto-programs/goto-programs$(LIBEXT) \
../linking/linking$(LIBEXT) \
../util/util$(LIBEXT) \
../big-int/big-int$(LIBEXT) \
../langapi/langapi$(LIBEXT)
CLEANFILES = memory-analyzer$(EXEEXT)
include ../config.inc
include ../common
all: memory-analyzer$(EXEEXT)
###############################################################################
memory-analyzer$(EXEEXT): $(OBJ)
$(LINKBIN)
.PHONY: memory-analyser-mac-signed
memory-analyser-mac-signed: memory-analyzer$(EXEEXT)
codesign -v -s $(OSX_IDENTITY) memory-analyzer$(EXEEXT)

View File

@ -0,0 +1,374 @@
/*******************************************************************\
Module: Symbol Analyzer
Author: Malte Mues <mail.mues@gmail.com>
Daniel Poetzl
\*******************************************************************/
#include "analyze_symbol.h"
#include <util/c_types.h>
#include <util/c_types_util.h>
#include <util/config.h>
#include <util/expr_initializer.h>
#include <util/string_constant.h>
gdb_value_extractort::gdb_value_extractort(
const symbol_tablet &symbol_table,
const char *binary)
: gdb_api(binary),
symbol_table(symbol_table),
ns(symbol_table),
c_converter(ns, expr2c_configurationt::clean_configuration),
allocate_objects(ID_C, source_locationt(), irep_idt{}, this->symbol_table)
{
}
void gdb_value_extractort::analyze_symbols(const std::vector<irep_idt> &symbols)
{
// record addresses of given symbols
for(const auto &id : symbols)
{
const symbol_exprt &symbol_expr = ns.lookup(id).symbol_expr();
const address_of_exprt aoe(symbol_expr);
const std::string c_expr = c_converter.convert(aoe);
const pointer_valuet &value = gdb_api.get_memory(c_expr);
CHECK_RETURN(value.pointee.empty() || (id == value.pointee));
values.insert({value.address, symbol_expr});
}
for(const auto &id : symbols)
{
analyze_symbol(id);
}
}
void gdb_value_extractort::analyze_symbol(const irep_idt &symbol_name)
{
const symbolt &symbol = ns.lookup(symbol_name);
const symbol_exprt symbol_expr = symbol.symbol_expr();
try
{
const typet target_type = symbol.type;
const auto zero_expr = zero_initializer(target_type, symbol.location, ns);
CHECK_RETURN(zero_expr);
const exprt target_expr =
get_expr_value(symbol_expr, *zero_expr, symbol.location);
add_assignment(symbol_expr, target_expr);
}
catch(gdb_interaction_exceptiont e)
{
throw analysis_exceptiont(e.what());
}
process_outstanding_assignments();
}
/// Get memory snapshot as C code
std::string gdb_value_extractort::get_snapshot_as_c_code()
{
code_blockt generated_code;
allocate_objects.declare_created_symbols(generated_code);
for(auto const &pair : assignments)
{
generated_code.add(code_assignt(pair.first, pair.second));
}
return c_converter.convert(generated_code);
}
/// Get memory snapshot as symbol table
symbol_tablet gdb_value_extractort::get_snapshot_as_symbol_table()
{
symbol_tablet snapshot;
for(const auto &pair : assignments)
{
const symbol_exprt &symbol_expr = to_symbol_expr(pair.first);
const irep_idt id = symbol_expr.get_identifier();
INVARIANT(symbol_table.has_symbol(id), "symbol must exist in symbol table");
const symbolt &symbol = symbol_table.lookup_ref(id);
symbolt snapshot_symbol(symbol);
snapshot_symbol.value = pair.second;
snapshot.insert(snapshot_symbol);
}
// Also add type symbols to the snapshot
for(const auto &pair : symbol_table)
{
const symbolt &symbol = pair.second;
if(symbol.is_type)
{
snapshot.insert(symbol);
}
}
return snapshot;
}
void gdb_value_extractort::add_assignment(const exprt &lhs, const exprt &value)
{
assignments.push_back(std::make_pair(lhs, value));
}
exprt gdb_value_extractort::get_char_pointer_value(
const exprt &expr,
const memory_addresst &memory_location,
const source_locationt &location)
{
PRECONDITION(expr.type().id() == ID_pointer);
PRECONDITION(is_c_char_type(expr.type().subtype()));
PRECONDITION(!memory_location.is_null());
auto it = values.find(memory_location);
if(it == values.end())
{
std::string c_expr = c_converter.convert(expr);
pointer_valuet value = gdb_api.get_memory(c_expr);
CHECK_RETURN(value.string);
string_constantt init(*value.string);
CHECK_RETURN(to_array_type(init.type()).is_complete());
symbol_exprt dummy("tmp", pointer_type(init.type()));
code_blockt assignments;
const symbol_exprt new_symbol =
to_symbol_expr(allocate_objects.allocate_automatic_local_object(
assignments, dummy, init.type()));
add_assignment(new_symbol, init);
values.insert(std::make_pair(memory_location, new_symbol));
return new_symbol;
}
else
{
return it->second;
}
}
exprt gdb_value_extractort::get_non_char_pointer_value(
const exprt &expr,
const memory_addresst &memory_location,
const source_locationt &location)
{
PRECONDITION(expr.type().id() == ID_pointer);
PRECONDITION(!is_c_char_type(expr.type().subtype()));
PRECONDITION(!memory_location.is_null());
auto it = values.find(memory_location);
if(it == values.end())
{
values.insert(std::make_pair(memory_location, nil_exprt()));
const typet target_type = expr.type().subtype();
symbol_exprt dummy("tmp", expr.type());
code_blockt assignments;
const symbol_exprt new_symbol =
to_symbol_expr(allocate_objects.allocate_automatic_local_object(
assignments, dummy, target_type));
dereference_exprt dereference_expr(expr);
const auto zero_expr = zero_initializer(target_type, location, ns);
CHECK_RETURN(zero_expr);
const exprt target_expr =
get_expr_value(dereference_expr, *zero_expr, location);
// add assignment of value to newly created symbol
add_assignment(new_symbol, target_expr);
values[memory_location] = new_symbol;
return new_symbol;
}
else
{
return it->second;
}
}
exprt gdb_value_extractort::get_pointer_value(
const exprt &expr,
const exprt &zero_expr,
const source_locationt &location)
{
PRECONDITION(zero_expr.id() == ID_constant);
PRECONDITION(expr.type().id() == ID_pointer);
PRECONDITION(expr.type() == zero_expr.type());
std::string c_expr = c_converter.convert(expr);
const pointer_valuet value = gdb_api.get_memory(c_expr);
const auto memory_location = value.address;
if(!memory_location.is_null())
{
if(is_c_char_type(expr.type().subtype()))
{
return get_char_pointer_value(expr, memory_location, location);
}
else
{
const exprt target_expr =
get_non_char_pointer_value(expr, memory_location, location);
if(target_expr.id() == ID_nil)
{
outstanding_assignments[expr] = memory_location;
}
else
{
return address_of_exprt(target_expr);
}
}
}
return zero_expr;
}
exprt gdb_value_extractort::get_array_value(
const exprt &expr,
const exprt &array,
const source_locationt &location)
{
PRECONDITION(array.id() == ID_array);
PRECONDITION(expr.type().id() == ID_array);
PRECONDITION(expr.type() == array.type());
exprt new_array(array);
for(size_t i = 0; i < new_array.operands().size(); ++i)
{
const index_exprt index_expr(expr, from_integer(i, index_type()));
exprt &operand = new_array.operands()[i];
operand = get_expr_value(index_expr, operand, location);
}
return new_array;
}
exprt gdb_value_extractort::get_expr_value(
const exprt &expr,
const exprt &zero_expr,
const source_locationt &location)
{
PRECONDITION(expr.type() == zero_expr.type());
const typet &type = expr.type();
PRECONDITION(type.id() != ID_struct);
if(is_c_integral_type(type))
{
INVARIANT(zero_expr.is_constant(), "zero initializer is a constant");
return from_integer(string2integer(get_gdb_value(expr)), expr.type());
}
else if(is_c_char_type(type))
{
INVARIANT(zero_expr.is_constant(), "zero initializer is a constant");
return zero_expr; // currently left at 0
}
else if(type.id() == ID_c_bool)
{
INVARIANT(zero_expr.is_constant(), "zero initializer is a constant");
return from_c_boolean_value(id2boolean(get_gdb_value(expr)), type);
}
else if(type.id() == ID_c_enum)
{
INVARIANT(zero_expr.is_constant(), "zero initializer is a constant");
return convert_member_name_to_enum_value(
get_gdb_value(expr), to_c_enum_type(type));
}
else if(type.id() == ID_struct_tag)
{
return get_struct_value(expr, zero_expr, location);
}
else if(type.id() == ID_array)
{
return get_array_value(expr, zero_expr, location);
}
else if(type.id() == ID_pointer)
{
INVARIANT(zero_expr.is_constant(), "zero initializer is a constant");
return get_pointer_value(expr, zero_expr, location);
}
UNIMPLEMENTED;
}
exprt gdb_value_extractort::get_struct_value(
const exprt &expr,
const exprt &zero_expr,
const source_locationt &location)
{
PRECONDITION(zero_expr.id() == ID_struct);
PRECONDITION(expr.type().id() == ID_struct_tag);
PRECONDITION(expr.type() == zero_expr.type());
exprt new_expr(zero_expr);
const struct_tag_typet &struct_tag_type = to_struct_tag_type(expr.type());
const struct_typet &struct_type = ns.follow_tag(struct_tag_type);
for(size_t i = 0; i < new_expr.operands().size(); ++i)
{
const struct_typet::componentt &component = struct_type.components()[i];
if(component.get_is_padding() || component.type().id() == ID_code)
{
continue;
}
exprt &operand = new_expr.operands()[i];
member_exprt member_expr(expr, component);
operand = get_expr_value(member_expr, operand, location);
}
return new_expr;
}
void gdb_value_extractort::process_outstanding_assignments()
{
for(const auto &pair : outstanding_assignments)
{
const address_of_exprt aoe(values[pair.second]);
add_assignment(pair.first, aoe);
}
}
std::string gdb_value_extractort::get_gdb_value(const exprt &expr)
{
return gdb_api.get_value(c_converter.convert(expr));
}

View File

@ -0,0 +1,189 @@
/*******************************************************************\
Module: Symbol Analyzer
Author: Malte Mues <mail.mues@gmail.com>
Daniel Poetzl
\*******************************************************************/
/// \file
/// High-level interface to gdb
#ifndef CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H
#define CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H
#include <map>
#include <string>
#include "gdb_api.h"
#include <ansi-c/expr2c_class.h>
#include <util/allocate_objects.h>
#include <util/message.h>
#include <util/namespace.h>
#include <util/std_code.h>
#include <util/symbol_table.h>
class gdb_apit;
class exprt;
class source_locationt;
/// Interface for extracting values from GDB (building on \ref gdb_apit)
class gdb_value_extractort
{
public:
gdb_value_extractort(const symbol_tablet &symbol_table, const char *binary);
/// For each input symbol in \p symbols: map its value address to its
/// \ref symbol_exprt (via the `values` map) and then call
/// \ref analyze_symbol on it.
/// \param symbols: names of symbols to be analysed
void analyze_symbols(const std::vector<irep_idt> &symbols);
/// Get memory snapshot as C code
/// \return converted block of code with the collected assignments
std::string get_snapshot_as_c_code();
/// Get memory snapshot as symbol table
/// Build a new \ref symbol_tablet and for each `lhs` symbol from collected
/// assignments [lhs:=rhs] store a new symbol (with the `rhs` as value)
/// there. Also, type symbols are copied from \ref symbol_table.
/// \return a new symbol table with known symbols having their extracted
/// values
symbol_tablet get_snapshot_as_symbol_table();
using pointer_valuet = gdb_apit::pointer_valuet;
using memory_addresst = gdb_apit::memory_addresst;
void create_gdb_process()
{
gdb_api.create_gdb_process();
}
bool run_gdb_to_breakpoint(const std::string &breakpoint)
{
return gdb_api.run_gdb_to_breakpoint(breakpoint);
}
void run_gdb_from_core(const std::string &corefile)
{
gdb_api.run_gdb_from_core(corefile);
}
private:
gdb_apit gdb_api;
/// External symbol table -- extracted from \ref read_goto_binary
/// We only expect to analyse symbols located there.
symbol_tablet symbol_table;
const namespacet ns;
expr2ct c_converter;
allocate_objectst allocate_objects;
/// Sequence of assignments collected during \ref analyze_symbols
std::vector<std::pair<exprt, exprt>> assignments;
/// Mapping pointer expression for which \ref get_non_char_pointer_value
/// returned nil expression to memory location (from \ref gdb_apit).
std::map<exprt, memory_addresst> outstanding_assignments;
/// Storing pairs <address, symbol> such that at `address` is stored the
/// value of `symbol`.
std::map<memory_addresst, exprt> values;
/// Assign the gdb-extracted value for \p symbol_name to its symbol
/// expression and then process outstanding assignments that this
/// extraction introduced.
/// \param symbol_name: symbol table name to be analysed
void analyze_symbol(const irep_idt &symbol_name);
/// Create assignment \p lhs := \p value (see \ref analyze_symbol)
/// \param lhs: the left-hand side of the assignment; expected to be a
/// \ref symbol_exprt
/// \param value: the value to be assigned; the result of \ref get_expr_value
void add_assignment(const exprt &lhs, const exprt &value);
/// Iterate over \p array and fill its operands with the results of calling
/// \ref get_expr_value on index expressions into \p expr.
/// \param expr: the expression to be analysed
/// \param array: array with zero-initialised elements
/// \param location: the source location
/// return an array of the same type as \p expr filled with values from gdb
exprt get_array_value(
const exprt &expr,
const exprt &array,
const source_locationt &location);
/// Case analysis on the \ref typet of \p expr
/// 1) For integers, booleans, and enums: call \ref gdb_apit::get_value
/// directly
/// 2) For chars: return the \p zero_expr
/// 3) For structs, arrays, and pointers: call their dedicated functions
/// \param expr: the expression to be analysed
/// \param zero_expr: zero of the same type as \p expr
/// \param location: the source location
/// \return the value of the expression extracted from \ref gdb_apit
exprt get_expr_value(
const exprt &expr,
const exprt &zero_expr,
const source_locationt &location);
/// For each of the members of the struct: call \ref get_expr_value
/// \param expr: struct expression to be analysed
/// \param zero_expr: struct with zero-initialised members
/// \param location: the source location
/// \return the value of the struct from \ref gdb_apit
exprt get_struct_value(
const exprt &expr,
const exprt &zero_expr,
const source_locationt &location);
/// Call \ref gdb_apit::get_memory on \p expr then split based on the
/// points-to type being `char` type or not. These have dedicated functions.
/// \param expr: the input pointer expression
/// \param zero_expr: null-pointer (or its equivalent)
/// \param location: the source location
/// \return symbol expression associated with the gdb-extracted memory
/// location
exprt get_pointer_value(
const exprt &expr,
const exprt &zero_expr,
const source_locationt &location);
/// Similar to \ref get_char_pointer_value. Doesn't re-call
/// \ref gdb_apit::get_memory, calls \ref get_expr_value on _dereferenced_
/// \p expr (the result of which is assigned to a new symbol).
/// \param expr: the pointer expression to be analysed
/// \param memory_location: pointer value from \ref gdb_apit::get_memory
/// \param location: the source location
/// \return symbol expression associated with \p memory_location
exprt get_non_char_pointer_value(
const exprt &expr,
const memory_addresst &memory_location,
const source_locationt &location);
/// If \p memory_location is found among \ref values then return the symbol
/// expression associated with it.
/// Otherwise we add the appropriate \ref values mapping:
/// 1) call \ref gdb_apit::get_memory on the \p expr
/// 2) allocate new symbol and assign it with the memory string from 1)
/// 3) fill \ref values (mapping \p memory_location to the new symbol)
/// \param expr: the pointer expression to be analysed
/// \param memory_location: pointer value from \ref gdb_apit::get_memory
/// \param location: the source location
/// \return symbol expression associated with \p memory_location
exprt get_char_pointer_value(
const exprt &expr,
const memory_addresst &memory_location,
const source_locationt &location);
/// Call \ref add_assignment for each pair in \ref outstanding_assignments
void process_outstanding_assignments();
/// Extract a stringified value from and c-converted \p expr
/// \param expr: expression to be extracted
/// \return the value as a string
std::string get_gdb_value(const exprt &expr);
};
#endif // CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H

View File

@ -10,14 +10,6 @@ Author: Malte Mues <mail.mues@gmail.com>
/// \file
/// Low-level interface to gdb
// clang-format off
#if defined(__linux__) || \
defined(__FreeBSD_kernel__) || \
defined(__GNU__) || \
defined(__unix__) || \
defined(__CYGWIN__) || \
defined(__MACH__)
// clang-format on
#include <cctype>
#include <cerrno>
@ -25,6 +17,8 @@ Author: Malte Mues <mail.mues@gmail.com>
#include <cstring>
#include <regex>
#include <iostream>
#include "gdb_api.h"
#include <goto-programs/goto_model.h>
@ -34,17 +28,11 @@ Author: Malte Mues <mail.mues@gmail.com>
#include <sys/wait.h>
/// Create a gdb_apit object
///
/// \param binary the binary to run with gdb
/// \param log boolean indicating whether gdb input and output should be logged
gdb_apit::gdb_apit(const char *binary, const bool log)
: binary(binary), log(log), gdb_state(gdb_statet::NOT_CREATED)
{
}
/// Terminate the gdb process and close open streams (for reading from and
/// writing to gdb)
gdb_apit::~gdb_apit()
{
PRECONDITION(
@ -68,8 +56,6 @@ gdb_apit::~gdb_apit()
wait(NULL);
}
/// Create a new gdb process for analysing the binary indicated by the member
/// variable `binary`
void gdb_apit::create_gdb_process()
{
PRECONDITION(gdb_state == gdb_statet::NOT_CREATED);
@ -173,7 +159,6 @@ void gdb_apit::write_to_gdb(const std::string &command)
fflush(command_stream);
}
/// Return the vector of commands that have been written to gdb so far
const gdb_apit::commandst &gdb_apit::get_command_log()
{
PRECONDITION(log);
@ -259,9 +244,6 @@ bool gdb_apit::most_recent_line_has_tag(const std::string &tag)
return has_prefix(line, tag);
}
/// Run gdb with the given core file
///
/// \param corefile core dump
void gdb_apit::run_gdb_from_core(const std::string &corefile)
{
PRECONDITION(gdb_state == gdb_statet::CREATED);
@ -275,10 +257,6 @@ void gdb_apit::run_gdb_from_core(const std::string &corefile)
gdb_state = gdb_statet::STOPPED;
}
/// Run gdb to the given breakpoint
///
/// \param breakpoint the breakpoint to set (can be e.g. a line number or a
/// function name)
bool gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint)
{
PRECONDITION(gdb_state == gdb_statet::CREATED);
@ -353,70 +331,63 @@ std::string gdb_apit::eval_expr(const std::string &expr)
return value;
}
/// Get the memory address pointed to by the given pointer expression
///
/// \param expr an expression of pointer type (e.g., `&x` with `x` being of type
/// `int` or `p` with `p` being of type `int *`)
/// \return memory address in hex format
std::string gdb_apit::get_memory(const std::string &expr)
gdb_apit::pointer_valuet gdb_apit::get_memory(const std::string &expr)
{
PRECONDITION(gdb_state == gdb_statet::STOPPED);
std::string mem;
std::string value = eval_expr(expr);
// regex matching a hex memory address followed by an optional identifier in
// angle brackets (e.g., `0x601060 <x>`)
std::regex regex(R"(^(0x[1-9a-f][0-9a-f]*)( <.*>)?)");
const std::string value = eval_expr(expr);
std::regex regex(
r_hex_addr + r_opt(' ' + r_id) + r_opt(' ' + r_or(r_char, r_string)));
std::smatch result;
if(regex_match(value, result, regex))
const bool b = regex_match(value, result, regex);
CHECK_RETURN(b);
optionalt<std::string> opt_string;
const std::string string = result[4];
if(!string.empty())
{
// return hex address only
return result[1];
}
else
{
throw gdb_interaction_exceptiont(
"value `" + value +
"` is not a memory address or has unrecognised format");
const std::size_t len = string.length();
INVARIANT(
len >= 4,
"pointer-string should be: backslash, quotes, .., backslash, quotes");
INVARIANT(
string[0] == '\\',
"pointer-string should be: backslash, quotes, .., backslash, quotes");
INVARIANT(
string[1] == '"',
"pointer-string should be: backslash, quotes, .., backslash, quotes");
INVARIANT(
string[len - 2] == '\\',
"pointer-string should be: backslash, quotes, .., backslash, quotes");
INVARIANT(
string[len - 1] == '"',
"pointer-string should be: backslash, quotes, .., backslash, quotes");
opt_string = string.substr(2, len - 4);
}
UNREACHABLE;
return pointer_valuet(result[1], result[2], result[3], opt_string);
}
/// Get value of the given value expression
///
/// \param expr an expression of non-pointer type or pointer to char
/// \return value of the expression; if the expression is of type pointer to
/// char and represents a string, the string value is returned; otherwise the
/// value is returned just as it is printed by gdb
std::string gdb_apit::get_value(const std::string &expr)
{
PRECONDITION(gdb_state == gdb_statet::STOPPED);
const std::string value = eval_expr(expr);
// Get char value
{
// get string from char pointer
const std::regex regex(R"(0x[1-9a-f][0-9a-f]* \\"(.*)\\")");
// matches e.g. 99 'c' and extracts c
std::regex regex(R"([^ ]+ '([^']+)')");
std::smatch result;
if(regex_match(value, result, regex))
{
return result[1];
}
}
const bool b = regex_match(value, result, regex);
// this case will go away eventually, once client code has been refactored to
// use get_memory() instead
{
// get void pointer address
const std::regex regex(R"(0x[1-9a-f][0-9a-f]*)");
std::smatch result;
if(regex_match(value, result, regex))
if(b)
{
return result[1];
}
@ -500,4 +471,13 @@ void gdb_apit::check_command_accepted()
CHECK_RETURN(was_accepted);
}
#endif
std::string gdb_apit::r_opt(const std::string &regex)
{
return R"((?:)" + regex + R"()?)";
}
std::string
gdb_apit::r_or(const std::string &regex_left, const std::string &regex_right)
{
return R"((?:)" + regex_left + '|' + regex_right + R"())";
}

View File

@ -15,15 +15,6 @@ Author: Malte Mues <mail.mues@gmail.com>
/// gdb machine interface (see section "The GDB/MI Interface" in the
/// gdb manual to communicate with gdb.
// clang-format off
#if defined(__linux__) || \
defined(__FreeBSD_kernel__) || \
defined(__GNU__) || \
defined(__unix__) || \
defined(__CYGWIN__) || \
defined(__MACH__)
// clang-format on
#ifndef CPROVER_MEMORY_ANALYZER_GDB_API_H
#define CPROVER_MEMORY_ANALYZER_GDB_API_H
#include <unistd.h>
@ -33,23 +24,94 @@ Author: Malte Mues <mail.mues@gmail.com>
#include <util/exception_utils.h>
/// Interface for running and querying GDB
class gdb_apit
{
public:
using commandst = std::forward_list<std::string>;
struct memory_addresst
{
bool null_address;
std::string address_string;
memory_addresst() : null_address(true)
{
}
explicit memory_addresst(const std::string &address_string)
: null_address(address_string == "0x0"), address_string(address_string)
{
}
bool is_null() const
{
return null_address;
}
bool operator<(const memory_addresst &other) const
{
return address_string < other.address_string;
}
std::string string() const
{
return address_string;
}
};
/// Create a \ref gdb_apit object
/// \param binary: the binary to run with gdb
/// \param log: boolean indicating whether gdb input and output should be
/// logged
explicit gdb_apit(const char *binary, const bool log = false);
/// Terminate the gdb process and close open streams (for reading from and
/// writing to gdb)
~gdb_apit();
void create_gdb_process();
void terminate_gdb_process();
struct pointer_valuet
{
pointer_valuet() = delete;
pointer_valuet(
const std::string &address,
const std::string &pointee,
const std::string &character,
const optionalt<std::string> &string)
: address(address), pointee(pointee), character(character), string(string)
{
}
const memory_addresst address;
const std::string pointee;
const std::string character;
const optionalt<std::string> string;
};
/// Create a new gdb process for analysing the binary indicated by the member
/// variable `binary`
void create_gdb_process();
/// Run gdb to the given breakpoint
/// \param breakpoint the breakpoint to set (can be e.g. a line number or a
/// function name)
/// \return true if something failed
bool run_gdb_to_breakpoint(const std::string &breakpoint);
/// Run gdb with the given core file
/// \param corefile: core dump
void run_gdb_from_core(const std::string &corefile);
/// Get value of the given value expression
/// \param expr: an expression of non-pointer type or pointer to char
/// \return value of the expression; if the expression is of type pointer to
/// char and represents a string, the string value is returned; otherwise
/// the value is returned just as it is printed by gdb
std::string get_value(const std::string &expr);
std::string get_memory(const std::string &expr);
/// Get the memory address pointed to by the given pointer expression
/// \param expr: an expression of pointer type (e.g., `&x` with `x` being of
/// type `int` or `p` with `p` being of type `int *`)
/// \return memory address in hex format
pointer_valuet get_memory(const std::string &expr);
/// Return the vector of commands that have been written to gdb so far
/// \return the list of commands
const commandst &get_command_log();
protected:
@ -86,6 +148,27 @@ protected:
bool most_recent_line_has_tag(const std::string &tag);
bool was_command_accepted();
void check_command_accepted();
static std::string r_opt(const std::string &regex);
static std::string
r_or(const std::string &regex_left, const std::string &regex_right);
// regex group for hex memory address (part of the output of gdb when printing
// a pointer), matches e.g. 0x601040 and extracts 0x601040
const std::string r_hex_addr = R"((0x(?:0|[1-9a-f][0-9a-f]*)))";
// regex group for identifier (optional part of the output of gdb when
// printing a pointer), matches e.g. <abc> and extracts abc
const std::string r_id = R"(<([^<>]+)>)";
// regex group for octal encoded char (optional part of the output of gdb when
// printing a pointer), matches e.g. \"\\003\" and extracts \\003
const std::string r_char = R"(\\"(\\\\[0-7]{3})\\")";
// regex group for string (optional part of the output of gdb when printing a
// pointer), matches e.g. \"abc\" and extracts \"abc\"
const std::string r_string = R"((\\".*\\"))";
};
class gdb_interaction_exceptiont : public cprover_exception_baset
@ -104,4 +187,3 @@ private:
};
#endif // CPROVER_MEMORY_ANALYZER_GDB_API_H
#endif

View File

@ -0,0 +1,18 @@
/*******************************************************************\
Module: Symbol Analyzer
Author: Malte Mues <mail.mues@gmail.com>
\*******************************************************************/
/// \file
/// Memory analyzer interface
#include "memory_analyzer_parse_options.h"
int main(int argc, const char **argv)
{
memory_analyzer_parse_optionst parse_options(argc, argv);
return parse_options.main();
}

View File

@ -0,0 +1,192 @@
/*******************************************************************\
Module: Memory Analyzer
Author: Malte Mues <mail.mues@gmail.com>
Daniel Poetzl
\*******************************************************************/
/// \file
/// Commandline parser for the memory analyzer executing main work.
#include "memory_analyzer_parse_options.h"
#include "analyze_symbol.h"
#include "gdb_api.h"
#include <algorithm>
#include <fstream>
#include <ansi-c/ansi_c_language.h>
#include <goto-programs/goto_model.h>
#include <goto-programs/read_goto_binary.h>
#include <goto-programs/show_symbol_table.h>
#include <langapi/mode.h>
#include <util/config.h>
#include <util/exit_codes.h>
#include <util/message.h>
#include <util/string_utils.h>
#include <util/version.h>
memory_analyzer_parse_optionst::memory_analyzer_parse_optionst(
int argc,
const char *argv[])
: parse_options_baset(
MEMORY_ANALYZER_OPTIONS,
argc,
argv,
std::string("MEMORY-ANALYZER ") + CBMC_VERSION),
message(ui_message_handler)
{
}
int memory_analyzer_parse_optionst::doit()
{
if(cmdline.isset("version"))
{
message.status() << CBMC_VERSION << '\n';
return CPROVER_EXIT_SUCCESS;
}
config.set(cmdline);
if(cmdline.args.size() != 1)
{
throw invalid_command_line_argument_exceptiont(
"no binary provided for analysis", "<binary>");
}
if(!cmdline.isset("symbols"))
{
throw invalid_command_line_argument_exceptiont(
"need to provide symbols to analyse via --symbols", "--symbols");
}
const bool core_file = cmdline.isset("core-file");
const bool breakpoint = cmdline.isset("breakpoint");
if(core_file && breakpoint)
{
throw invalid_command_line_argument_exceptiont(
"cannot start gdb from both core-file and breakpoint",
"--core-file/--breakpoint");
}
if(!core_file && !breakpoint)
{
throw invalid_command_line_argument_exceptiont(
"need to provide either core-file or breakpoint for gdb",
"--core-file/--breakpoint");
}
const bool output_file = cmdline.isset("output-file");
const bool symtab_snapshot = cmdline.isset("symtab-snapshot");
if(symtab_snapshot && output_file)
{
throw invalid_command_line_argument_exceptiont(
"printing to a file is not supported for symbol table snapshot output",
"--symtab-snapshot");
}
register_language(new_ansi_c_language);
std::string binary = cmdline.args.front();
const std::string symbol_list(cmdline.get_value("symbols"));
std::vector<std::string> result;
split_string(symbol_list, ',', result, true, true);
auto opt = read_goto_binary(binary, ui_message_handler);
if(!opt.has_value())
{
throw deserialization_exceptiont(
"cannot read goto binary `" + binary + "'");
}
const goto_modelt goto_model(std::move(opt.value()));
gdb_value_extractort gdb_value_extractor(
goto_model.symbol_table, binary.c_str());
gdb_value_extractor.create_gdb_process();
if(core_file)
{
std::string core_file = cmdline.get_value("core-file");
gdb_value_extractor.run_gdb_from_core(core_file);
}
else if(breakpoint)
{
std::string breakpoint = cmdline.get_value("breakpoint");
gdb_value_extractor.run_gdb_to_breakpoint(breakpoint);
}
std::vector<irep_idt> result_ids(result.size());
std::transform(
result.begin(), result.end(), result_ids.begin(), [](std::string &name) {
return irep_idt{name};
});
gdb_value_extractor.analyze_symbols(result_ids);
std::ofstream file;
if(output_file)
{
file.open(cmdline.get_value("output-file"));
}
std::ostream &out =
output_file ? (std::ostream &)file : (std::ostream &)message.result();
if(symtab_snapshot)
{
symbol_tablet snapshot = gdb_value_extractor.get_snapshot_as_symbol_table();
show_symbol_table(snapshot, ui_message_handler);
}
else
{
std::string snapshot = gdb_value_extractor.get_snapshot_as_c_code();
out << snapshot;
}
if(output_file)
{
file.close();
}
else
{
message.result() << messaget::eom;
}
return CPROVER_EXIT_SUCCESS;
}
void memory_analyzer_parse_optionst::help()
{
message.status()
<< '\n'
<< banner_string("Memory-Analyzer", CBMC_VERSION) << '\n'
<< align_center_with_border("Copyright (C) 2019") << '\n'
<< align_center_with_border("Malte Mues, Diffblue Ltd.") << '\n'
<< align_center_with_border("info@diffblue.com") << '\n'
<< '\n'
<< "Usage: Purpose:\n"
<< '\n'
<< " memory-analyzer [-?] [-h] [--help] show help\n"
<< " memory-analyzer --version show"
<< " version\n"
<< " memory-analyzer --symbols <symbol-list> <options> <binary> analyze"
<< " binary\n"
<< "\n"
<< " --core-file <file> analyze from core file\n"
<< " --breakpoint <breakpoint> analyze from breakpoint\n"
<< " --symbols <symbol-list> list of symbols to analyze\n"
<< " --symtab-snapshot output snapshot as symbol table\n"
<< " --output-file <file> write snapshot to file\n"
<< " --json-ui output snapshot in JSON format\n"
<< messaget::eom;
}

View File

@ -0,0 +1,42 @@
/*******************************************************************\
Module: Memory Analyzer
Author: Malte Mues <mail.mues@gmail.com>
Daniel Poetzl
\*******************************************************************/
/// \file
/// This code does the command line parsing for the memory-analyzer tool
#ifndef CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H
#define CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H
#include <util/parse_options.h>
#include <util/ui_message.h>
// clang-format off
#define MEMORY_ANALYZER_OPTIONS \
"(version)" \
"(json-ui)" \
"(core-file):" \
"(breakpoint):" \
"(symbols):" \
"(symtab-snapshot)" \
"(output-file):"
// clang-format on
class memory_analyzer_parse_optionst : public parse_options_baset
{
public:
memory_analyzer_parse_optionst(int argc, const char *argv[]);
int doit() override;
void help() override;
protected:
messaget message;
};
#endif // CPROVER_MEMORY_ANALYZER_MEMORY_ANALYZER_PARSE_OPTIONS_H

View File

@ -1,3 +1,5 @@
ansi-c
goto-programs
langapi
util
sys

125
src/util/c_types_util.h Normal file
View File

@ -0,0 +1,125 @@
/*******************************************************************\
Module: API to expression classes
Author: Malte Mues <mail.mues@gmail.com>
\*******************************************************************/
#ifndef CPROVER_UTIL_C_TYPES_UTIL_H
#define CPROVER_UTIL_C_TYPES_UTIL_H
/// \file
/// This file contains functions, that should support test for underlying
/// c types, in cases where this is required for analysis purpose.
#include "arith_tools.h"
#include "invariant.h"
#include "std_types.h"
#include "type.h"
#include <algorithm>
#include <string>
/// This function checks, whether this has been a char type in the c program.
inline bool is_c_char_type(const typet &type)
{
const irep_idt &c_type = type.get(ID_C_c_type);
return is_signed_or_unsigned_bitvector(type) &&
(c_type == ID_char || c_type == ID_unsigned_char ||
c_type == ID_signed_char);
}
/// This function checks, whether the type
/// has been a bool type in the c program.
inline bool is_c_bool_type(const typet &type)
{
return type.id() == ID_c_bool;
}
/// This function checks, whether the type has been some kind of integer
/// type in the c program.
/// It considers the signed and unsigned verison of
/// int, short, long and long long as integer types in c.
inline bool is_c_integral_type(const typet &type)
{
const irep_idt &c_type = type.get(ID_C_c_type);
return is_signed_or_unsigned_bitvector(type) &&
(c_type == ID_signed_int || c_type == ID_unsigned_int ||
c_type == ID_signed_short_int || c_type == ID_unsigned_int ||
c_type == ID_unsigned_short_int || c_type == ID_signed_long_int ||
c_type == ID_signed_long_long_int || c_type == ID_unsigned_long_int ||
c_type == ID_unsigned_long_long_int);
}
/// This function checks, whether type is a pointer and the target type
/// of the pointer has been a char type in the c program.
inline bool is_c_char_pointer_type(const typet &type)
{
return type.id() == ID_pointer && is_c_char_type(type.subtype());
}
/// This function checks, whether type is a pointer and the target type
/// has been some kind of int type in the c program.
/// is_c_int_derivate answers is used for checking the int type.
inline bool is_c_integral_pointer_type(const typet &type)
{
return type.id() == ID_pointer && is_c_integral_type(type.subtype());
}
/// This function checks, whether the type
/// has been an enum type in the c program.
inline bool is_c_enum_type(const typet &type)
{
return type.id() == ID_c_enum;
}
/// This function creates a constant representing the
/// bitvector encoded integer value of a string in the enum.
/// \param member_name is a string that should be in the enum.
/// \param c_enum the enum type \p member_name is supposed to be part of.
/// \return constant, that could be assigned as the value of an expression with
/// type c_enum.
constant_exprt convert_member_name_to_enum_value(
const irep_idt &member_name,
const c_enum_typet &c_enum)
{
for(const auto &enum_value : c_enum.members())
{
if(enum_value.get_identifier() == member_name)
{
auto maybe_int_value = numeric_cast<mp_integer>(
constant_exprt{enum_value.get_value(), typet{ID_bv}});
CHECK_RETURN(maybe_int_value.has_value());
return from_integer(*maybe_int_value, c_enum);
}
}
INVARIANT(false, "member_name must be a valid value in the c_enum.");
}
/// Convert id to a Boolean value
/// \param bool_value: A string that is compared to "true" ignoring case.
/// \return a constant of type Boolean
bool id2boolean(const irep_idt &bool_value)
{
std::string string_value = id2string(bool_value);
std::transform(
string_value.begin(), string_value.end(), string_value.begin(), ::tolower);
if(string_value == "true")
return true;
if(string_value == "false")
return false;
UNREACHABLE;
}
/// This function creates a constant representing either 0 or 1 as value of
/// type type.
/// \param bool_value: A Boolean value.
/// \param type: The type, the resulting constant is supposed to have.
/// \return a constant of type \param type with either 0 or 1 as value.
constant_exprt from_c_boolean_value(bool bool_value, const typet &type)
{
return bool_value ? from_integer(mp_integer(1), type)
: from_integer(mp_integer(0), type);
}
#endif // CPROVER_UTIL_C_TYPES_UTIL_H

View File

@ -25,6 +25,13 @@ Author: Daniel Kroening, kroening@kroening.com
class constant_exprt;
class namespacet;
/// This method tests,
/// if the given typet is a constant
inline bool is_constant(const typet &type)
{
return type.get_bool(ID_C_constant);
}
/// The Boolean type
class bool_typet:public typet
{
@ -1064,6 +1071,13 @@ inline bool can_cast_type<bitvector_typet>(const typet &type)
type.id() == ID_c_bool;
}
/// This method tests,
/// if the given typet is a signed or unsigned bitvector.
inline bool is_signed_or_unsigned_bitvector(const typet &type)
{
return type.id() == ID_signedbv || type.id() == ID_unsignedbv;
}
/// \brief Cast a typet to a \ref bitvector_typet
///
/// This is an unchecked conversion. \a type must be known to be \ref
@ -1524,6 +1538,13 @@ inline pointer_typet &to_pointer_type(typet &type)
return static_cast<pointer_typet &>(type);
}
/// This method tests,
/// if the given typet is a pointer of type void.
inline bool is_void_pointer(const typet &type)
{
return type.id() == ID_pointer && type.subtype().id() == ID_empty;
}
/// The reference type
///
/// Intends to model C++ reference. Comparing to \ref pointer_typet should

View File

@ -1,7 +1,23 @@
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"
)
# private is overwritten in the gdb_api test cases
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-keyword-macro")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
# This would be the place to enable warnings for Windows builds, although
# config.inc doesn't seem to do that currently
endif()
file(GLOB_RECURSE sources "*.cpp" "*.h")
file(GLOB_RECURSE testing_utils "testing-utils/*.cpp" "testing-utils/*.h")
if(NOT WITH_MEMORY_ANALYZER)
file(GLOB_RECURSE memory_analyzer_sources "memory-analyzer/*.cpp")
list(REMOVE_ITEM sources ${memory_analyzer_sources})
endif()
list(REMOVE_ITEM sources
# Don't build
${CMAKE_CURRENT_SOURCE_DIR}/elf_reader.cpp

View File

@ -34,7 +34,6 @@ SRC += analyses/ai/ai.cpp \
interpreter/interpreter.cpp \
json/json_parser.cpp \
json_symbol_table.cpp \
memory-analyzer/gdb_api.cpp \
path_strategies.cpp \
pointer-analysis/value_set.cpp \
solvers/bdd/miniBDD/miniBDD.cpp \
@ -85,6 +84,23 @@ SRC += analyses/ai/ai.cpp \
util/unicode.cpp \
# Empty last line
ifeq ($(OS),Windows_NT)
detected_OS := Windows
else
detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown')
endif
ifeq ($(detected_OS),Linux)
ifneq ($(WITH_MEMORY_ANALYZER),0)
# only set if it wasn't explicitly unset
WITH_MEMORY_ANALYZER=1
endif
endif
ifeq ($(WITH_MEMORY_ANALYZER),1)
SRC += memory-analyzer/gdb_api.cpp
endif
INCLUDES= -I ../src/ -I.
CPROVER_DIR = .
@ -145,9 +161,14 @@ CPROVER_LIBS =../src/ansi-c/ansi-c$(LIBEXT) \
OBJ += $(CPROVER_LIBS) testing-utils/testing-utils$(LIBEXT)
CATCH_TEST = unit_tests$(EXEEXT)
EXCLUDED_TESTS=expr_undefined_casts.cpp
ifneq ($(WITH_MEMORY_ANALYZER),1)
EXCLUDED_TESTS += gdb_api.cpp
endif
N_CATCH_TESTS = $(shell \
cat $$(find . -name "*.cpp" \
-a -not -name "expr_undefined_casts.cpp") | \
$$(printf ' -not -name %s ' $(EXCLUDED_TESTS))) | \
grep -E "(SCENARIO|TEST_CASE)" | grep -c -v '\[\.\]')
CLEANFILES = $(CATCH_TEST) testing-utils/testing-utils$(LIBEXT)

View File

@ -9,19 +9,6 @@ Author: Malte Mues <mail.mues@gmail.com>
#include <testing-utils/use_catch.h>
// clang-format off
#if defined(__linux__) || \
defined(__FreeBSD_kernel__) || \
defined(__GNU__) || \
defined(__unix__) || \
defined(__CYGWIN__) || \
defined(__MACH__)
#define RUN_GDB_API_TESTS
#endif
// clang-format on
#ifdef RUN_GDB_API_TESTS
#include <cstdio>
#include <regex>
#include <string>
@ -45,11 +32,14 @@ void check_for_gdb()
class gdb_api_testt : public gdb_apit
{
public:
explicit gdb_api_testt(const char *binary) : gdb_apit(binary)
{
}
friend void gdb_api_internals_test();
using gdb_apit::r_hex_addr;
};
void gdb_api_internals_test()
@ -100,18 +90,13 @@ void gdb_api_internals_test()
}
}
#endif
TEST_CASE("gdb api internals test", "[core][memory-analyzer]")
{
#ifdef RUN_GDB_API_TESTS
gdb_api_internals_test();
#endif
}
TEST_CASE("gdb api test", "[core][memory-analyzer]")
{
#ifdef RUN_GDB_API_TESTS
check_for_gdb();
compile_test_file();
@ -153,7 +138,10 @@ TEST_CASE("gdb api test", "[core][memory-analyzer]")
}
}
gdb_apit gdb_api("test");
gdb_api_testt gdb_api("test");
std::regex hex_addr(gdb_api.r_hex_addr);
gdb_api.create_gdb_process();
SECTION("breakpoint is hit")
@ -174,25 +162,73 @@ TEST_CASE("gdb api test", "[core][memory-analyzer]")
gdb_api.run_gdb_to_breakpoint("checkpoint3"), gdb_interaction_exceptiont);
}
SECTION("query memory")
SECTION("query variables, primitive types")
{
const bool r = gdb_api.run_gdb_to_breakpoint("checkpoint");
REQUIRE(r);
REQUIRE(gdb_api.get_value("x") == "8");
REQUIRE(gdb_api.get_value("s") == "abc");
REQUIRE(gdb_api.get_value("y") == "2.5");
REQUIRE(gdb_api.get_value("z") == "c");
}
const std::regex regex(R"(0x[1-9a-f][0-9a-f]*)");
SECTION("query pointers")
{
const bool r = gdb_api.run_gdb_to_breakpoint("checkpoint");
REQUIRE(r);
{
std::string address = gdb_api.get_memory("p");
REQUIRE(std::regex_match(address, regex));
auto value = gdb_api.get_memory("s");
REQUIRE(std::regex_match(value.address.string(), hex_addr));
REQUIRE(value.pointee.empty());
REQUIRE(value.character.empty());
REQUIRE(*value.string == "abc");
}
{
std::string address = gdb_api.get_memory("vp");
REQUIRE(std::regex_match(address, regex));
auto value = gdb_api.get_memory("p");
REQUIRE(std::regex_match(value.address.string(), hex_addr));
REQUIRE(value.pointee == "x");
REQUIRE(value.character.empty());
REQUIRE(!value.string);
}
{
auto value = gdb_api.get_memory("vp");
REQUIRE(std::regex_match(value.address.string(), hex_addr));
REQUIRE(value.pointee == "x");
REQUIRE(value.character.empty());
REQUIRE(!value.string);
}
{
auto value = gdb_api.get_memory("np");
REQUIRE(value.address.is_null());
REQUIRE(value.pointee.empty());
REQUIRE(value.character.empty());
REQUIRE(!value.string);
}
{
auto value = gdb_api.get_memory("vp_string");
REQUIRE(std::regex_match(value.address.string(), hex_addr));
REQUIRE(value.pointee.empty());
REQUIRE(value.character.empty());
REQUIRE(!value.string);
}
}
SECTION("query expressions")
{
const bool r = gdb_api.run_gdb_to_breakpoint("checkpoint");
REQUIRE(r);
{
auto value = gdb_api.get_memory("&x");
REQUIRE(std::regex_match(value.address.string(), hex_addr));
REQUIRE(value.pointee == "x");
REQUIRE(value.character.empty());
REQUIRE(!value.string);
}
}
#endif
}

View File

@ -1,7 +1,12 @@
int x;
float y;
char z;
char *s = "abc";
int *p;
void *vp;
int *np = 0;
void *vp_string;
void checkpoint()
{
@ -18,8 +23,12 @@ void func()
int main()
{
x = 8;
y = 2.5;
z = 'c';
p = &x;
vp = (void *)&x;
vp_string = s;
checkpoint();