From 9aa70a7d4dffbbd045a5a91602e1ad60dfb70384 Mon Sep 17 00:00:00 2001 From: Peter Schrammel Date: Thu, 22 Mar 2018 19:27:20 +0000 Subject: [PATCH] Add doc README.md files to each directory --- src/analyses/README.md | 10 ++ src/ansi-c/README | 22 ---- src/ansi-c/{module.md => README.md} | 49 ++++++++- src/assembler/README.md | 6 ++ src/big-int/README | 1 - src/big-int/README.md | 14 +++ src/cbmc/README.md | 13 +++ src/cbmc/module.md | 47 --------- src/clobber/README.md | 6 ++ src/cpp/README.md | 17 +++ src/goto-analyzer/README.md | 8 ++ src/goto-cc/README.md | 16 +++ src/goto-diff/README.md | 7 ++ src/goto-instrument/README.md | 28 +++++ src/goto-programs/{module.md => README.md} | 114 ++++++++++++++++++-- src/goto-symex/{module.md => README.md} | 20 +++- src/java_bytecode/README.md | 6 ++ src/jbmc/README.md | 6 ++ src/jsil/README.md | 6 ++ src/json/README.md | 6 ++ src/langapi/README.md | 12 +++ src/linking/README.md | 11 ++ src/memory-models/README.md | 6 ++ src/miniz/README.md | 6 ++ src/nonstd/README.md | 7 ++ src/pointer-analysis/README.md | 12 +++ src/solvers/{refinement => }/README.md | 115 ++++++++++++++++++++- src/solvers/module.md | 62 ----------- src/util/README.md | 95 +++++++++++++++++ src/util/command-line-parsing.md | 16 --- src/xmllang/README | 3 - src/xmllang/README.md | 15 +++ 32 files changed, 598 insertions(+), 164 deletions(-) create mode 100644 src/analyses/README.md delete mode 100644 src/ansi-c/README rename src/ansi-c/{module.md => README.md} (74%) create mode 100644 src/assembler/README.md delete mode 100644 src/big-int/README create mode 100644 src/big-int/README.md create mode 100644 src/cbmc/README.md delete mode 100644 src/cbmc/module.md create mode 100644 src/clobber/README.md create mode 100644 src/cpp/README.md create mode 100644 src/goto-analyzer/README.md create mode 100644 src/goto-cc/README.md create mode 100644 src/goto-diff/README.md create mode 100644 src/goto-instrument/README.md rename src/goto-programs/{module.md => README.md} (59%) rename src/goto-symex/{module.md => README.md} (85%) create mode 100644 src/java_bytecode/README.md create mode 100644 src/jbmc/README.md create mode 100644 src/jsil/README.md create mode 100644 src/json/README.md create mode 100644 src/langapi/README.md create mode 100644 src/linking/README.md create mode 100644 src/memory-models/README.md create mode 100644 src/miniz/README.md create mode 100644 src/nonstd/README.md create mode 100644 src/pointer-analysis/README.md rename src/solvers/{refinement => }/README.md (78%) delete mode 100644 src/solvers/module.md create mode 100644 src/util/README.md delete mode 100644 src/util/command-line-parsing.md delete mode 100644 src/xmllang/README create mode 100644 src/xmllang/README.md diff --git a/src/analyses/README.md b/src/analyses/README.md new file mode 100644 index 0000000000..7d5bb5ef26 --- /dev/null +++ b/src/analyses/README.md @@ -0,0 +1,10 @@ +\ingroup module_hidden +\defgroup analyses analyses + +# Folder analyses + +This contains the abstract interpretation framework `ai.h` and several +static analyses that instantiate it. + +FIXME: put here a good introduction describing what is contained +in this folder. diff --git a/src/ansi-c/README b/src/ansi-c/README deleted file mode 100644 index d4df317090..0000000000 --- a/src/ansi-c/README +++ /dev/null @@ -1,22 +0,0 @@ -CodeWarrior C Compilers Reference 3.2: - -http://cache.freescale.com/files/soft_dev_tools/doc/ref_manual/CCOMPILERRM.pdf - -http://cache.freescale.com/files/soft_dev_tools/doc/ref_manual/ASMX86RM.pdf - -ARM 4.1 Compiler Reference: - -http://infocenter.arm.com/help/topic/com.arm.doc.dui0491c/DUI0491C_arm_compiler_reference.pdf - - -Parsing performance considerations: - -* Measured on trunk/regression/ansi-c/windows_h_VS_2012/main.i - -* 13%: Copying into i_preprocessed - -* 5%: ansi_c_parser.read() - -* 53%: yyansi_clex() - -* 29%: parser (without typechecking) diff --git a/src/ansi-c/module.md b/src/ansi-c/README.md similarity index 74% rename from src/ansi-c/module.md rename to src/ansi-c/README.md index fb9d119110..a1d73685e6 100644 --- a/src/ansi-c/module.md +++ b/src/ansi-c/README.md @@ -1,7 +1,27 @@ +\ingroup module_hidden \defgroup ansi-c ansi-c # Folder ansi-c -\author Kareem Khazem +\author Kareem Khazem, Martin Brain + +\section overview Overview + +Contains the front-end for ANSI C, plus a variety of common extensions. +This parses the file, performs some basic sanity checks (this is one +area in which the UI could be improved; patches most welcome) and then +produces a goto-program (see below). The parser is a traditional Flex / +Bison system. + +`internal_addition.c` contains the implementation of various ‘magic’ +functions that are that allow control of the analysis from the source +code level. These include assertions, assumptions, atomic blocks, memory +fences and rounding modes. + +The `library/` subdirectory contains versions of some of the C standard +header files that make use of the CPROVER built-in functions. This +allows CPROVER programs to be ‘aware’ of the functionality and model it +correctly. Examples include `stdio.c`, `string.c`, `setjmp.c` and +various threading interfaces. \section preprocessing Preprocessing & Parsing @@ -24,8 +44,6 @@ digraph G { \enddot - ---- \section type-checking Type-checking In the \ref ansi-c and \ref java_bytecode directories. @@ -112,3 +130,28 @@ called symbols. Thus, for example: parameter and return types of the function. The value of the symbol is the function's body (a \ref codet), and the symbol is stored in the symbol table with `foo` as the key. + + +\section performance Parsing performance considerations + +* Measured on trunk/regression/ansi-c/windows_h_VS_2012/main.i + +* 13%: Copying into i_preprocessed + +* 5%: ansi_c_parser.read() + +* 53%: yyansi_clex() + +* 29%: parser (without typechecking) + +\section references Compiler References + +CodeWarrior C Compilers Reference 3.2: + +http://cache.freescale.com/files/soft_dev_tools/doc/ref_manual/CCOMPILERRM.pdf + +http://cache.freescale.com/files/soft_dev_tools/doc/ref_manual/ASMX86RM.pdf + +ARM 4.1 Compiler Reference: + +http://infocenter.arm.com/help/topic/com.arm.doc.dui0491c/DUI0491C_arm_compiler_reference.pdf diff --git a/src/assembler/README.md b/src/assembler/README.md new file mode 100644 index 0000000000..1f5c1415fa --- /dev/null +++ b/src/assembler/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup assembler assembler + +# Folder assembler + +`assembler/`provides front-end processing for assembler. diff --git a/src/big-int/README b/src/big-int/README deleted file mode 100644 index 2fa251b46b..0000000000 --- a/src/big-int/README +++ /dev/null @@ -1 +0,0 @@ -http://www.dirk-zoller.de/ diff --git a/src/big-int/README.md b/src/big-int/README.md new file mode 100644 index 0000000000..0bc17517c7 --- /dev/null +++ b/src/big-int/README.md @@ -0,0 +1,14 @@ +\ingroup module_hidden +\defgroup big-int big-int + +# Folder big-int + +\author Martin Brain + +CPROVER is distributed with its own multi-precision arithmetic library; +mainly for historical and portability reasons. The library is externally +developed and thus `big-int` contains the source as it is distributed: +[http://www.dirk-zoller.de/](http://www.dirk-zoller.de/). + +This should not be used directly, see `util/mp_arith.h` for the CPROVER +interface. diff --git a/src/cbmc/README.md b/src/cbmc/README.md new file mode 100644 index 0000000000..0db5563970 --- /dev/null +++ b/src/cbmc/README.md @@ -0,0 +1,13 @@ +\ingroup module_hidden +\defgroup cbmc cbmc + +# Folder CBMC + +\author Martin Brain + +This contains the first full application. CBMC is a bounded model +checker that uses the front ends (`ansi-c`, `cpp`, goto-program or +others) to create a goto-program, `goto-symex` to unwind the loops the +given number of times and to produce and equation system and finally +`solvers` to find a counter-example (technically, `goto-symex` is then +used to construct the counter-example trace). diff --git a/src/cbmc/module.md b/src/cbmc/module.md deleted file mode 100644 index d2dcb75b4b..0000000000 --- a/src/cbmc/module.md +++ /dev/null @@ -1,47 +0,0 @@ -\ingroup module_hidden -\defgroup module_cbmc CBMC tour - -\author Kareem Khazem - -CBMC takes C code or a goto-binary as input and tries to emit traces of -executions that lead to crashes or undefined behaviour. The diagram -below shows the intermediate steps in this process. - - -\dot -digraph G { - - rankdir="TB"; - node [shape=box, fontcolor=blue]; - - subgraph top { - rank=same; - 1 -> 2 -> 3 -> 4; - } - - subgraph bottom { - rank=same; - 5 -> 6 -> 7 -> 8 -> 9; - } - - /* shift bottom subgraph over */ - 9 -> 1 [color=white]; - - 4 -> 5; - - 1 [label="command line\nparsing" URL="\ref cbmc_parse_optionst"]; - 2 [label="preprocessing,\nparsing" URL="\ref preprocessing"]; - 3 [label="language\ntype-checking" URL="\ref type-checking"]; - 4 [label="goto\nconversion" URL="\ref goto-conversion"]; - 5 [label="instrumentation" URL="\ref instrumentation"]; - 6 [label="symbolic\nexecution" URL="\ref symbolic-execution"]; - 7 [label="SAT/SMT\nencoding" URL="\ref sat-smt-encoding"]; - 8 [label="decision\nprocedure" URL="\ref decision-procedure"]; - 9 [label="counter example\nproduction" URL="\ref counter-example-production"]; -} -\enddot - -The \ref cprover-manual "CProver Manual" describes CBMC from a user -perspective. Each node in the diagram above links to the appropriate -class or module documentation, describing that particular stage in the -CBMC pipeline. diff --git a/src/clobber/README.md b/src/clobber/README.md new file mode 100644 index 0000000000..fef0bd9392 --- /dev/null +++ b/src/clobber/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup clobber clobber + +# Folder clobber + +`clobber\` is a module that is a tool. diff --git a/src/cpp/README.md b/src/cpp/README.md new file mode 100644 index 0000000000..f3a4ceda72 --- /dev/null +++ b/src/cpp/README.md @@ -0,0 +1,17 @@ +\ingroup module_hidden +\defgroup cpp cpp + +# Folder cpp + +\author Martin Brain + +This directory contains the C++ front-end. It supports the subset of C++ +commonly found in embedded and system applications. Consequentially it +doesn’t have full support for templates and many of the more advanced +and obscure C++ features. The subset of the language that can be handled +is being extended over time so bug reports of programs that cannot be +parsed are useful. + +The functionality is very similar to the ANSI C front end; parsing the +code and converting to goto-programs. It makes use of code from +`langapi` and `ansi-c`. diff --git a/src/goto-analyzer/README.md b/src/goto-analyzer/README.md new file mode 100644 index 0000000000..0449a4a7b0 --- /dev/null +++ b/src/goto-analyzer/README.md @@ -0,0 +1,8 @@ +\ingroup module_hidden +\defgroup goto-analyzer goto-analyzer + +# Folder goto-analyzer + +`goto-analyzer/` is a tool performing static analyses on goto +programs. It provides the front end for many of the static analyses +in the \ref analyses directory. diff --git a/src/goto-cc/README.md b/src/goto-cc/README.md new file mode 100644 index 0000000000..98dbc67e6a --- /dev/null +++ b/src/goto-cc/README.md @@ -0,0 +1,16 @@ +\ingroup module_hidden +\defgroup goto-cc goto-cc + +# Folder goto-cc + +\author Martin Brain + +`goto-cc` is a compiler replacement that just performs the first step of +the process; converting C or C++ programs to goto-binaries. It is +intended to be dropped in to an existing build procedure in place of the +compiler, thus it emulates flags that would affect the semantics of the +code produced. Which set of flags are emulated depends on the naming of +the `goto-cc/` binary. If it is called `goto-cc` then it emulates GCC +flags, `goto-armcc` emulates the ARM compiler, `goto-cl` emulates VCC +and `goto-cw` emulates the Code Warrior compiler. The output of this +tool can then be used with `cbmc` or `goto-instrument`. diff --git a/src/goto-diff/README.md b/src/goto-diff/README.md new file mode 100644 index 0000000000..d49350f4f6 --- /dev/null +++ b/src/goto-diff/README.md @@ -0,0 +1,7 @@ +\ingroup module_hidden +\defgroup goto-diff goto-diff + +# Folder goto-diff + +`goto-diff/` is a tool that offers functionality similar to the `diff` +tool, but for GOTO programs. diff --git a/src/goto-instrument/README.md b/src/goto-instrument/README.md new file mode 100644 index 0000000000..5be917235b --- /dev/null +++ b/src/goto-instrument/README.md @@ -0,0 +1,28 @@ +\ingroup module_hidden +\defgroup goto-instrument goto-instrument + +# Folder goto-instrument + +\author Martin Brain + +The `goto-instrument/` directory contains a number of tools, one per +file, that are built into the `goto-instrument` program. All of them +take in a goto-program (produced by `goto-cc`) and either modify it or +perform some analysis. Examples include `nondet_static.cpp` which +initialises static variables to a non-deterministic value, +`nondet_volatile.cpp` which assigns a non-deterministic value to any +volatile variable before it is read and `weak_memory.h` which performs +the necessary transformations to reason about weak memory models. The +exception to the “one file for each piece of functionality” rule are the +program instrumentation options (mostly those given as “Safety checks” +in the `goto-instrument` help text) which are included in the +`goto-program/` directory. An example of this is +`goto-program/stack_depth.h` and the general rule seems to be that +transformations and instrumentation that `cbmc` uses should be in +`goto-program/`, others should be in `goto-instrument`. + +`goto-instrument` is a very good template for new analysis tools. New +developers are advised to copy the directory, remove all files apart +from `main.*`, `parseoptions.*` and the `Makefile` and use these as the +skeleton of their application. The `doit()` method in `parseoptions.cpp` +is the preferred location for the top level control for the program. diff --git a/src/goto-programs/module.md b/src/goto-programs/README.md similarity index 59% rename from src/goto-programs/module.md rename to src/goto-programs/README.md index d7696cd6ad..4bccc9de72 100644 --- a/src/goto-programs/module.md +++ b/src/goto-programs/README.md @@ -1,7 +1,107 @@ +\ingroup module_hidden \defgroup goto-programs goto-programs + # Folder goto-programs -\author Kareem Khazem +\author Kareem Khazem, Martin Brain + +\section overview Overview +Goto programs are the intermediate representation of the CPROVER tool +chain. They are language independent and similar to many of the compiler +intermediate languages. Section \ref goto-programs describes the +`goto_programt` and `goto_functionst` data structures in detail. However +it useful to understand some of the basic concepts. Each function is a +list of instructions, each of which has a type (one of 18 kinds of +instruction), a code expression, a guard expression and potentially some +targets for the next instruction. They are not natively in static +single-assign (SSA) form. Transitions are nondeterministic (although in +practise the guards on the transitions normally cover form a disjoint +cover of all possibilities). Local variables have non-deterministic +values if they are not initialised. Variables and data within the +program is commonly one of three types (parameterised by width): +`unsignedbv_typet`, `signedbv_typet` and `floatbv_typet`, see +`util/std_types.h` for more information. Goto programs can be serialised +in a binary (wrapped in ELF headers) format or in XML (see the various +`_serialization` files). + +The `cbmc` option `–show-goto-programs` is often a good starting point +as it outputs goto-programs in a human readable form. However there are +a few things to be aware of. Functions have an internal name (for +example `c::f00`) and a ‘pretty name’ (for example `f00`) and which is +used depends on whether it is internal or being presented to the user. +The `main` method is the ‘logical’ main which is not necessarily the +main method from the code. In the output `NONDET` is use to represent a +nondeterministic assignment to a variable. Likewise `IF` as a beautified +`GOTO` instruction where the guard expression is used as the condition. +`RETURN` instructions may be dropped if they precede an `END_FUNCTION` +instruction. The comment lines are generated from the `locationt` field +of the `instructiont` structure. + +`goto-programs/` is one of the few places in the CPROVER codebase that +templates are used. The intention is to allow the general architecture +of program and functions to be used for other formalisms. At the moment +most of the templates have a single instantiation; for example +`goto_functionst` and `goto_function_templatet` and `goto_programt` and +`goto_program_templatet`. + +\section data_structures Data Structures + +FIXME: This text is partially outdated. + +The common starting point for working with goto-programs is the +`read_goto_binary` function which populates an object of +`goto_functionst` type. This is defined in `goto_functions.h` and is an +instantiation of the template `goto_functions_templatet` which is +contained in `goto_functions_template.h`. They are wrappers around a map +from strings to `goto_programt`’s and iteration macros are provided. +Note that `goto_function_templatet` (no `s`) is defined in the same +header as `goto_functions_templatet` and is gives the C type for the +function and Boolean which indicates whether the body is available +(before linking this might not always be true). Also note the slightly +counter-intuitive naming; `goto_functionst` instances are the top level +structure representing the program and contain `goto_programt` instances +which represent the individual functions. At the time of writing +`goto_functionst` is the only instantiation of the template +`goto_functions_templatet` but other could be produced if a different +data-structures / kinds of models were needed for functions. + +`goto_programt` is also an instantiation of a template. In a similar +fashion it is `goto_program_templatet` and allows the types of the guard +and expression used in instructions to be parameterised. Again, this is +currently the only use of the template. As such there are only really +helper functions in `goto_program.h` and thus `goto_program_template.h` +is probably the key file that describes the representation of (C) +functions in the goto-program format. It is reasonably stable and +reasonably documented and thus is a good place to start looking at the +code. + +An instance of `goto_program_templatet` is effectively a list of +instructions (and inner template called `instructiont`). It is important +to use the copy and insertion functions that are provided as iterators +are used to link instructions to their predecessors and targets and +careless manipulation of the list could break these. Likewise there are +helper macros for iterating over the instructions in an instance of +`goto_program_templatet` and the use of these is good style and strongly +encouraged. + +Individual instructions are instances of type `instructiont`. They +represent one step in the function. Each has a type, an instance of +`goto_program_instruction_typet` which denotes what kind of instruction +it is. They can be computational (such as `ASSIGN` or `FUNCTION_CALL`), +logical (such as `ASSUME` and `ASSERT`) or informational (such as +`LOCATION` and `DEAD`). At the time of writing there are 18 possible +values for `goto_program_instruction_typet` / kinds of instruction. +Instructions also have a guard field (the condition under which it is +executed) and a code field (what the instruction does). These may be +empty depending on the kind of instruction. In the default +instantiations these are of type `exprt` and `codet` respectively and +thus covered by the previous discussion of `irept` and its descendents. +The next instructions (remembering that transitions are guarded by +non-deterministic) are given by the list `targets` (with the +corresponding list of labels `labels`) and the corresponding set of +previous instructions is get by `incoming_edges`. Finally `instructiont` +have informational `function` and `location` fields that indicate where +they are in the code. \section goto-conversion Goto Conversion @@ -10,7 +110,7 @@ In the \ref goto-programs directory. **Key classes:** * goto_programt * goto_functionst -* \ref goto_program_templatet::instructiont +* \ref goto_programt::instructiont \dot digraph G { @@ -36,7 +136,7 @@ entire target program is converted to a single \ref goto_functionst object. The goto functions contains a set of \ref goto_programt objects; each of these correspond to a "function" or "method" in the target program. Each goto_program contains a list of -\ref goto_program_templatet::instructiont "instructions"; each +\ref goto_programt::instructiont "instructions"; each instruction contains an associated statement---these are subtypes of \ref codet. Each instruction also contains a "target", which will be empty for now. @@ -188,7 +288,7 @@ goto_program_templatet and goto_functions_templatet, respectively. This means that the generated Doxygen documentation can be somewhat obtuse about the actual types of things, and is unable to generate links to the correct classes. Note that the -\ref goto_program_templatet::instructiont::code "code" member of a +\ref goto_programt::instructiont::code "code" member of a goto_programt's instruction has type \ref codet (its type in the goto_program_templatet documentation is given as "codeT", as this is the name of the template's type parameter); similarly, the type of a guard @@ -202,7 +302,7 @@ In the \ref goto-programs directory. **Key classes:** * goto_programt * goto_functionst -* \ref goto_program_templatet::instructiont +* \ref goto_programt::instructiont \dot digraph G { @@ -236,9 +336,9 @@ previous stage: the conditionals are flattened into lists of instructions, inline with the rest of the instruction in the goto_programt. The guard of the conditional is placed onto the - \ref goto_program_templatet::instructiont::guard "guard" member of + \ref goto_programt::instructiont::guard "guard" member of an instruction whose code member is of type \ref code_gotot. The - \ref goto_program_templatet::instructiont::targets "targets" member + \ref goto_programt::instructiont::targets "targets" member of that instruction points to the appropriate branch of the conditional. (Note that although instructions have a list of targets, in practice an instruction should only ever have at most diff --git a/src/goto-symex/module.md b/src/goto-symex/README.md similarity index 85% rename from src/goto-symex/module.md rename to src/goto-symex/README.md index fa34628d82..5617dc04ee 100644 --- a/src/goto-symex/module.md +++ b/src/goto-symex/README.md @@ -1,7 +1,25 @@ +\ingroup module_hidden \defgroup goto-symex goto-symex + # Folder goto-symex -\author Kareem Khazem +\author Kareem Khazem, Martin Brain + +This directory contains a symbolic evaluation system for goto-programs. +This takes a goto-program and translates it to an equation system by +traversing the program, branching and merging and unwinding loops as +needed. Each reverse goto has a separate counter (the actual counting is +handled by `cbmc`, see the `–unwind` and `–unwind-set` options). When a +counter limit is reach, an assertion can be added to explicitly show +when analysis is incomplete. The symbolic execution includes constant +folding so loops that have a constant number of iterations will be +handled completely (assuming the unwinding limit is sufficient). + +The output of the symbolic execution is a system of equations; an object +containing a list of `symex_target_elements`, each of which are +equalities between `expr` expressions. See `symex_target_equation.h`. +The output is in static, single assignment (SSA) form, which is *not* +the case for goto-programs. \section symbolic-execution Symbolic Execution diff --git a/src/java_bytecode/README.md b/src/java_bytecode/README.md new file mode 100644 index 0000000000..7b8812aa0b --- /dev/null +++ b/src/java_bytecode/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup java_bytecode java_bytecode + +# Folder java_bytecode + +This module providesy a front end for Java. diff --git a/src/jbmc/README.md b/src/jbmc/README.md new file mode 100644 index 0000000000..3fa183e797 --- /dev/null +++ b/src/jbmc/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup jbmc jbmc + +# Folder jbmc + +`jbmc/` is like cbmc but especially designed for Java. diff --git a/src/jsil/README.md b/src/jsil/README.md new file mode 100644 index 0000000000..3f77753cbf --- /dev/null +++ b/src/jsil/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup jsil jsil + +# Folder jsil + +`jsil/` contains a JavaScript front end. diff --git a/src/json/README.md b/src/json/README.md new file mode 100644 index 0000000000..dbd4ffa978 --- /dev/null +++ b/src/json/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup json json + +# Folder json + +`json/` contains a JSON parser. diff --git a/src/langapi/README.md b/src/langapi/README.md new file mode 100644 index 0000000000..b39599ea20 --- /dev/null +++ b/src/langapi/README.md @@ -0,0 +1,12 @@ +\ingroup module_hidden +\defgroup langapi langapi + +# Folder langapi + +\author Martin Brain + +`langapi/` contains the basic interfaces and support classes for programming +language front ends. Developers only really need look at this if they +are adding support for a new language. It’s main users are the +language front-ends such as `ansi-c/` and +`cpp/`. diff --git a/src/linking/README.md b/src/linking/README.md new file mode 100644 index 0000000000..28c68ae21e --- /dev/null +++ b/src/linking/README.md @@ -0,0 +1,11 @@ +\ingroup module_hidden +\defgroup linking linking + +# Folder linking + +\author Martin Brain + +This allows multiple ‘object +files’ (goto-programs) to be linked into one ‘executable’ (another +goto-program), thus allowing existing build systems to be used to build +complete goto-program binaries. diff --git a/src/memory-models/README.md b/src/memory-models/README.md new file mode 100644 index 0000000000..5eadb68117 --- /dev/null +++ b/src/memory-models/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup memory-models memory-models + +# Folder memory-models + +`memory-models` contains tools related to weak memory models. diff --git a/src/miniz/README.md b/src/miniz/README.md new file mode 100644 index 0000000000..0224ec7773 --- /dev/null +++ b/src/miniz/README.md @@ -0,0 +1,6 @@ +\ingroup module_hidden +\defgroup miniz miniz + +# Folder miniz + +`miniz/` contains a minimal ZIP compression library. diff --git a/src/nonstd/README.md b/src/nonstd/README.md new file mode 100644 index 0000000000..aebbaabb6f --- /dev/null +++ b/src/nonstd/README.md @@ -0,0 +1,7 @@ +\ingroup module_hidden +\defgroup nonstd nonstd + +# Folder nonstd + +`nonstd` contains implementations of C++ utilities that are not yet +part of the standard library, e.g. for `optional`. diff --git a/src/pointer-analysis/README.md b/src/pointer-analysis/README.md new file mode 100644 index 0000000000..0ef8b89f44 --- /dev/null +++ b/src/pointer-analysis/README.md @@ -0,0 +1,12 @@ +\ingroup module_hidden +\defgroup pointer-analysis pointer-analysis + +# Folder pointer-analysis + +\author Martin Brain + +To perform symbolic execution on programs with dereferencing of +arbitrary pointers, some alias analysis is needed. `pointer-analysis` +contains the three levels of analysis; flow and context insensitive, +context sensitive and flow and context sensitive. The code needed is +subtle and sophisticated and thus there may be bugs. diff --git a/src/solvers/refinement/README.md b/src/solvers/README.md similarity index 78% rename from src/solvers/refinement/README.md rename to src/solvers/README.md index f0ec6efde6..7a45f24666 100644 --- a/src/solvers/refinement/README.md +++ b/src/solvers/README.md @@ -1,8 +1,111 @@ +\ingroup module_hidden \defgroup solvers solvers # Folder solvers -\defgroup string_solver_interface String solver interface -\author Romain Brenguier +\authors Romain Brenguier, Kareem Khazem, Martin Brain + +\section overview Overview + +The basic role of solvers is to answer whether the set of equations given +is satisfiable. +One example usage, is to determine whether an assertion in a +program can be violated. +We refer to \ref goto-symex for how CBMC and JBMC convert a input program +and property to a set of equations. + +The secondary role of solvers is to provide a satisfying assignment of +the variables of the equations, this can for instance be used to construct +a trace. + +The `solvers/` directory contains interfaces to a number of +different decision procedures, roughly one per directory. + +* prop/: The basic and common functionality. The key file is + `prop_conv.h` which defines `prop_convt`. This is the base class that + is used to interface to the decision procedures. The key functions are + `convert` which takes an `exprt` and converts it to the appropriate, + solver specific, data structures and `dec_solve` (inherited from + `decision_proceduret`) which invokes the actual decision procedures. + Individual decision procedures (named `*_dect`) objects can be created + but `prop_convt` is the preferred interface for code that uses them. + +* flattening/: A library that converts operations to bit-vectors, + including calling the conversions in `floatbv` as necessary. Is + implemented as a simple conversion (with caching) and then a + post-processing function that adds extra constraints. This is not used + by the SMT or CVC back-ends. + +* dplib/: Provides the `dplib_dect` object which used the decision + procedure library from “Decision Procedures : An Algorithmic Point of + View”. + +* cvc/: Provides the `cvc_dect` type which interfaces to the old (pre + SMTLib) input format for the CVC family of solvers. This format is + still supported by depreciated in favour of SMTLib 2. + +* smt1/: Provides the `smt1_dect` type which converts the formulae to + SMTLib version 1 and then invokes one of Boolector, CVC3, OpenSMT, + Yices, MathSAT or Z3. Again, note that this format is depreciated. + +* smt2/: Provides the `smt2_dect` type which functions in a similar + way to `smt1_dect`, calling Boolector, CVC3, MathSAT, Yices or Z3. + Note that the interaction with the solver is batched and uses + temporary files rather than using the interactive command supported by + SMTLib 2. With the `–fpa` option, this output mode will not flatten + the floating point arithmetic and instead output the proposed SMTLib + floating point standard. + +* qbf/: Back-ends for a variety of QBF solvers. Appears to be no + longer used or maintained. + +* sat/: Back-ends for a variety of SAT solvers and DIMACS output. + +\section sat-smt-encoding SAT/SMT Encoding + +In the \ref solvers directory. + +**Key classes:** +* \ref literalt +* \ref boolbvt +* \ref propt + +\dot +digraph G { + node [shape=box]; + rankdir="LR"; + 1 [shape=none, label=""]; + 2 [label="goto conversion"]; + 3 [shape=none, label=""]; + 1 -> 2 [label="equations"]; + 2 -> 3 [label="propositional variables as bitvectors, constraints"]; +} +\enddot + + +--- + +\section decision-procedure Decision Procedure + +In the \ref solvers directory. + +**Key classes:** +* symex_target_equationt +* \ref propt +* \ref bmct + +\dot +digraph G { + node [shape=box]; + rankdir="LR"; + 1 [shape=none, label=""]; + 2 [label="goto conversion"]; + 3 [shape=none, label=""]; + 1 -> 2 [label="propositional variables as bitvectors, constraints"]; + 2 -> 3 [label="solutions"]; +} +\enddot + +\section string-solver-interface String Solver Interface The string solver is particularly aimed at string logic, but since it inherits from \ref bv_refinementt it is also capable of handling arithmetic, array logic, @@ -241,3 +344,11 @@ This is done by generate_instantiations(messaget::mstreamt &stream, const namesp \copydetails check_axioms(const string_axiomst &axioms, string_constraint_generatort &generator, const std::function &get, messaget::mstreamt &stream, const namespacet &ns, std::size_t max_string_length, bool use_counter_example, ui_message_handlert::uit ui, const union_find_replacet &symbol_resolve) \link check_axioms(const string_axiomst &axioms, string_constraint_generatort &generator, const std::function &get, messaget::mstreamt &stream, const namespacet &ns, std::size_t max_string_length, bool use_counter_example, ui_message_handlert::uit ui, const union_find_replacet &symbol_resolve) (See function documentation...) \endlink + +\section floatbv Floatbv Directory + +This library contains the code that is used to convert floating point +variables (`floatbv`) to bit vectors (`bv`). This is referred to as +‘bit-blasting’ and is called in the `solver` code during conversion to +SAT or SMT. It also contains the abstraction code described in the +FMCAD09 paper. diff --git a/src/solvers/module.md b/src/solvers/module.md deleted file mode 100644 index 0369735e55..0000000000 --- a/src/solvers/module.md +++ /dev/null @@ -1,62 +0,0 @@ -\ingroup module_hidden -\defgroup module_solvers SAT/SMT Encoding and Decision Procedure - -\author Kareem Khazem - -The basic role of solvers is to answer whether the set of equations given -is satisfiable. -One example usage, is to determine whether an assertion in a -program can be violated. -We refer to \ref module_goto-symex for how CBMC and JBMC convert a input program -and property to a set of equations. - -The secondary role of solvers is to provide a satisfying assignment of -the variables of the equations, this can for instance be used to construct -a trace. - -The most general solver in terms of supported equations is \ref string-solver. - -\section sat-smt-encoding SAT/SMT Encoding - -In the \ref solvers directory. - -**Key classes:** -* \ref literalt -* \ref boolbvt -* \ref propt - -\dot -digraph G { - node [shape=box]; - rankdir="LR"; - 1 [shape=none, label=""]; - 2 [label="goto conversion"]; - 3 [shape=none, label=""]; - 1 -> 2 [label="equations"]; - 2 -> 3 [label="propositional variables as bitvectors, constraints"]; -} -\enddot - - ---- - -\section decision-procedure Decision Procedure - -In the \ref solvers directory. - -**Key classes:** -* symex_target_equationt -* \ref propt -* \ref bmct - -\dot -digraph G { - node [shape=box]; - rankdir="LR"; - 1 [shape=none, label=""]; - 2 [label="goto conversion"]; - 3 [shape=none, label=""]; - 1 -> 2 [label="propositional variables as bitvectors, constraints"]; - 2 -> 3 [label="solutions"]; -} -\enddot diff --git a/src/util/README.md b/src/util/README.md new file mode 100644 index 0000000000..72b0c3a93e --- /dev/null +++ b/src/util/README.md @@ -0,0 +1,95 @@ +\ingroup module_hidden +\defgroup util util + +# Folder util + +\author Martin Brain + +\section data_structures Data Structures + +This section discusses some of the key data-structures used in the +CPROVER codebase. + +\subsection irept Irept Data Structure + +There are a large number of kind of tree structured or tree-like data in +CPROVER. `irept` provides a single, unified representation for all of +these, allowing structure sharing and reference counting of data. As +such `irept` is the basic unit of data in CPROVER. Each `irept` +contains[^2] a basic unit of data (of type `dt`) which contains four +things: + +* `data`: A string[^3], which is returned when the `id()` function is + used. + +* `named_sub`: A map from `irep_namet` (a string) to an `irept`. This + is used for named children, i.e. subexpressions, parameters, etc. + +* `comments`: Another map from `irep_namet` to `irept` which is used + for annotations and other ‘non-semantic’ information + +* `sub`: A vector of `irept` which is used to store ordered but + unnamed children. + +The `irept::pretty` function outputs the contents of an `irept` directly +and can be used to understand an debug problems with `irept`s. + +On their own `irept`s do not “mean” anything; they are effectively +generic tree nodes. Their interpretation depends on the contents of +result of the `id` function (the `data`) field. `util/irep_ids.txt` +contains the complete list of `id` values. During the build process it +is used to generate `util/irep_ids.h` which gives constants for each id +(named `ID_`). These can then be used to identify what kind of data +`irept` stores and thus what can be done with it. + +To simplify this process, there are a variety of classes that inherit +from `irept`, roughly corresponding to the ids listed (i.e. `ID_or` +(the string `"or”`) corresponds to the class `or_exprt`). These give +semantically relevant accessor functions for the data; effectively +different APIs for the same underlying data structure. None of these +classes add fields (only methods) and so static casting can be used. The +inheritance graph of the subclasses of `irept` is a useful starting +point for working out how to manipulate data. + +There are three main groups of classes (or APIs); those derived from +`typet`, `codet` and `exprt` respectively. Although all of these inherit +from `irept`, these are the most abstract level that code should handle +data. If code is manipulating plain `irept`s then something is wrong +with the architecture of the code. + +Many of the key descendent of `exprt` are declared in `std_expr.h`. All +expressions have a named subfield / annotation which gives the type of +the expression (slightly simplified from C/C++ as `unsignedbv_typet`, +`signedbv_typet`, `floatbv_typet`, etc.). All type conversions are +explicit with an expression with `id() == ID_typecast` and an ‘interface +class’ named `typecast_exprt`. One key descendent of `exprt` is +`symbol_exprt` which creates `irept` instances with the id of “symbol”. +These are used to represent variables; the name of which can be found +using the `get_identifier` accessor function. + +`codet` inherits from `exprt` and is defined in `std_code.h`. They +represent executable code; statements in C rather than expressions. In +the front-end there are versions of these that hold whole code blocks, +but in goto-programs these have been flattened so that each `irept` +represents one sequence point (almost one line of code / one +semi-colon). The most common descendents of `codet` are `code_assignt` +so a common pattern is to cast the `codet` to an assignment and then +recurse on the expression on either side. + +[^2]: Or references, if reference counted data sharing is enabled. It is + enabled by default; see the `SHARING` macro. + +[^3]: Unless `USE_STD_STRING` is set, this is actually +a `dstring` and thus an integer which is a reference into a string table + +\dot +digraph G { + node [shape=box]; + rankdir="LR"; + 1 [shape=none, label=""]; + 2 [label="command line parsing"]; + 3 [shape=none, label=""]; + 1 -> 2 [label="C files or goto-binaries"]; + 2 -> 3 [label="Command line options, file names"]; +} +\enddot diff --git a/src/util/command-line-parsing.md b/src/util/command-line-parsing.md deleted file mode 100644 index 611c3c4203..0000000000 --- a/src/util/command-line-parsing.md +++ /dev/null @@ -1,16 +0,0 @@ -\defgroup util util -# Folder util - -`util/` is filled with useful tools. - -\dot -digraph G { - node [shape=box]; - rankdir="LR"; - 1 [shape=none, label=""]; - 2 [label="command line parsing"]; - 3 [shape=none, label=""]; - 1 -> 2 [label="C files or goto-binaries"]; - 2 -> 3 [label="Command line options, file names"]; -} -\enddot diff --git a/src/xmllang/README b/src/xmllang/README deleted file mode 100644 index 9150bffa56..0000000000 --- a/src/xmllang/README +++ /dev/null @@ -1,3 +0,0 @@ -based on grammar/tokenizer from http://www.w3.org/XML/9707/xml-in-c.tar.gz -also see http://www.w3.org/XML/9707/XML-in-C - diff --git a/src/xmllang/README.md b/src/xmllang/README.md new file mode 100644 index 0000000000..94baab33df --- /dev/null +++ b/src/xmllang/README.md @@ -0,0 +1,15 @@ +\ingroup module_hidden +\defgroup xmllang xmllang + +# Folder xmllang + +\author Martin Brain + +CPROVER has optional XML output for results and there is an XML format +for goto-programs. It is used to interface to various IDEs. The +`xmllang/` directory contains the parser and helper functions for +handling this format. + +Based on grammar/tokenizer from [http://www.w3.org/XML/9707/xml-in-c.tar.gz](http://www.w3.org/XML/9707/xml-in-c.tar.gz) +also see +[http://www.w3.org/XML/9707/XML-in-C](http://www.w3.org/XML/9707/XML-in-C).