docs: More reST conversion.
Sorry for the large commit, but it is much faster to convert in batches. llvm-svn: 170067
This commit is contained in:
parent
29b513871a
commit
bf9b4cd596
|
@ -1,171 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
|
||||
<html>
|
||||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>AddressSanitizer, a fast memory error detector</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
<style type="text/css">
|
||||
td {
|
||||
vertical-align: top;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>AddressSanitizer</h1>
|
||||
<ul>
|
||||
<li> <a href="#intro">Introduction</a>
|
||||
<li> <a href="#howtobuild">How to Build</a>
|
||||
<li> <a href="#usage">Usage</a>
|
||||
<ul><li> <a href="#has_feature">__has_feature(address_sanitizer)</a></ul>
|
||||
<ul><li> <a href="#no_address_safety_analysis">
|
||||
__attribute__((no_address_safety_analysis))</a></ul>
|
||||
<li> <a href="#platforms">Supported Platforms</a>
|
||||
<li> <a href="#limitations">Limitations</a>
|
||||
<li> <a href="#status">Current Status</a>
|
||||
<li> <a href="#moreinfo">More Information</a>
|
||||
</ul>
|
||||
|
||||
<h2 id="intro">Introduction</h2>
|
||||
AddressSanitizer is a fast memory error detector.
|
||||
It consists of a compiler instrumentation module and a run-time library.
|
||||
The tool can detect the following types of bugs:
|
||||
<ul> <li> Out-of-bounds accesses to heap, stack and globals
|
||||
<li> Use-after-free
|
||||
<li> Use-after-return (to some extent)
|
||||
<li> Double-free, invalid free
|
||||
</ul>
|
||||
Typical slowdown introduced by AddressSanitizer is <b>2x</b>.
|
||||
|
||||
<h2 id="howtobuild">How to build</h2>
|
||||
Follow the <a href="../get_started.html">clang build instructions</a>.
|
||||
CMake build is supported.<BR>
|
||||
|
||||
<h2 id="usage">Usage</h2>
|
||||
Simply compile and link your program with the <tt>-fsanitize=address</tt> flag. <BR>
|
||||
The AddressSanitizer run-time library should be linked to the final executable,
|
||||
so make sure to use <tt>clang</tt> (not <tt>ld</tt>) for the final link step.<BR>
|
||||
When linking shared libraries, the AddressSanitizer run-time is not linked,
|
||||
so <tt>-Wl,-z,defs</tt> may cause link errors (don't use it with AddressSanitizer). <BR>
|
||||
|
||||
To get a reasonable performance add <tt>-O1</tt> or higher. <BR>
|
||||
To get nicer stack traces in error messages add
|
||||
<tt>-fno-omit-frame-pointer</tt>. <BR>
|
||||
To get perfect stack traces you may need to disable inlining (just use <tt>-O1</tt>) and tail call
|
||||
elimination (<tt>-fno-optimize-sibling-calls</tt>).
|
||||
|
||||
<pre>
|
||||
% cat example_UseAfterFree.cc
|
||||
int main(int argc, char **argv) {
|
||||
int *array = new int[100];
|
||||
delete [] array;
|
||||
return array[argc]; // BOOM
|
||||
}
|
||||
</pre>
|
||||
|
||||
<pre>
|
||||
# Compile and link
|
||||
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc
|
||||
</pre>
|
||||
OR
|
||||
<pre>
|
||||
# Compile
|
||||
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc
|
||||
# Link
|
||||
% clang -g -fsanitize=address example_UseAfterFree.o
|
||||
</pre>
|
||||
|
||||
If a bug is detected, the program will print an error message to stderr and exit with a
|
||||
non-zero exit code.
|
||||
Currently, AddressSanitizer does not symbolize its output, so you may need to use a
|
||||
separate script to symbolize the result offline (this will be fixed in the future).
|
||||
<pre>
|
||||
% ./a.out 2> log
|
||||
% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
|
||||
==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8
|
||||
READ of size 4 at 0x7f7ddab8c084 thread T0
|
||||
#0 0x403c8c in main example_UseAfterFree.cc:4
|
||||
#1 0x7f7ddabcac4d in __libc_start_main ??:0
|
||||
0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210)
|
||||
freed by thread T0 here:
|
||||
#0 0x404704 in operator delete[](void*) ??:0
|
||||
#1 0x403c53 in main example_UseAfterFree.cc:4
|
||||
#2 0x7f7ddabcac4d in __libc_start_main ??:0
|
||||
previously allocated by thread T0 here:
|
||||
#0 0x404544 in operator new[](unsigned long) ??:0
|
||||
#1 0x403c43 in main example_UseAfterFree.cc:2
|
||||
#2 0x7f7ddabcac4d in __libc_start_main ??:0
|
||||
==9442== ABORTING
|
||||
</pre>
|
||||
|
||||
AddressSanitizer exits on the first detected error. This is by design.
|
||||
One reason: it makes the generated code smaller and faster (both by ~5%).
|
||||
Another reason: this makes fixing bugs unavoidable. With Valgrind, it is often
|
||||
the case that users treat Valgrind warnings as false positives
|
||||
(which they are not) and don't fix them.
|
||||
|
||||
|
||||
<h3 id="has_feature">__has_feature(address_sanitizer)</h3>
|
||||
In some cases one may need to execute different code depending on whether
|
||||
AddressSanitizer is enabled.
|
||||
<a href="LanguageExtensions.html#__has_feature_extension">__has_feature</a>
|
||||
can be used for this purpose.
|
||||
<pre>
|
||||
#if defined(__has_feature)
|
||||
# if __has_feature(address_sanitizer)
|
||||
code that builds only under AddressSanitizer
|
||||
# endif
|
||||
#endif
|
||||
</pre>
|
||||
|
||||
<h3 id="no_address_safety_analysis">__attribute__((no_address_safety_analysis))</h3>
|
||||
Some code should not be instrumented by AddressSanitizer.
|
||||
One may use the function attribute
|
||||
<a href="LanguageExtensions.html#address_sanitizer">
|
||||
<tt>no_address_safety_analysis</tt></a>
|
||||
to disable instrumentation of a particular function.
|
||||
This attribute may not be supported by other compilers, so we suggest that you
|
||||
use it together with <tt>__has_feature(address_sanitizer)</tt>.
|
||||
Note: currently, this attribute will be lost if the function is inlined.
|
||||
|
||||
<h2 id="platforms">Supported Platforms</h2>
|
||||
AddressSanitizer is supported on
|
||||
<ul><li>Linux i386/x86_64 (tested on Ubuntu 10.04 and 12.04).
|
||||
<li>MacOS 10.6, 10.7 and 10.8 (i386/x86_64).
|
||||
</ul>
|
||||
Support for Linux ARM (and Android ARM) is in progress
|
||||
(it may work, but is not guaranteed to).
|
||||
|
||||
|
||||
<h2 id="limitations">Limitations</h2>
|
||||
<ul>
|
||||
<li> AddressSanitizer uses more real memory than a native run.
|
||||
Exact overhead depends on the allocation sizes. The smaller the
|
||||
allocations you make the bigger the overhead is.
|
||||
<li> AddressSanitizer uses more stack memory. We have seen up to 3x increase.
|
||||
<li> On 64-bit platforms AddressSanitizer maps (but not reserves)
|
||||
16+ Terabytes of virtual address space.
|
||||
This means that tools like <tt>ulimit</tt> may not work as usually expected.
|
||||
<li> Static linking is not supported.
|
||||
</ul>
|
||||
|
||||
|
||||
<h2 id="status">Current Status</h2>
|
||||
AddressSanitizer is fully functional on supported platforms starting from LLVM 3.1.
|
||||
The test suite is integrated into CMake build and can be run with
|
||||
<tt>make check-asan</tt> command.
|
||||
|
||||
<h2 id="moreinfo">More Information</h2>
|
||||
<a href="http://code.google.com/p/address-sanitizer/">http://code.google.com/p/address-sanitizer</a>.
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,158 @@
|
|||
================
|
||||
AddressSanitizer
|
||||
================
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
AddressSanitizer is a fast memory error detector. It consists of a
|
||||
compiler instrumentation module and a run-time library. The tool can
|
||||
detect the following types of bugs:
|
||||
|
||||
- Out-of-bounds accesses to heap, stack and globals
|
||||
- Use-after-free
|
||||
- Use-after-return (to some extent)
|
||||
- Double-free, invalid free
|
||||
|
||||
Typical slowdown introduced by AddressSanitizer is **2x**.
|
||||
|
||||
How to build
|
||||
============
|
||||
|
||||
Follow the `clang build instructions <../get_started.html>`_. CMake
|
||||
build is supported.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Simply compile and link your program with the ``-fsanitize=address`` flag.
|
||||
The AddressSanitizer run-time library should be linked to the final
|
||||
executable, so make sure to use ``clang`` (not ``ld``) for the final
|
||||
link step.
|
||||
When linking shared libraries, the AddressSanitizer run-time is not
|
||||
linked, so ``-Wl,-z,defs`` may cause link errors (don't use it with
|
||||
AddressSanitizer).
|
||||
To get a reasonable performance add ``-O1`` or higher.
|
||||
To get nicer stack traces in error messages add
|
||||
``-fno-omit-frame-pointer``.
|
||||
To get perfect stack traces you may need to disable inlining (just use
|
||||
``-O1``) and tail call elimination (``-fno-optimize-sibling-calls``).
|
||||
|
||||
::
|
||||
|
||||
% cat example_UseAfterFree.cc
|
||||
int main(int argc, char **argv) {
|
||||
int *array = new int[100];
|
||||
delete [] array;
|
||||
return array[argc]; // BOOM
|
||||
}
|
||||
|
||||
::
|
||||
|
||||
# Compile and link
|
||||
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc
|
||||
|
||||
OR
|
||||
|
||||
::
|
||||
|
||||
# Compile
|
||||
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc
|
||||
# Link
|
||||
% clang -g -fsanitize=address example_UseAfterFree.o
|
||||
|
||||
If a bug is detected, the program will print an error message to stderr
|
||||
and exit with a non-zero exit code. Currently, AddressSanitizer does not
|
||||
symbolize its output, so you may need to use a separate script to
|
||||
symbolize the result offline (this will be fixed in the future).
|
||||
|
||||
::
|
||||
|
||||
% ./a.out 2> log
|
||||
% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
|
||||
==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8
|
||||
READ of size 4 at 0x7f7ddab8c084 thread T0
|
||||
#0 0x403c8c in main example_UseAfterFree.cc:4
|
||||
#1 0x7f7ddabcac4d in __libc_start_main ??:0
|
||||
0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210)
|
||||
freed by thread T0 here:
|
||||
#0 0x404704 in operator delete[](void*) ??:0
|
||||
#1 0x403c53 in main example_UseAfterFree.cc:4
|
||||
#2 0x7f7ddabcac4d in __libc_start_main ??:0
|
||||
previously allocated by thread T0 here:
|
||||
#0 0x404544 in operator new[](unsigned long) ??:0
|
||||
#1 0x403c43 in main example_UseAfterFree.cc:2
|
||||
#2 0x7f7ddabcac4d in __libc_start_main ??:0
|
||||
==9442== ABORTING
|
||||
|
||||
AddressSanitizer exits on the first detected error. This is by design.
|
||||
One reason: it makes the generated code smaller and faster (both by
|
||||
~5%). Another reason: this makes fixing bugs unavoidable. With Valgrind,
|
||||
it is often the case that users treat Valgrind warnings as false
|
||||
positives (which they are not) and don't fix them.
|
||||
|
||||
\_\_has\_feature(address\_sanitizer)
|
||||
------------------------------------
|
||||
|
||||
In some cases one may need to execute different code depending on
|
||||
whether AddressSanitizer is enabled.
|
||||
`\_\_has\_feature <LanguageExtensions.html#__has_feature_extension>`_
|
||||
can be used for this purpose.
|
||||
|
||||
::
|
||||
|
||||
#if defined(__has_feature)
|
||||
# if __has_feature(address_sanitizer)
|
||||
code that builds only under AddressSanitizer
|
||||
# endif
|
||||
#endif
|
||||
|
||||
``__attribute__((no_address_safety_analysis))``
|
||||
--------------------------------------------------
|
||||
|
||||
Some code should not be instrumented by AddressSanitizer. One may use
|
||||
the function attribute
|
||||
`no_address_safety_analysis <LanguageExtensions.html#address_sanitizer>`_
|
||||
to disable instrumentation of a particular function. This attribute may
|
||||
not be supported by other compilers, so we suggest using it together
|
||||
with ``__has_feature(address_sanitizer)``. Note: currently, this
|
||||
attribute will be lost if the function is inlined.
|
||||
|
||||
Supported Platforms
|
||||
===================
|
||||
|
||||
AddressSanitizer is supported on
|
||||
|
||||
- Linux i386/x86\_64 (tested on Ubuntu 10.04 and 12.04).
|
||||
- MacOS 10.6, 10.7 and 10.8 (i386/x86\_64).
|
||||
|
||||
Support for Linux ARM (and Android ARM) is in progress (it may work, but
|
||||
is not guaranteed to).
|
||||
|
||||
Limitations
|
||||
===========
|
||||
|
||||
- AddressSanitizer uses more real memory than a native run. Exact
|
||||
overhead depends on the allocation sizes. The smaller the
|
||||
allocations you make the bigger the overhead is.
|
||||
- AddressSanitizer uses more stack memory. We have seen up to 3x
|
||||
increase.
|
||||
- On 64-bit platforms AddressSanitizer maps (but not reserves) 16+
|
||||
Terabytes of virtual address space. This means that tools like
|
||||
``ulimit`` may not work as usually expected.
|
||||
- Static linking is not supported.
|
||||
|
||||
Current Status
|
||||
==============
|
||||
|
||||
AddressSanitizer is fully functional on supported platforms starting
|
||||
from LLVM 3.1. The test suite is integrated into CMake build and can be
|
||||
run with ``make check-asan`` command.
|
||||
|
||||
More Information
|
||||
================
|
||||
|
||||
`http://code.google.com/p/address-sanitizer <http://code.google.com/p/address-sanitizer/>`_.
|
|
@ -1,260 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Static Analyzer Design Document: Memory Regions</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Static Analyzer Design Document: Memory Regions</h1>
|
||||
|
||||
<h3>Authors</h3>
|
||||
|
||||
<p>Ted Kremenek, <tt>kremenek at apple</tt><br>
|
||||
Zhongxing Xu, <tt>xuzhongzhing at gmail</tt></p>
|
||||
|
||||
<h2 id="intro">Introduction</h2>
|
||||
|
||||
<p>The path-sensitive analysis engine in libAnalysis employs an extensible API
|
||||
for abstractly modeling the memory of an analyzed program. This API employs the
|
||||
concept of "memory regions" to abstractly model chunks of program memory such as
|
||||
program variables and dynamically allocated memory such as those returned from
|
||||
'malloc' and 'alloca'. Regions are hierarchical, with subregions modeling
|
||||
subtyping relationships, field and array offsets into larger chunks of memory,
|
||||
and so on.</p>
|
||||
|
||||
<p>The region API consists of two components:</p>
|
||||
|
||||
<ul> <li>A taxonomy and representation of regions themselves within the analyzer
|
||||
engine. The primary definitions and interfaces are described in <tt><a
|
||||
href="http://clang.llvm.org/doxygen/MemRegion_8h-source.html">MemRegion.h</a></tt>.
|
||||
At the root of the region hierarchy is the class <tt>MemRegion</tt> with
|
||||
specific subclasses refining the region concept for variables, heap allocated
|
||||
memory, and so forth.</li> <li>The modeling of binding of values to regions. For
|
||||
example, modeling the value stored to a local variable <tt>x</tt> consists of
|
||||
recording the binding between the region for <tt>x</tt> (which represents the
|
||||
raw memory associated with <tt>x</tt>) and the value stored to <tt>x</tt>. This
|
||||
binding relationship is captured with the notion of "symbolic
|
||||
stores."</li> </ul>
|
||||
|
||||
<p>Symbolic stores, which can be thought of as representing the relation
|
||||
<tt>regions -> values</tt>, are implemented by subclasses of the
|
||||
<tt>StoreManager</tt> class (<tt><a
|
||||
href="http://clang.llvm.org/doxygen/Store_8h-source.html">Store.h</a></tt>). A
|
||||
particular StoreManager implementation has complete flexibility concerning the
|
||||
following:
|
||||
|
||||
<ul>
|
||||
<li><em>How</em> to model the binding between regions and values</li>
|
||||
<li><em>What</em> bindings are recorded
|
||||
</ul>
|
||||
|
||||
<p>Together, both points allow different StoreManagers to trade off between
|
||||
different levels of analysis precision and scalability concerning the reasoning
|
||||
of program memory. Meanwhile, the core path-sensitive engine makes no
|
||||
assumptions about either point, and queries a StoreManager about the bindings
|
||||
to a memory region through a generic interface that all StoreManagers share. If
|
||||
a particular StoreManager cannot reason about the potential bindings of a given
|
||||
memory region (e.g., '<tt>BasicStoreManager</tt>' does not reason about fields
|
||||
of structures) then the StoreManager can simply return 'unknown' (represented by
|
||||
'<tt>UnknownVal</tt>') for a particular region-binding. This separation of
|
||||
concerns not only isolates the core analysis engine from the details of
|
||||
reasoning about program memory but also facilitates the option of a client of the
|
||||
path-sensitive engine to easily swap in different StoreManager implementations
|
||||
that internally reason about program memory in very different ways.</p>
|
||||
|
||||
<p>The rest of this document is divided into two parts. We first discuss region
|
||||
taxonomy and the semantics of regions. We then discuss the StoreManager
|
||||
interface, and details of how the currently available StoreManager classes
|
||||
implement region bindings.</p>
|
||||
|
||||
<h2 id="regions">Memory Regions and Region Taxonomy</h2>
|
||||
|
||||
<h3>Pointers</h3>
|
||||
|
||||
<p>Before talking about memory regions, we will first talk about pointers
|
||||
since memory regions are essentially used to represent pointer values.</p>
|
||||
|
||||
<p>A pointer is a type of value. Pointer values have two semantic aspects.
|
||||
One is its physical value, which is an address or location. The other is the
|
||||
type of the memory object residing in the address.</p>
|
||||
|
||||
<p>Memory regions are designed to abstract these two properties of the pointer.
|
||||
The physical value of a pointer is represented by MemRegion pointers. The rvalue
|
||||
type of the region corresponds to the type of the pointee object.</p>
|
||||
|
||||
<p>One complication is that we could have different view regions on the same
|
||||
memory chunk. They represent the same memory location, but have different
|
||||
abstract locations, i.e., MemRegion pointers. Thus we need to canonicalize the
|
||||
abstract locations to get a unique abstract location for one physical
|
||||
location.</p>
|
||||
|
||||
<p>Furthermore, these different view regions may or may not represent memory
|
||||
objects of different types. Some different types are semantically the same,
|
||||
for example, 'struct s' and 'my_type' are the same type.</p>
|
||||
|
||||
<pre>
|
||||
struct s;
|
||||
typedef struct s my_type;
|
||||
</pre>
|
||||
|
||||
<p>But <tt>char</tt> and <tt>int</tt> are not the same type in the code below:</p>
|
||||
|
||||
<pre>
|
||||
void *p;
|
||||
int *q = (int*) p;
|
||||
char *r = (char*) p;
|
||||
</pre>
|
||||
|
||||
<p>Thus we need to canonicalize the MemRegion which is used in binding and
|
||||
retrieving.</p>
|
||||
|
||||
<h3>Regions</h3>
|
||||
<p>Region is the entity used to model pointer values. A Region has the following
|
||||
properties:</p>
|
||||
|
||||
<ul>
|
||||
<li>Kind</li>
|
||||
|
||||
<li>ObjectType: the type of the object residing on the region.</li>
|
||||
|
||||
<li>LocationType: the type of the pointer value that the region corresponds to.
|
||||
Usually this is the pointer to the ObjectType. But sometimes we want to cache
|
||||
this type explicitly, for example, for a CodeTextRegion.</li>
|
||||
|
||||
<li>StartLocation</li>
|
||||
|
||||
<li>EndLocation</li>
|
||||
</ul>
|
||||
|
||||
<h3>Symbolic Regions</h3>
|
||||
|
||||
<p>A symbolic region is a map of the concept of symbolic values into the domain
|
||||
of regions. It is the way that we represent symbolic pointers. Whenever a
|
||||
symbolic pointer value is needed, a symbolic region is created to represent
|
||||
it.</p>
|
||||
|
||||
<p>A symbolic region has no type. It wraps a SymbolData. But sometimes we have
|
||||
type information associated with a symbolic region. For this case, a
|
||||
TypedViewRegion is created to layer the type information on top of the symbolic
|
||||
region. The reason we do not carry type information with the symbolic region is
|
||||
that the symbolic regions can have no type. To be consistent, we don't let them
|
||||
carry type information.</p>
|
||||
|
||||
<p>Like a symbolic pointer, a symbolic region may be NULL, has unknown extent,
|
||||
and represents a generic chunk of memory.</p>
|
||||
|
||||
<p><em><b>NOTE</b>: We plan not to use loc::SymbolVal in RegionStore and remove it
|
||||
gradually.</em></p>
|
||||
|
||||
<p>Symbolic regions get their rvalue types through the following ways:</p>
|
||||
|
||||
<ul>
|
||||
<li>Through the parameter or global variable that points to it, e.g.:
|
||||
<pre>
|
||||
void f(struct s* p) {
|
||||
...
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>The symbolic region pointed to by <tt>p</tt> has type <tt>struct
|
||||
s</tt>.</p></li>
|
||||
|
||||
<li>Through explicit or implicit casts, e.g.:
|
||||
<pre>
|
||||
void f(void* p) {
|
||||
struct s* q = (struct s*) p;
|
||||
...
|
||||
}
|
||||
</pre>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>We attach the type information to the symbolic region lazily. For the first
|
||||
case above, we create the <tt>TypedViewRegion</tt> only when the pointer is
|
||||
actually used to access the pointee memory object, that is when the element or
|
||||
field region is created. For the cast case, the <tt>TypedViewRegion</tt> is
|
||||
created when visiting the <tt>CastExpr</tt>.</p>
|
||||
|
||||
<p>The reason for doing lazy typing is that symbolic regions are sometimes only
|
||||
used to do location comparison.</p>
|
||||
|
||||
<h3>Pointer Casts</h3>
|
||||
|
||||
<p>Pointer casts allow people to impose different 'views' onto a chunk of
|
||||
memory.</p>
|
||||
|
||||
<p>Usually we have two kinds of casts. One kind of cast casts down within the
|
||||
type hierarchy. It imposes more specific views onto more generic memory regions.
|
||||
The other kind of cast casts up within the type hierarchy. It strips away more
|
||||
specific views on top of the more generic memory regions.</p>
|
||||
|
||||
<p>We simulate the down casts by layering another <tt>TypedViewRegion</tt> on
|
||||
top of the original region. We simulate the up casts by stripping away the top
|
||||
<tt>TypedViewRegion</tt>. Down casts are usually simple. For up casts, if
|
||||
there is no <tt>TypedViewRegion</tt> to be stripped, we return the original
|
||||
region. If the underlying region is of a different type than the cast-to type,
|
||||
we flag an error state.</p>
|
||||
|
||||
<p>For toll-free bridging casts, we return the original region.</p>
|
||||
|
||||
<p>We can set up a partial order for pointer types, with the most general type
|
||||
<tt>void*</tt> at the top. The partial order forms a tree with <tt>void*</tt> as
|
||||
its root node.</p>
|
||||
|
||||
<p>Every <tt>MemRegion</tt> has a root position in the type tree. For example,
|
||||
the pointee region of <tt>void *p</tt> has its root position at the root node of
|
||||
the tree. <tt>VarRegion</tt> of <tt>int x</tt> has its root position at the 'int
|
||||
type' node.</p>
|
||||
|
||||
<p><tt>TypedViewRegion</tt> is used to move the region down or up in the tree.
|
||||
Moving down in the tree adds a <tt>TypedViewRegion</tt>. Moving up in the tree
|
||||
removes a <tt>TypedViewRegion</tt>.</p>
|
||||
|
||||
<p>Do we want to allow moving up beyond the root position? This happens
|
||||
when:</p> <pre> int x; void *p = &x; </pre>
|
||||
|
||||
<p>The region of <tt>x</tt> has its root position at 'int*' node. The cast to
|
||||
void* moves that region up to the 'void*' node. I propose to not allow such
|
||||
casts, and assign the region of <tt>x</tt> for <tt>p</tt>.</p>
|
||||
|
||||
<p>Another non-ideal case is that people might cast to a non-generic pointer
|
||||
from another non-generic pointer instead of first casting it back to the generic
|
||||
pointer. Direct handling of this case would result in multiple layers of
|
||||
TypedViewRegions. This enforces an incorrect semantic view to the region,
|
||||
because we can only have one typed view on a region at a time. To avoid this
|
||||
inconsistency, before casting the region, we strip the TypedViewRegion, then do
|
||||
the cast. In summary, we only allow one layer of TypedViewRegion.</p>
|
||||
|
||||
<h3>Region Bindings</h3>
|
||||
|
||||
<p>The following region kinds are boundable: VarRegion, CompoundLiteralRegion,
|
||||
StringRegion, ElementRegion, FieldRegion, and ObjCIvarRegion.</p>
|
||||
|
||||
<p>When binding regions, we perform canonicalization on element regions and field
|
||||
regions. This is because we can have different views on the same region, some
|
||||
of which are essentially the same view with different sugar type names.</p>
|
||||
|
||||
<p>To canonicalize a region, we get the canonical types for all TypedViewRegions
|
||||
along the way up to the root region, and make new TypedViewRegions with those
|
||||
canonical types.</p>
|
||||
|
||||
<p>For Objective-C and C++, perhaps another canonicalization rule should be
|
||||
added: for FieldRegion, the least derived class that has the field is used as
|
||||
the type of the super region of the FieldRegion.</p>
|
||||
|
||||
<p>All bindings and retrievings are done on the canonicalized regions.</p>
|
||||
|
||||
<p>Canonicalization is transparent outside the region store manager, and more
|
||||
specifically, unaware outside the Bind() and Retrieve() method. We don't need to
|
||||
consider region canonicalization when doing pointer cast.</p>
|
||||
|
||||
<h3>Constraint Manager</h3>
|
||||
|
||||
<p>The constraint manager reasons about the abstract location of memory objects.
|
||||
We can have different views on a region, but none of these views changes the
|
||||
location of that object. Thus we should get the same abstract location for those
|
||||
regions.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,259 @@
|
|||
===============================================
|
||||
Static Analyzer Design Document: Memory Regions
|
||||
===============================================
|
||||
|
||||
Authors: Ted Kremenek, ``kremenek at apple``,
|
||||
Zhongxing Xu, ``xuzhongzhing at gmail``
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
The path-sensitive analysis engine in libAnalysis employs an extensible
|
||||
API for abstractly modeling the memory of an analyzed program. This API
|
||||
employs the concept of "memory regions" to abstractly model chunks of
|
||||
program memory such as program variables and dynamically allocated
|
||||
memory such as those returned from 'malloc' and 'alloca'. Regions are
|
||||
hierarchical, with subregions modeling subtyping relationships, field
|
||||
and array offsets into larger chunks of memory, and so on.
|
||||
|
||||
The region API consists of two components:
|
||||
|
||||
- A taxonomy and representation of regions themselves within the
|
||||
analyzer engine. The primary definitions and interfaces are described
|
||||
in ``MemRegion.h``. At the root of the region hierarchy is the class
|
||||
``MemRegion`` with specific subclasses refining the region concept
|
||||
for variables, heap allocated memory, and so forth.
|
||||
- The modeling of binding of values to regions. For example, modeling
|
||||
the value stored to a local variable ``x`` consists of recording the
|
||||
binding between the region for ``x`` (which represents the raw memory
|
||||
associated with ``x``) and the value stored to ``x``. This binding
|
||||
relationship is captured with the notion of "symbolic stores."
|
||||
|
||||
Symbolic stores, which can be thought of as representing the relation
|
||||
``regions -> values``, are implemented by subclasses of the
|
||||
``StoreManager`` class (``Store.h``). A particular StoreManager
|
||||
implementation has complete flexibility concerning the following:
|
||||
|
||||
- *How* to model the binding between regions and values
|
||||
- *What* bindings are recorded
|
||||
|
||||
Together, both points allow different StoreManagers to trade off between
|
||||
different levels of analysis precision and scalability concerning the
|
||||
reasoning of program memory. Meanwhile, the core path-sensitive engine
|
||||
makes no assumptions about either point, and queries a StoreManager
|
||||
about the bindings to a memory region through a generic interface that
|
||||
all StoreManagers share. If a particular StoreManager cannot reason
|
||||
about the potential bindings of a given memory region (e.g.,
|
||||
'``BasicStoreManager``' does not reason about fields of structures) then
|
||||
the StoreManager can simply return 'unknown' (represented by
|
||||
'``UnknownVal``') for a particular region-binding. This separation of
|
||||
concerns not only isolates the core analysis engine from the details of
|
||||
reasoning about program memory but also facilitates the option of a
|
||||
client of the path-sensitive engine to easily swap in different
|
||||
StoreManager implementations that internally reason about program memory
|
||||
in very different ways.
|
||||
|
||||
The rest of this document is divided into two parts. We first discuss
|
||||
region taxonomy and the semantics of regions. We then discuss the
|
||||
StoreManager interface, and details of how the currently available
|
||||
StoreManager classes implement region bindings.
|
||||
|
||||
Memory Regions and Region Taxonomy
|
||||
==================================
|
||||
|
||||
Pointers
|
||||
--------
|
||||
|
||||
Before talking about memory regions, we will first talk about
|
||||
pointers since memory regions are essentially used to represent pointer
|
||||
values.
|
||||
|
||||
A pointer is a type of value. Pointer values have two semantic
|
||||
aspects. One is its physical value, which is an address or location. The
|
||||
other is the type of the memory object residing in the address.
|
||||
|
||||
Memory regions are designed to abstract these two properties of the
|
||||
pointer. The physical value of a pointer is represented by MemRegion
|
||||
pointers. The rvalue type of the region corresponds to the type of the
|
||||
pointee object.
|
||||
|
||||
One complication is that we could have different view regions on the
|
||||
same memory chunk. They represent the same memory location, but have
|
||||
different abstract locations, i.e., MemRegion pointers. Thus we need to
|
||||
canonicalize the abstract locations to get a unique abstract location
|
||||
for one physical location.
|
||||
|
||||
Furthermore, these different view regions may or may not represent
|
||||
memory objects of different types. Some different types are semantically
|
||||
the same, for example, 'struct s' and 'my\_type' are the same type.
|
||||
|
||||
::
|
||||
|
||||
struct s;
|
||||
typedef struct s my_type;
|
||||
|
||||
But ``char`` and ``int`` are not the same type in the code below:
|
||||
|
||||
::
|
||||
|
||||
void *p;
|
||||
int *q = (int*) p;
|
||||
char *r = (char*) p;
|
||||
|
||||
Thus we need to canonicalize the MemRegion which is used in binding and
|
||||
retrieving.
|
||||
|
||||
Regions
|
||||
-------
|
||||
|
||||
Region is the entity used to model pointer values. A Region has the
|
||||
following properties:
|
||||
|
||||
- Kind
|
||||
- ObjectType: the type of the object residing on the region.
|
||||
- LocationType: the type of the pointer value that the region
|
||||
corresponds to. Usually this is the pointer to the ObjectType. But
|
||||
sometimes we want to cache this type explicitly, for example, for a
|
||||
CodeTextRegion.
|
||||
- StartLocation
|
||||
- EndLocation
|
||||
|
||||
Symbolic Regions
|
||||
----------------
|
||||
|
||||
A symbolic region is a map of the concept of symbolic values into the
|
||||
domain of regions. It is the way that we represent symbolic pointers.
|
||||
Whenever a symbolic pointer value is needed, a symbolic region is
|
||||
created to represent it.
|
||||
|
||||
A symbolic region has no type. It wraps a SymbolData. But sometimes we
|
||||
have type information associated with a symbolic region. For this case,
|
||||
a TypedViewRegion is created to layer the type information on top of the
|
||||
symbolic region. The reason we do not carry type information with the
|
||||
symbolic region is that the symbolic regions can have no type. To be
|
||||
consistent, we don't let them carry type information.
|
||||
|
||||
Like a symbolic pointer, a symbolic region may be NULL, has unknown
|
||||
extent, and represents a generic chunk of memory.
|
||||
|
||||
.. note::
|
||||
We plan not to use loc::SymbolVal in RegionStore and remove it
|
||||
gradually.
|
||||
|
||||
Symbolic regions get their rvalue types through the following ways:
|
||||
|
||||
- Through the parameter or global variable that points to it, e.g.:
|
||||
|
||||
::
|
||||
|
||||
void f(struct s* p) {
|
||||
...
|
||||
}
|
||||
|
||||
The symbolic region pointed to by ``p`` has type ``struct s``.
|
||||
|
||||
- Through explicit or implicit casts, e.g.:
|
||||
|
||||
::
|
||||
|
||||
void f(void* p) {
|
||||
struct s* q = (struct s*) p;
|
||||
...
|
||||
}
|
||||
|
||||
We attach the type information to the symbolic region lazily. For the
|
||||
first case above, we create the ``TypedViewRegion`` only when the
|
||||
pointer is actually used to access the pointee memory object, that is
|
||||
when the element or field region is created. For the cast case, the
|
||||
``TypedViewRegion`` is created when visiting the ``CastExpr``.
|
||||
|
||||
The reason for doing lazy typing is that symbolic regions are sometimes
|
||||
only used to do location comparison.
|
||||
|
||||
Pointer Casts
|
||||
-------------
|
||||
|
||||
Pointer casts allow people to impose different 'views' onto a chunk of
|
||||
memory.
|
||||
|
||||
Usually we have two kinds of casts. One kind of cast casts down within
|
||||
the type hierarchy. It imposes more specific views onto more generic
|
||||
memory regions. The other kind of cast casts up within the type
|
||||
hierarchy. It strips away more specific views on top of the more generic
|
||||
memory regions.
|
||||
|
||||
We simulate the down casts by layering another ``TypedViewRegion`` on
|
||||
top of the original region. We simulate the up casts by stripping away
|
||||
the top ``TypedViewRegion``. Down casts are usually simple. For up casts,
|
||||
if there is no ``TypedViewRegion`` to be stripped, we return the
|
||||
original region. If the underlying region is of a different type than
|
||||
the cast-to type, we flag an error state.
|
||||
|
||||
For toll-free bridging casts, we return the original region.
|
||||
|
||||
We can set up a partial order for pointer types, with the most general
|
||||
type ``void*`` at the top. The partial order forms a tree with ``void*``
|
||||
as its root node.
|
||||
|
||||
Every ``MemRegion`` has a root position in the type tree. For example,
|
||||
the pointee region of ``void *p`` has its root position at the root node
|
||||
of the tree. ``VarRegion`` of ``int x`` has its root position at the
|
||||
'int type' node.
|
||||
|
||||
``TypedViewRegion`` is used to move the region down or up in the tree.
|
||||
Moving down in the tree adds a ``TypedViewRegion``. Moving up in the
|
||||
tree removes a ``TypedViewRegion``.
|
||||
|
||||
Do we want to allow moving up beyond the root position? This happens
|
||||
when:
|
||||
|
||||
::
|
||||
|
||||
int x; void *p = &x;
|
||||
|
||||
The region of ``x`` has its root position at the 'int\*' node. The cast to
|
||||
void\* moves that region up to the 'void\*' node. I propose to not allow
|
||||
such casts, and assign the region of ``x`` for ``p``.
|
||||
|
||||
Another non-ideal case is that people might cast to a non-generic
|
||||
pointer from another non-generic pointer instead of first casting it
|
||||
back to the generic pointer. Direct handling of this case would result
|
||||
in multiple layers of TypedViewRegions. This enforces an incorrect
|
||||
semantic view to the region, because we can only have one typed view on
|
||||
a region at a time. To avoid this inconsistency, before casting the
|
||||
region, we strip the TypedViewRegion, then do the cast. In summary, we
|
||||
only allow one layer of TypedViewRegion.
|
||||
|
||||
Region Bindings
|
||||
---------------
|
||||
|
||||
The following region kinds are bindable: VarRegion,
|
||||
CompoundLiteralRegion, StringRegion, ElementRegion, FieldRegion, and
|
||||
ObjCIvarRegion.
|
||||
|
||||
When binding regions, we perform canonicalization on element regions and
|
||||
field regions. This is because we can have different views on the same
|
||||
region, some of which are essentially the same view with different sugar
|
||||
type names.
|
||||
|
||||
To canonicalize a region, we get the canonical types for all
|
||||
TypedViewRegions along the way up to the root region, and make new
|
||||
TypedViewRegions with those canonical types.
|
||||
|
||||
For Objective-C and C++, perhaps another canonicalization rule should be
|
||||
added: for FieldRegion, the least derived class that has the field is
|
||||
used as the type of the super region of the FieldRegion.
|
||||
|
||||
All bindings and retrievings are done on the canonicalized regions.
|
||||
|
||||
Canonicalization is transparent outside the region store manager, and
|
||||
more specifically, invisible outside the Bind() and Retrieve() methods. We
|
||||
don't need to consider region canonicalization when doing pointer casts.
|
||||
|
||||
Constraint Manager
|
||||
------------------
|
||||
|
||||
The constraint manager reasons about the abstract location of memory
|
||||
objects. We can have different views on a region, but none of these
|
||||
views changes the location of that object. Thus we should get the same
|
||||
abstract location for those regions.
|
|
@ -1,170 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Clang Plugins</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>Clang Plugins</h1>
|
||||
<p>Clang Plugins make it possible to run extra user defined actions during
|
||||
a compilation. This document will provide a basic walkthrough of how to write
|
||||
and run a Clang Plugin.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="intro">Introduction</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Clang Plugins run FrontendActions over code. See the
|
||||
<a href="RAVFrontendAction.html">FrontendAction tutorial</a> on how to write a
|
||||
FrontendAction using the RecursiveASTVisitor. In this tutorial, we'll
|
||||
demonstrate how to write a simple clang plugin.
|
||||
</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="pluginactions">Writing a PluginASTAction</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>The main difference from writing normal FrontendActions is that you can
|
||||
handle plugin command line options. The
|
||||
PluginASTAction base class declares a ParseArgs method which you have to
|
||||
implement in your plugin.
|
||||
</p>
|
||||
<pre>
|
||||
bool ParseArgs(const CompilerInstance &CI,
|
||||
const std::vector<std::string>& args) {
|
||||
for (unsigned i = 0, e = args.size(); i != e; ++i) {
|
||||
if (args[i] == "-some-arg") {
|
||||
// Handle the command line argument.
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="registerplugin">Registering a plugin</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>A plugin is loaded from a dynamic library at runtime by the compiler. To register
|
||||
a plugin in a library, use FrontendPluginRegistry::Add:</p>
|
||||
<pre>
|
||||
static FrontendPluginRegistry::Add<MyPlugin> X("my-plugin-name", "my plugin description");
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="example">Putting it all together</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Let's look at an example plugin that prints top-level function names.
|
||||
This example is also checked into the clang repository; please also take a look
|
||||
at the latest <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/PrintFunctionNames.cpp?view=markup">checked in version of PrintFunctionNames.cpp</a>.</p>
|
||||
<pre>
|
||||
#include "clang/Frontend/FrontendPluginRegistry.h"
|
||||
#include "clang/AST/ASTConsumer.h"
|
||||
#include "clang/AST/AST.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace clang;
|
||||
|
||||
namespace {
|
||||
|
||||
class PrintFunctionsConsumer : public ASTConsumer {
|
||||
public:
|
||||
virtual bool HandleTopLevelDecl(DeclGroupRef DG) {
|
||||
for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) {
|
||||
const Decl *D = *i;
|
||||
if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
|
||||
llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n";
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class PrintFunctionNamesAction : public PluginASTAction {
|
||||
protected:
|
||||
ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) {
|
||||
return new PrintFunctionsConsumer();
|
||||
}
|
||||
|
||||
bool ParseArgs(const CompilerInstance &CI,
|
||||
const std::vector<std::string>& args) {
|
||||
for (unsigned i = 0, e = args.size(); i != e; ++i) {
|
||||
llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n";
|
||||
|
||||
// Example error handling.
|
||||
if (args[i] == "-an-error") {
|
||||
DiagnosticsEngine &D = CI.getDiagnostics();
|
||||
unsigned DiagID = D.getCustomDiagID(
|
||||
DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'");
|
||||
D.Report(DiagID);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (args.size() && args[0] == "help")
|
||||
PrintHelp(llvm::errs());
|
||||
|
||||
return true;
|
||||
}
|
||||
void PrintHelp(llvm::raw_ostream& ros) {
|
||||
ros << "Help for PrintFunctionNames plugin goes here\n";
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
static FrontendPluginRegistry::Add<PrintFunctionNamesAction>
|
||||
X("print-fns", "print function names");
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="running">Running the plugin</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>To run a plugin, the dynamic library containing the plugin registry must be
|
||||
loaded via the -load command line option. This will load all plugins that are
|
||||
registered, and you can select the plugins to run by specifying the -plugin
|
||||
option. Additional parameters for the plugins can be passed with -plugin-arg-<plugin-name>.</p>
|
||||
|
||||
<p>Note that those options must reach clang's cc1 process. There are two
|
||||
ways to do so:</p>
|
||||
<ul>
|
||||
<li>
|
||||
Directly call the parsing process by using the -cc1 option; this has the
|
||||
downside of not configuring the default header search paths, so you'll need to
|
||||
specify the full system path configuration on the command line.
|
||||
</li>
|
||||
<li>
|
||||
Use clang as usual, but prefix all arguments to the cc1 process with -Xclang.
|
||||
</li>
|
||||
</ul>
|
||||
<p>For example, to run the print-function-names plugin over a source file in clang,
|
||||
first build the plugin, and then call clang with the plugin from the source tree:</p>
|
||||
<pre>
|
||||
$ export BD=/path/to/build/directory
|
||||
$ (cd $BD && make PrintFunctionNames )
|
||||
$ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \
|
||||
-D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \
|
||||
-I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \
|
||||
tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \
|
||||
-Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \
|
||||
-plugin -Xclang print-fns
|
||||
</pre>
|
||||
|
||||
<p>Also see the print-function-name plugin example's
|
||||
<a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/README.txt?view=markup">README</a></p>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
=============
|
||||
Clang Plugins
|
||||
=============
|
||||
|
||||
Clang Plugins make it possible to run extra user-defined actions during
|
||||
a compilation. This document will provide a basic walkthrough of how to
|
||||
write and run a Clang Plugin.
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Clang Plugins run FrontendActions over code. See the :doc:`FrontendAction
|
||||
tutorial <RAVFrontendAction>` on how to write a FrontendAction
|
||||
using the RecursiveASTVisitor. In this tutorial, we'll demonstrate how
|
||||
to write a simple clang plugin.
|
||||
|
||||
Writing a PluginASTAction
|
||||
=========================
|
||||
|
||||
The main difference from writing normal FrontendActions is that you can
|
||||
handle plugin command line options. The PluginASTAction base class
|
||||
declares a ParseArgs method which you have to implement in your plugin.
|
||||
|
||||
::
|
||||
|
||||
bool ParseArgs(const CompilerInstance &CI,
|
||||
const std::vector<std::string>& args) {
|
||||
for (unsigned i = 0, e = args.size(); i != e; ++i) {
|
||||
if (args[i] == "-some-arg") {
|
||||
// Handle the command line argument.
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Registering a plugin
|
||||
====================
|
||||
|
||||
A plugin is loaded from a dynamic library at runtime by the compiler. To
|
||||
register a plugin in a library, use FrontendPluginRegistry::Add:
|
||||
|
||||
::
|
||||
|
||||
static FrontendPluginRegistry::Add<MyPlugin> X("my-plugin-name", "my plugin description");
|
||||
|
||||
Putting it all together
|
||||
=======================
|
||||
|
||||
Let's look at an example plugin that prints top-level function names.
|
||||
This example is also checked into the clang repository; please also take
|
||||
a look at the latest `checked in version of
|
||||
PrintFunctionNames.cpp <http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/PrintFunctionNames.cpp?view=markup>`_.
|
||||
|
||||
::
|
||||
|
||||
#include "clang/Frontend/FrontendPluginRegistry.h"
|
||||
#include "clang/AST/ASTConsumer.h"
|
||||
#include "clang/AST/AST.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace clang;
|
||||
|
||||
namespace {
|
||||
|
||||
class PrintFunctionsConsumer : public ASTConsumer {
|
||||
public:
|
||||
virtual bool HandleTopLevelDecl(DeclGroupRef DG) {
|
||||
for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) {
|
||||
const Decl *D = *i;
|
||||
if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
|
||||
llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n";
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class PrintFunctionNamesAction : public PluginASTAction {
|
||||
protected:
|
||||
ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) {
|
||||
return new PrintFunctionsConsumer();
|
||||
}
|
||||
|
||||
bool ParseArgs(const CompilerInstance &CI,
|
||||
const std::vector<std::string>& args) {
|
||||
for (unsigned i = 0, e = args.size(); i != e; ++i) {
|
||||
llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n";
|
||||
|
||||
// Example error handling.
|
||||
if (args[i] == "-an-error") {
|
||||
DiagnosticsEngine &D = CI.getDiagnostics();
|
||||
unsigned DiagID = D.getCustomDiagID(
|
||||
DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'");
|
||||
D.Report(DiagID);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (args.size() && args[0] == "help")
|
||||
PrintHelp(llvm::errs());
|
||||
|
||||
return true;
|
||||
}
|
||||
void PrintHelp(llvm::raw_ostream& ros) {
|
||||
ros << "Help for PrintFunctionNames plugin goes here\n";
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
static FrontendPluginRegistry::Add<PrintFunctionNamesAction>
|
||||
X("print-fns", "print function names");
|
||||
|
||||
Running the plugin
|
||||
==================
|
||||
|
||||
To run a plugin, the dynamic library containing the plugin registry must
|
||||
be loaded via the -load command line option. This will load all plugins
|
||||
that are registered, and you can select the plugins to run by specifying
|
||||
the -plugin option. Additional parameters for the plugins can be passed
|
||||
with -plugin-arg-<plugin-name>.
|
||||
|
||||
Note that those options must reach clang's cc1 process. There are two
|
||||
ways to do so:
|
||||
|
||||
- Directly call the parsing process by using the -cc1 option; this has
|
||||
the downside of not configuring the default header search paths, so
|
||||
you'll need to specify the full system path configuration on the
|
||||
command line.
|
||||
- Use clang as usual, but prefix all arguments to the cc1 process with
|
||||
-Xclang.
|
||||
|
||||
For example, to run the print-function-names plugin over a source file
|
||||
in clang, first build the plugin, and then call clang with the plugin
|
||||
from the source tree:
|
||||
|
||||
::
|
||||
|
||||
$ export BD=/path/to/build/directory
|
||||
$ (cd $BD && make PrintFunctionNames )
|
||||
$ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \
|
||||
-D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \
|
||||
-I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \
|
||||
tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \
|
||||
-Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \
|
||||
-plugin -Xclang print-fns
|
||||
|
||||
Also see the print-function-name plugin example's
|
||||
`README <http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/README.txt?view=markup>`_
|
|
@ -1,110 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Clang Tools</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>Clang Tools</h1>
|
||||
<p>Clang Tools are standalone command line (and potentially GUI) tools designed
|
||||
for use by C++ developers who are already using and enjoying Clang as their
|
||||
compiler. These tools provide developer-oriented functionality such as fast
|
||||
syntax checking, automatic formatting, refactoring, etc.</p>
|
||||
|
||||
<p>Only a couple of the most basic and fundamental tools are kept in the primary
|
||||
Clang Subversion project. The rest of the tools are kept in a side-project so
|
||||
that developers who don't want or need to build them don't. If you want to get
|
||||
access to the extra Clang Tools repository, simply check it out into the tools
|
||||
tree of your Clang checkout and follow the usual process for building and
|
||||
working with a combined LLVM/Clang checkout:</p>
|
||||
<ul>
|
||||
<li>With Subversion:
|
||||
<ul>
|
||||
<li><tt>cd llvm/tools/clang/tools</tt></li>
|
||||
<li><tt>svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk
|
||||
extra</tt></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Or with Git:
|
||||
<ul>
|
||||
<li><tt>cd llvm/tools/clang/tools</tt></li>
|
||||
<li><tt>git clone http://llvm.org/git/clang-tools-extra.git extra</tt></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>This document describes a high-level overview of the organization of Clang
|
||||
Tools within the project as well as giving an introduction to some of the more
|
||||
important tools. However, it should be noted that this document is currently
|
||||
focused on Clang and Clang Tool developers, not on end users of these tools.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="org">Clang Tools Organization</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Clang Tools are CLI or GUI programs that are intended to be directly used by
|
||||
C++ developers. That is they are <em>not</em> primarily for use by Clang
|
||||
developers, although they are hopefully useful to C++ developers who happen to
|
||||
work on Clang, and we try to actively dogfood their functionality. They are
|
||||
developed in three components: the underlying infrastructure for building
|
||||
a standalone tool based on Clang, core shared logic used by many different tools
|
||||
in the form of refactoring and rewriting libraries, and the tools
|
||||
themselves.</p>
|
||||
|
||||
<p>The underlying infrastructure for Clang Tools is the
|
||||
<a href="LibTooling.html">LibTooling</a> platform. See its documentation for
|
||||
much more detailed information about how this infrastructure works. The common
|
||||
refactoring and rewriting toolkit-style library is also part of LibTooling
|
||||
organizationally.</p>
|
||||
|
||||
<p>A few Clang Tools are developed along side the core Clang libraries as
|
||||
examples and test cases of fundamental functionality. However, most of the tools
|
||||
are developed in a side repository to provide easy separation from the core
|
||||
libraries. We intentionally do not support public libraries in the side
|
||||
repository, as we want to carefully review and find good APIs for libraries as
|
||||
they are lifted out of a few tools and into the core Clang library set.</p>
|
||||
|
||||
<p>Regardless of which repository Clang Tools' code resides in, the development
|
||||
process and practices for all Clang Tools are exactly those of Clang itself.
|
||||
They are entirely within the Clang <em>project</em>, regardless of the version
|
||||
control scheme.</p>
|
||||
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="coretools">Core Clang Tools</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>The core set of Clang tools that are within the main repository are tools
|
||||
that very specifically complement, and allow use and testing of <em>Clang</em>
|
||||
specific functionality.</p>
|
||||
|
||||
<h3 id="clang-check"><tt>clang-check</tt></h3>
|
||||
<p>This tool combines the LibTooling framework for running a Clang tool with the
|
||||
basic Clang diagnostics by syntax checking specific files in a fast, command
|
||||
line interface. It can also accept flags to re-display the diagnostics in
|
||||
different formats with different flags, suitable for use driving an IDE or
|
||||
editor. Furthermore, it can be used in fixit-mode to directly apply fixit-hints
|
||||
offered by clang.</p>
|
||||
|
||||
<p>FIXME: Link to user-oriented clang-check documentation.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="registerplugin">Extra Clang Tools</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>As various categories of Clang Tools are added to the extra repository,
|
||||
they'll be tracked here. The focus of this documentation is on the scope and
|
||||
features of the tools for other tool developers; each tool should provide its
|
||||
own user-focused documentation.</p>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
===========
|
||||
Clang Tools
|
||||
===========
|
||||
|
||||
Clang Tools are standalone command line (and potentially GUI) tools
|
||||
designed for use by C++ developers who are already using and enjoying
|
||||
Clang as their compiler. These tools provide developer-oriented
|
||||
functionality such as fast syntax checking, automatic formatting,
|
||||
refactoring, etc.
|
||||
|
||||
Only a couple of the most basic and fundamental tools are kept in the
|
||||
primary Clang Subversion project. The rest of the tools are kept in a
|
||||
side-project so that developers who don't want or need to build them
|
||||
don't. If you want to get access to the extra Clang Tools repository,
|
||||
simply check it out into the tools tree of your Clang checkout and
|
||||
follow the usual process for building and working with a combined
|
||||
LLVM/Clang checkout:
|
||||
|
||||
- With Subversion:
|
||||
|
||||
- ``cd llvm/tools/clang/tools``
|
||||
- ``svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra``
|
||||
|
||||
- Or with Git:
|
||||
|
||||
- ``cd llvm/tools/clang/tools``
|
||||
- ``git clone http://llvm.org/git/clang-tools-extra.git extra``
|
||||
|
||||
This document describes a high-level overview of the organization of
|
||||
Clang Tools within the project as well as giving an introduction to some
|
||||
of the more important tools. However, it should be noted that this
|
||||
document is currently focused on Clang and Clang Tool developers, not on
|
||||
end users of these tools.
|
||||
|
||||
Clang Tools Organization
|
||||
========================
|
||||
|
||||
Clang Tools are CLI or GUI programs that are intended to be directly
|
||||
used by C++ developers. That is, they are *not* primarily for use by
|
||||
Clang developers, although they are hopefully useful to C++ developers
|
||||
who happen to work on Clang, and we try to actively dogfood their
|
||||
functionality. They are developed in three components: the underlying
|
||||
infrastructure for building a standalone tool based on Clang, core
|
||||
shared logic used by many different tools in the form of refactoring and
|
||||
rewriting libraries, and the tools themselves.
|
||||
|
||||
The underlying infrastructure for Clang Tools is the
|
||||
:doc:`LibTooling <LibTooling>` platform. See its documentation for much
|
||||
more detailed information about how this infrastructure works. The
|
||||
common refactoring and rewriting toolkit-style library is also part of
|
||||
LibTooling organizationally.
|
||||
|
||||
A few Clang Tools are developed alongside the core Clang libraries as
|
||||
examples and test cases of fundamental functionality. However, most of
|
||||
the tools are developed in a side repository to provide easy separation
|
||||
from the core libraries. We intentionally do not support public
|
||||
libraries in the side repository, as we want to carefully review and
|
||||
find good APIs for libraries as they are lifted out of a few tools and
|
||||
into the core Clang library set.
|
||||
|
||||
Regardless of which repository Clang Tools' code resides in, the
|
||||
development process and practices for all Clang Tools are exactly those
|
||||
of Clang itself. They are entirely within the Clang *project*,
|
||||
regardless of the version control scheme.
|
||||
|
||||
Core Clang Tools
|
||||
================
|
||||
|
||||
The core set of Clang tools that are within the main repository are
|
||||
tools that very specifically complement, and allow use and testing of
|
||||
*Clang* specific functionality.
|
||||
|
||||
``clang-check``
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
This tool combines the LibTooling framework for running a Clang tool
|
||||
with the basic Clang diagnostics by syntax checking specific files in a
|
||||
fast, command line interface. It can also accept flags to re-display the
|
||||
diagnostics in different formats with different flags, suitable for use
|
||||
driving an IDE or editor. Furthermore, it can be used in fixit-mode to
|
||||
directly apply fixit-hints offered by clang.
|
||||
|
||||
FIXME: Link to user-oriented clang-check documentation.
|
||||
|
||||
Extra Clang Tools
|
||||
=================
|
||||
|
||||
As various categories of Clang Tools are added to the extra repository,
|
||||
they'll be tracked here. The focus of this documentation is on the scope
|
||||
and features of the tools for other tool developers; each tool should
|
||||
provide its own user-focused documentation.
|
|
@ -1,212 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>How To Setup Clang Tooling For LLVM</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>How To Setup Clang Tooling For LLVM</h1>
|
||||
<p>Clang Tooling provides infrastructure to write tools that need syntactic and
|
||||
semantic information about a program. This term also relates to a set of specific
|
||||
tools using this infrastructure (e.g. <code>clang-check</code>). This document
|
||||
provides information on how to set up and use Clang Tooling for the LLVM source
|
||||
code.</p>
|
||||
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2><a name="introduction">Introduction</a></h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Clang Tooling needs a compilation database to figure out specific build
|
||||
options for each file. Currently it can create a compilation database from the
|
||||
<code>compile_commands.json</code> file, generated by CMake. When invoking
|
||||
clang tools, you can either specify a path to a build directory using a command
|
||||
line parameter <code>-p</code> or let Clang Tooling find this file in your
|
||||
source tree. In either case you need to configure your build using CMake to use
|
||||
clang tools.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2><a name="using-make">Setup Clang Tooling Using CMake and Make</a></h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>If you intend to use make to build LLVM, you should have CMake 2.8.6 or later
|
||||
installed (can be found <a href="http://cmake.org">here</a>).</p>
|
||||
<p>First, you need to generate Makefiles for LLVM with CMake. You need to make
|
||||
a build directory and run CMake from it:</p>
|
||||
<pre>
|
||||
mkdir your/build/directory
|
||||
cd your/build/directory
|
||||
cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
|
||||
</pre>
|
||||
|
||||
<p>If you want to use clang instead of GCC, you can add
|
||||
<code>-DCMAKE_C_COMPILER=/path/to/clang
|
||||
-DCMAKE_CXX_COMPILER=/path/to/clang++</code>.
|
||||
You can also use ccmake, which provides a curses interface to configure CMake
|
||||
variables for lazy people.</p>
|
||||
|
||||
<p>As a result, the new <code>compile_commands.json</code> file should appear in
|
||||
the current directory. You should link it to the LLVM source tree so that Clang
|
||||
Tooling is able to use it:</p>
|
||||
<pre>
|
||||
ln -s $PWD/compile_commands.json path/to/llvm/source/
|
||||
</pre>
|
||||
|
||||
<p>Now you are ready to build and test LLVM using make:</p>
|
||||
<pre>
|
||||
make check-all
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2><a name="using-tools">Using Clang Tools</a></h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>After you completed the previous steps, you are ready to run clang tools. If
|
||||
you have a recent clang installed, you should have <code>clang-check</code> in
|
||||
$PATH. Try to run it on any .cpp file inside the LLVM source tree:</p>
|
||||
<pre>
|
||||
clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp
|
||||
</pre>
|
||||
<p>If you're using vim, it's convenient to have clang-check integrated. Put this
|
||||
into your .vimrc:</p>
|
||||
<pre>
|
||||
function! ClangCheckImpl(cmd)
|
||||
if &autowrite | wall | endif
|
||||
echo "Running " . a:cmd . " ..."
|
||||
let l:output = system(a:cmd)
|
||||
cexpr l:output
|
||||
cwindow
|
||||
let w:quickfix_title = a:cmd
|
||||
if v:shell_error != 0
|
||||
cc
|
||||
endif
|
||||
let g:clang_check_last_cmd = a:cmd
|
||||
endfunction
|
||||
|
||||
function! ClangCheck()
|
||||
let l:filename = expand('%')
|
||||
if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$'
|
||||
call ClangCheckImpl("clang-check " . l:filename)
|
||||
elseif exists("g:clang_check_last_cmd")
|
||||
call ClangCheckImpl(g:clang_check_last_cmd)
|
||||
else
|
||||
echo "Can't detect file's compilation arguments and no previous clang-check invocation!"
|
||||
endif
|
||||
endfunction
|
||||
|
||||
nmap <silent> <F5> :call ClangCheck()<CR><CR>
|
||||
</pre>
|
||||
|
||||
<p>When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In case
|
||||
the current file has a different extension (for example, .h), F5 will re-run
|
||||
the last clang-check invocation made from this vim instance (if any). The
|
||||
output will go into the error window, which is opened automatically when
|
||||
clang-check finds errors, and can be re-opened with <code>:cope</code>.</p>
|
||||
|
||||
<p>Other <code>clang-check</code> options that can be useful when working with
|
||||
clang AST:</p>
|
||||
<ul>
|
||||
<li><code>-ast-print</code> - Build ASTs and then pretty-print them.</li>
|
||||
<li><code>-ast-dump</code> - Build ASTs and then debug dump them.</li>
|
||||
<li><code>-ast-dump-filter=<string></code> - Use with
|
||||
<code>-ast-dump</code> or <code>-ast-print</code> to dump/print
|
||||
only AST declaration nodes having a certain substring in a qualified name.
|
||||
Use <code>-ast-list</code> to list all filterable declaration node
|
||||
names.</li>
|
||||
<li><code>-ast-list</code> - Build ASTs and print the list of declaration
|
||||
node qualified names.</li>
|
||||
</ul>
|
||||
<p>Examples:</p>
|
||||
<pre>
|
||||
<b>$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer</b>
|
||||
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
|
||||
Dumping <anonymous namespace>::ActionFactory::newASTConsumer:
|
||||
clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 </home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3>
|
||||
(IfStmt 0x44d97c8 <line:65:5, line:66:45>
|
||||
<<<NULL>>>
|
||||
(ImplicitCastExpr 0x44d96d0 <line:65:9> '_Bool':'_Bool' <UserDefinedConversion>
|
||||
...
|
||||
<b>$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer</b>
|
||||
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
|
||||
Printing <anonymous namespace>::ActionFactory::newASTConsumer:
|
||||
clang::ASTConsumer *newASTConsumer() {
|
||||
if (this->ASTList.operator _Bool())
|
||||
return clang::CreateASTDeclNodeLister();
|
||||
if (this->ASTDump.operator _Bool())
|
||||
return clang::CreateASTDumper(this->ASTDumpFilter);
|
||||
if (this->ASTPrint.operator _Bool())
|
||||
return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter);
|
||||
return new clang::ASTConsumer();
|
||||
}
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2><a name="using-ninja">(Experimental) Using Ninja Build System</a></h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Optionally you can use the <a
|
||||
href="https://github.com/martine/ninja">Ninja</a> build system instead of
|
||||
make. It is aimed at making your builds faster. Currently this step will require
|
||||
building Ninja from sources and using a development version of CMake.</p>
|
||||
<p>To take advantage of using Clang Tools along with Ninja build you need at
|
||||
least CMake 2.8.9. At the moment CMake 2.8.9 is still under development, so you
|
||||
can get latest development sources and build it yourself:</p>
|
||||
<pre>
|
||||
git clone git://cmake.org/cmake.git
|
||||
cd cmake
|
||||
./bootstrap
|
||||
make
|
||||
sudo make install
|
||||
</pre>
|
||||
|
||||
<p>Having the correct version of CMake, you can clone the Ninja git repository
|
||||
and build Ninja from sources:</p>
|
||||
<pre>
|
||||
git clone git://github.com/martine/ninja.git
|
||||
cd ninja/
|
||||
./bootstrap.py
|
||||
</pre>
|
||||
<p>This will result in a single binary <code>ninja</code> in the current
|
||||
directory. It doesn't require installation and can just be copied to any
|
||||
location inside <code>$PATH</code>, say <code>/usr/local/bin/</code>:</p>
|
||||
<pre>
|
||||
sudo cp ninja /usr/local/bin/
|
||||
sudo chmod a+rx /usr/local/bin/ninja
|
||||
</pre>
|
||||
<p>After doing all of this, you'll need to generate Ninja build files for LLVM
|
||||
with CMake. You need to make a build directory and run CMake from it:</p>
|
||||
<pre>
|
||||
mkdir your/build/directory
|
||||
cd your/build/directory
|
||||
cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
|
||||
</pre>
|
||||
|
||||
<p>If you want to use clang instead of GCC, you can add
|
||||
<code>-DCMAKE_C_COMPILER=/path/to/clang
|
||||
-DCMAKE_CXX_COMPILER=/path/to/clang++</code>.
|
||||
You can also use ccmake, which provides a curses interface to configure CMake
|
||||
variables in an interactive manner.</p>
|
||||
|
||||
<p>As a result, the new <code>compile_commands.json</code> file should appear in
|
||||
the current directory. You should link it to the LLVM source tree so that Clang
|
||||
Tooling is able to use it:</p>
|
||||
<pre>
|
||||
ln -s $PWD/compile_commands.json path/to/llvm/source/
|
||||
</pre>
|
||||
|
||||
<p>Now you are ready to build and test LLVM using Ninja:</p>
|
||||
<pre>
|
||||
ninja check-all
|
||||
</pre>
|
||||
<p>Other target names can be used in the same way as with make.</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,211 @@
|
|||
===================================
|
||||
How To Setup Clang Tooling For LLVM
|
||||
===================================
|
||||
|
||||
Clang Tooling provides infrastructure to write tools that need syntactic
|
||||
and semantic information about a program. This term also relates to a set
|
||||
of specific tools using this infrastructure (e.g. ``clang-check``). This
|
||||
document provides information on how to set up and use Clang Tooling for
|
||||
the LLVM source code.
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Clang Tooling needs a compilation database to figure out specific build
|
||||
options for each file. Currently it can create a compilation database
|
||||
from the ``compile_commands.json`` file, generated by CMake. When
|
||||
invoking clang tools, you can either specify a path to a build directory
|
||||
using a command line parameter ``-p`` or let Clang Tooling find this
|
||||
file in your source tree. In either case you need to configure your
|
||||
build using CMake to use clang tools.
|
||||
|
||||
Setup Clang Tooling Using CMake and Make
|
||||
========================================
|
||||
|
||||
If you intend to use make to build LLVM, you should have CMake 2.8.6 or
|
||||
later installed (can be found `here <http://cmake.org>`_).
|
||||
|
||||
First, you need to generate Makefiles for LLVM with CMake. You need to
|
||||
make a build directory and run CMake from it:
|
||||
|
||||
::
|
||||
|
||||
mkdir your/build/directory
|
||||
cd your/build/directory
|
||||
cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
|
||||
|
||||
If you want to use clang instead of GCC, you can add
|
||||
``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``.
|
||||
You can also use ccmake, which provides a curses interface to configure
|
||||
CMake variables in an interactive manner.
|
||||
|
||||
As a result, the new ``compile_commands.json`` file should appear in the
|
||||
current directory. You should link it to the LLVM source tree so that
|
||||
Clang Tooling is able to use it:
|
||||
|
||||
::
|
||||
|
||||
ln -s $PWD/compile_commands.json path/to/llvm/source/
|
||||
|
||||
Now you are ready to build and test LLVM using make:
|
||||
|
||||
::
|
||||
|
||||
make check-all
|
||||
|
||||
Using Clang Tools
|
||||
=================
|
||||
|
||||
After you have completed the previous steps, you are ready to run clang
|
||||
tools. If you have a recent clang installed, you should have
|
||||
``clang-check`` in $PATH. Try to run it on any .cpp file inside the LLVM
|
||||
source tree:
|
||||
|
||||
::
|
||||
|
||||
clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp
|
||||
|
||||
If you're using vim, it's convenient to have clang-check integrated. Put
|
||||
this into your .vimrc:
|
||||
|
||||
::
|
||||
|
||||
function! ClangCheckImpl(cmd)
|
||||
if &autowrite | wall | endif
|
||||
echo "Running " . a:cmd . " ..."
|
||||
let l:output = system(a:cmd)
|
||||
cexpr l:output
|
||||
cwindow
|
||||
let w:quickfix_title = a:cmd
|
||||
if v:shell_error != 0
|
||||
cc
|
||||
endif
|
||||
let g:clang_check_last_cmd = a:cmd
|
||||
endfunction
|
||||
|
||||
function! ClangCheck()
|
||||
let l:filename = expand('%')
|
||||
if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$'
|
||||
call ClangCheckImpl("clang-check " . l:filename)
|
||||
elseif exists("g:clang_check_last_cmd")
|
||||
call ClangCheckImpl(g:clang_check_last_cmd)
|
||||
else
|
||||
echo "Can't detect file's compilation arguments and no previous clang-check invocation!"
|
||||
endif
|
||||
endfunction
|
||||
|
||||
nmap <silent> <F5> :call ClangCheck()<CR><CR>
|
||||
|
||||
When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In
|
||||
case the current file has a different extension (for example, .h), F5
|
||||
will re-run the last clang-check invocation made from this vim instance
|
||||
(if any). The output will go into the error window, which is opened
|
||||
automatically when clang-check finds errors, and can be re-opened with
|
||||
``:cope``.
|
||||
|
||||
Other ``clang-check`` options that can be useful when working with clang
|
||||
AST:
|
||||
|
||||
- ``-ast-print`` - Build ASTs and then pretty-print them.
|
||||
- ``-ast-dump`` - Build ASTs and then debug dump them.
|
||||
- ``-ast-dump-filter=<string>`` - Use with ``-ast-dump`` or
|
||||
``-ast-print`` to dump/print only AST declaration nodes having a
|
||||
certain substring in a qualified name. Use ``-ast-list`` to list all
|
||||
filterable declaration node names.
|
||||
- ``-ast-list`` - Build ASTs and print the list of declaration node
|
||||
qualified names.
|
||||
|
||||
Examples:
|
||||
|
||||
::
|
||||
|
||||
$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer
|
||||
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
|
||||
Dumping <anonymous namespace>::ActionFactory::newASTConsumer:
|
||||
clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 </home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3>
|
||||
(IfStmt 0x44d97c8 <line:65:5, line:66:45>
|
||||
<<<NULL>>>
|
||||
(ImplicitCastExpr 0x44d96d0 <line:65:9> '_Bool':'_Bool' <UserDefinedConversion>
|
||||
...
|
||||
$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer
|
||||
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
|
||||
Printing <anonymous namespace>::ActionFactory::newASTConsumer:
|
||||
clang::ASTConsumer *newASTConsumer() {
|
||||
if (this->ASTList.operator _Bool())
|
||||
return clang::CreateASTDeclNodeLister();
|
||||
if (this->ASTDump.operator _Bool())
|
||||
return clang::CreateASTDumper(this->ASTDumpFilter);
|
||||
if (this->ASTPrint.operator _Bool())
|
||||
return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter);
|
||||
return new clang::ASTConsumer();
|
||||
}
|
||||
|
||||
(Experimental) Using Ninja Build System
|
||||
=======================================
|
||||
|
||||
Optionally you can use the `Ninja <https://github.com/martine/ninja>`_
|
||||
build system instead of make. It is aimed at making your builds faster.
|
||||
Currently this step will require building Ninja from sources and using a
|
||||
development version of CMake.
|
||||
|
||||
To take advantage of using Clang Tools along with Ninja build you need
|
||||
at least CMake 2.8.9. At the moment CMake 2.8.9 is still under
|
||||
development, so you can get the latest development sources and build it
|
||||
yourself:
|
||||
|
||||
::
|
||||
|
||||
git clone git://cmake.org/cmake.git
|
||||
cd cmake
|
||||
./bootstrap
|
||||
make
|
||||
sudo make install
|
||||
|
||||
Having the correct version of CMake, you can clone the Ninja git
|
||||
repository and build Ninja from sources:
|
||||
|
||||
::
|
||||
|
||||
git clone git://github.com/martine/ninja.git
|
||||
cd ninja/
|
||||
./bootstrap.py
|
||||
|
||||
This will result in a single binary ``ninja`` in the current directory.
|
||||
It doesn't require installation and can just be copied to any location
|
||||
inside ``$PATH``, say ``/usr/local/bin/``:
|
||||
|
||||
::
|
||||
|
||||
sudo cp ninja /usr/local/bin/
|
||||
sudo chmod a+rx /usr/local/bin/ninja
|
||||
|
||||
After doing all of this, you'll need to generate Ninja build files for
|
||||
LLVM with CMake. You need to make a build directory and run CMake from
|
||||
it:
|
||||
|
||||
::
|
||||
|
||||
mkdir your/build/directory
|
||||
cd your/build/directory
|
||||
cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
|
||||
|
||||
If you want to use clang instead of GCC, you can add
|
||||
``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``.
|
||||
You can also use ccmake, which provides a curses interface to configure
|
||||
CMake variables in an interactive manner.
|
||||
|
||||
As a result, the new ``compile_commands.json`` file should appear in the
|
||||
current directory. You should link it to the LLVM source tree so that
|
||||
Clang Tooling is able to use it:
|
||||
|
||||
::
|
||||
|
||||
ln -s $PWD/compile_commands.json path/to/llvm/source/
|
||||
|
||||
Now you are ready to build and test LLVM using Ninja:
|
||||
|
||||
::
|
||||
|
||||
ninja check-all
|
||||
|
||||
Other target names can be used in the same way as with make.
|
|
@ -1,139 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Introduction to the Clang AST</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css" />
|
||||
<link type="text/css" rel="stylesheet" href="../content.css" />
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>Introduction to the Clang AST</h1>
|
||||
<p>This document gives a gentle introduction to the mysteries of the Clang AST.
|
||||
It is targeted at developers who either want to contribute to Clang, or use
|
||||
tools that work based on Clang's AST, like the AST matchers.</p>
|
||||
<!-- FIXME: Add link once we have an AST matcher document -->
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="intro">Introduction</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Clang's AST is different from ASTs produced by some other compilers in that it closely
|
||||
resembles both the written C++ code and the C++ standard. For example,
|
||||
parenthesis expressions and compile time constants are available in an unreduced
|
||||
form in the AST. This makes Clang's AST a good fit for refactoring tools.</p>
|
||||
|
||||
<p>Documentation for all Clang AST nodes is available via the generated
|
||||
<a href="http://clang.llvm.org/doxygen">Doxygen</a>. The doxygen online
|
||||
documentation is also indexed by your favorite search engine, which will make
|
||||
a search for clang and the AST node's class name usually turn up the doxygen
|
||||
of the class you're looking for (for example, search for: clang ParenExpr).</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="examine">Examining the AST</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>A good way to familiarize yourself with the Clang AST is to actually look
|
||||
at it on some simple example code. Clang has builtin AST-dump modes, which
|
||||
can be enabled with the flags -ast-dump and -ast-dump-xml. Note that -ast-dump-xml
|
||||
currently only works with debug-builds of clang.</p>
|
||||
|
||||
<p>Let's look at a simple example AST:</p>
|
||||
<pre>
|
||||
# cat test.cc
|
||||
int f(int x) {
|
||||
int result = (x / 42);
|
||||
return result;
|
||||
}
|
||||
|
||||
# Clang by default is a frontend for many tools; -cc1 tells it to directly
|
||||
# use the C++ compiler mode. -undef leaves out some internal declarations.
|
||||
$ clang -cc1 -undef -ast-dump-xml test.cc
|
||||
... cutting out internal declarations of clang ...
|
||||
<TranslationUnit ptr="0x4871160">
|
||||
<Function ptr="0x48a5800" name="f" prototype="true">
|
||||
<FunctionProtoType ptr="0x4871de0" canonical="0x4871de0">
|
||||
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
|
||||
<parameters>
|
||||
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
|
||||
</parameters>
|
||||
</FunctionProtoType>
|
||||
<ParmVar ptr="0x4871d80" name="x" initstyle="c">
|
||||
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
|
||||
</ParmVar>
|
||||
<Stmt>
|
||||
(CompoundStmt 0x48a5a38 <t2.cc:1:14, line:4:1>
|
||||
(DeclStmt 0x48a59c0 <line:2:3, col:24>
|
||||
0x48a58c0 "int result =
|
||||
(ParenExpr 0x48a59a0 <col:16, col:23> 'int'
|
||||
(BinaryOperator 0x48a5978 <col:17, col:21> 'int' '/'
|
||||
(ImplicitCastExpr 0x48a5960 <col:17> 'int' <LValueToRValue>
|
||||
(DeclRefExpr 0x48a5918 <col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int'))
|
||||
(IntegerLiteral 0x48a5940 <col:21> 'int' 42)))")
|
||||
(ReturnStmt 0x48a5a18 <line:3:3, col:10>
|
||||
(ImplicitCastExpr 0x48a5a00 <col:10> 'int' <LValueToRValue>
|
||||
(DeclRefExpr 0x48a59d8 <col:10> 'int' lvalue Var 0x48a58c0 'result' 'int'))))
|
||||
|
||||
</Stmt>
|
||||
</Function>
|
||||
</TranslationUnit>
|
||||
</pre>
|
||||
<p>In general, -ast-dump-xml dumps declarations in an XML-style format and
|
||||
statements in an S-expression-style format.
|
||||
The toplevel declaration in a translation unit is always the
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html">translation unit declaration</a>.
|
||||
In this example, our first user written declaration is the
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">function declaration</a>
|
||||
of 'f'. The body of 'f' is a <a href="http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html">compound statement</a>,
|
||||
whose child nodes are a <a href="http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html">declaration statement</a>
|
||||
that declares our result variable, and the
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html">return statement</a>.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="context">AST Context</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>All information about the AST for a translation unit is bundled up in the class
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html">ASTContext</a>.
|
||||
It allows traversal of the whole translation unit starting from
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#abd909fb01ef10cfd0244832a67b1dd64">getTranslationUnitDecl</a>,
|
||||
or to access Clang's <a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#a4f95adb9958e22fbe55212ae6482feb4">table of identifiers</a>
|
||||
for the parsed translation unit.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="nodes">AST Nodes</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Clang's AST nodes are modeled on a class hierarchy that does not have a common
|
||||
ancestor. Instead, there are multiple larger hierarchies for basic node types like
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a> and
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>. Many
|
||||
important AST nodes derive from <a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>,
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>,
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclContext.html">DeclContext</a> or
|
||||
<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>,
|
||||
with some classes deriving from both Decl and DeclContext.</p>
|
||||
<p>There are also a multitude of nodes in the AST that are not part of a
|
||||
larger hierarchy, and are only reachable from specific other nodes,
|
||||
like <a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBaseSpecifier.html">CXXBaseSpecifier</a>.
|
||||
</p>
|
||||
|
||||
<p>Thus, to traverse the full AST, one starts from the <a href="http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html">TranslationUnitDecl</a>
|
||||
and then recursively traverses everything that can be reached from that node
|
||||
- this information has to be encoded for each specific node type. This algorithm
|
||||
is encoded in the <a href="http://clang.llvm.org/doxygen/classclang_1_1RecursiveASTVisitor.html">RecursiveASTVisitor</a>.
|
||||
See the <a href="http://clang.llvm.org/docs/RAVFrontendAction.html">RecursiveASTVisitor tutorial</a>.</p>
|
||||
|
||||
<p>The two most basic nodes in the Clang AST are statements (<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>)
|
||||
and declarations (<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>).
|
||||
Note that expressions (<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>)
|
||||
are also statements in Clang's AST.</p>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
=============================
|
||||
Introduction to the Clang AST
|
||||
=============================
|
||||
|
||||
This document gives a gentle introduction to the mysteries of the Clang
|
||||
AST. It is targeted at developers who either want to contribute to
|
||||
Clang, or use tools that work based on Clang's AST, like the AST
|
||||
matchers.
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Clang's AST is different from ASTs produced by some other compilers in
|
||||
that it closely resembles both the written C++ code and the C++
|
||||
standard. For example, parenthesis expressions and compile time
|
||||
constants are available in an unreduced form in the AST. This makes
|
||||
Clang's AST a good fit for refactoring tools.
|
||||
|
||||
Documentation for all Clang AST nodes is available via the generated
|
||||
`Doxygen <http://clang.llvm.org/doxygen>`_. The doxygen online
|
||||
documentation is also indexed by your favorite search engine, which will
|
||||
make a search for clang and the AST node's class name usually turn up
|
||||
the doxygen of the class you're looking for (for example, search for:
|
||||
clang ParenExpr).
|
||||
|
||||
Examining the AST
|
||||
=================
|
||||
|
||||
A good way to familiarize yourself with the Clang AST is to actually look
|
||||
at it on some simple example code. Clang has builtin AST-dump modes,
|
||||
which can be enabled with the flags -ast-dump and -ast-dump-xml. Note
|
||||
that -ast-dump-xml currently only works with debug-builds of clang.
|
||||
|
||||
Let's look at a simple example AST:
|
||||
|
||||
::
|
||||
|
||||
# cat test.cc
|
||||
int f(int x) {
|
||||
int result = (x / 42);
|
||||
return result;
|
||||
}
|
||||
|
||||
# Clang by default is a frontend for many tools; -cc1 tells it to directly
|
||||
# use the C++ compiler mode. -undef leaves out some internal declarations.
|
||||
$ clang -cc1 -undef -ast-dump-xml test.cc
|
||||
... cutting out internal declarations of clang ...
|
||||
<TranslationUnit ptr="0x4871160">
|
||||
<Function ptr="0x48a5800" name="f" prototype="true">
|
||||
<FunctionProtoType ptr="0x4871de0" canonical="0x4871de0">
|
||||
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
|
||||
<parameters>
|
||||
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
|
||||
</parameters>
|
||||
</FunctionProtoType>
|
||||
<ParmVar ptr="0x4871d80" name="x" initstyle="c">
|
||||
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
|
||||
</ParmVar>
|
||||
<Stmt>
|
||||
(CompoundStmt 0x48a5a38 <t2.cc:1:14, line:4:1>
|
||||
(DeclStmt 0x48a59c0 <line:2:3, col:24>
|
||||
0x48a58c0 "int result =
|
||||
(ParenExpr 0x48a59a0 <col:16, col:23> 'int'
|
||||
(BinaryOperator 0x48a5978 <col:17, col:21> 'int' '/'
|
||||
(ImplicitCastExpr 0x48a5960 <col:17> 'int' <LValueToRValue>
|
||||
(DeclRefExpr 0x48a5918 <col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int'))
|
||||
(IntegerLiteral 0x48a5940 <col:21> 'int' 42)))")
|
||||
(ReturnStmt 0x48a5a18 <line:3:3, col:10>
|
||||
(ImplicitCastExpr 0x48a5a00 <col:10> 'int' <LValueToRValue>
|
||||
(DeclRefExpr 0x48a59d8 <col:10> 'int' lvalue Var 0x48a58c0 'result' 'int'))))
|
||||
|
||||
</Stmt>
|
||||
</Function>
|
||||
</TranslationUnit>
|
||||
|
||||
In general, -ast-dump-xml dumps declarations in an XML-style format and
|
||||
statements in an S-expression-style format. The toplevel declaration in
|
||||
a translation unit is always the `translation unit
|
||||
declaration <http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html>`_.
|
||||
In this example, our first user written declaration is the `function
|
||||
declaration <http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html>`_
|
||||
of 'f'. The body of 'f' is a `compound
|
||||
statement <http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html>`_,
|
||||
whose child nodes are a `declaration
|
||||
statement <http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html>`_
|
||||
that declares our result variable, and the `return
|
||||
statement <http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html>`_.
|
||||
|
||||
AST Context
|
||||
===========
|
||||
|
||||
All information about the AST for a translation unit is bundled up in
|
||||
the class
|
||||
`ASTContext <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html>`_.
|
||||
It allows traversal of the whole translation unit starting from
|
||||
`getTranslationUnitDecl <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#abd909fb01ef10cfd0244832a67b1dd64>`_,
|
||||
or to access Clang's `table of
|
||||
identifiers <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#a4f95adb9958e22fbe55212ae6482feb4>`_
|
||||
for the parsed translation unit.
|
||||
|
||||
AST Nodes
|
||||
=========
|
||||
|
||||
Clang's AST nodes are modeled on a class hierarchy that does not have a
|
||||
common ancestor. Instead, there are multiple larger hierarchies for
|
||||
basic node types like
|
||||
`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_ and
|
||||
`Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_. Many
|
||||
important AST nodes derive from
|
||||
`Type <http://clang.llvm.org/doxygen/classclang_1_1Type.html>`_,
|
||||
`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_,
|
||||
`DeclContext <http://clang.llvm.org/doxygen/classclang_1_1DeclContext.html>`_
|
||||
or `Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_, with
|
||||
some classes deriving from both Decl and DeclContext.
|
||||
|
||||
There are also a multitude of nodes in the AST that are not part of a
|
||||
larger hierarchy, and are only reachable from specific other nodes, like
|
||||
`CXXBaseSpecifier <http://clang.llvm.org/doxygen/classclang_1_1CXXBaseSpecifier.html>`_.
|
||||
|
||||
Thus, to traverse the full AST, one starts from the
|
||||
`TranslationUnitDecl <http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html>`_
|
||||
and then recursively traverses everything that can be reached from that
|
||||
node - this information has to be encoded for each specific node type.
|
||||
This algorithm is encoded in the
|
||||
`RecursiveASTVisitor <http://clang.llvm.org/doxygen/classclang_1_1RecursiveASTVisitor.html>`_.
|
||||
See the `RecursiveASTVisitor
|
||||
tutorial <http://clang.llvm.org/docs/RAVFrontendAction.html>`_.
|
||||
|
||||
The two most basic nodes in the Clang AST are statements
|
||||
(`Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_) and
|
||||
declarations
|
||||
(`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_). Note
|
||||
that expressions
|
||||
(`Expr <http://clang.llvm.org/doxygen/classclang_1_1Expr.html>`_) are
|
||||
also statements in Clang's AST.
|
|
@ -1,89 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>JSON Compilation Database Format Specification</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>JSON Compilation Database Format Specification</h1>
|
||||
<p>This document describes a format for specifying how to replay
|
||||
single compilations independently of the build system.</p>
|
||||
|
||||
<h2>Background</h2>
|
||||
<p>Tools based on the C++ Abstract Syntax Tree need full information how to
|
||||
parse a translation unit. Usually this information is implicitly
|
||||
available in the build system, but running tools as part of
|
||||
the build system is not necessarily the best solution:
|
||||
<ul>
|
||||
<li>Build systems are inherently change driven, so running multiple
|
||||
tools over the same code base without changing the code does not fit
|
||||
into the architecture of many build systems.</li>
|
||||
<li>Figuring out whether things have changed is often an IO bound
|
||||
process; this makes it hard to build low latency end user tools based
|
||||
on the build system.</li>
|
||||
<li>Build systems are inherently sequential in the build graph, for example
|
||||
due to generated source code. While tools that run independently of the
|
||||
build still need the generated source code to exist, running tools multiple
|
||||
times over unchanging source does not require serialization of the runs
|
||||
according to the build dependency graph.</li>
|
||||
</ul>
|
||||
</p>
|
||||
|
||||
<h2>Supported Systems</h2>
|
||||
<p>Currently <a href="http://cmake.org">CMake</a> (since 2.8.5) supports generation of compilation
|
||||
databases for Unix Makefile builds (Ninja builds in the works) with the option
|
||||
CMAKE_EXPORT_COMPILE_COMMANDS.</p>
|
||||
<p>Clang's tooling interface supports reading compilation databases; see
|
||||
the <a href="LibTooling.html">LibTooling documentation</a>. libclang and its
|
||||
python bindings also support this (since clang 3.2); see
|
||||
<a href="/doxygen/group__COMPILATIONDB.html">CXCompilationDatabase.h</a>.</p>
|
||||
|
||||
<h2>Format</h2>
|
||||
<p>A compilation database is a JSON file, which consists of an array of
|
||||
"command objects", where each command object specifies one way a translation unit
|
||||
is compiled in the project.</p>
|
||||
<p>Each command object contains the translation unit's main file, the working
|
||||
directory of the compile run and the actual compile command.</p>
|
||||
<p>Example:
|
||||
<pre>
|
||||
[
|
||||
{ "directory": "/home/user/llvm/build",
|
||||
"command": "/usr/bin/clang++ -Irelative -DSOMEDEF='\"With spaces and quotes.\"' -c -o file.o file.cc",
|
||||
"file": "file.cc" },
|
||||
...
|
||||
]
|
||||
</pre>
|
||||
The contracts for each field in the command object are:
|
||||
<ul>
|
||||
<li><b>directory:</b> The working directory of the compilation. All paths specified
|
||||
in the <b>command</b> or <b>file</b> fields must be either absolute or relative to
|
||||
this directory.</li>
|
||||
<li><b>file:</b> The main translation unit source processed by this compilation step.
|
||||
This is used by tools as the key into the compilation database. There can be multiple
|
||||
command objects for the same file, for example if the same source file is
|
||||
compiled with different configurations.</li>
|
||||
<li><b>command:</b> The compile command executed. After JSON unescaping, this must
|
||||
be a valid command to rerun the exact compilation step for the translation unit in
|
||||
the environment the build system uses. Parameters use shell quoting and shell escaping
|
||||
of quotes, with '"' and '\' being the only special characters. Shell expansion is
|
||||
not supported.</li>
|
||||
</ul>
|
||||
</p>
|
||||
|
||||
<h2>Build System Integration</h2>
|
||||
<p>The convention is to name the file compile_commands.json and put it at the top
|
||||
of the build directory. Clang tools are pointed to the top of the build directory
|
||||
to detect the file and use the compilation database to parse C++ code in the source
|
||||
tree.</p>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
==============================================
|
||||
JSON Compilation Database Format Specification
|
||||
==============================================
|
||||
|
||||
This document describes a format for specifying how to replay single
|
||||
compilations independently of the build system.
|
||||
|
||||
Background
|
||||
==========
|
||||
|
||||
Tools based on the C++ Abstract Syntax Tree need full information how to
|
||||
parse a translation unit. Usually this information is implicitly
|
||||
available in the build system, but running tools as part of the build
|
||||
system is not necessarily the best solution:
|
||||
|
||||
- Build systems are inherently change driven, so running multiple tools
|
||||
over the same code base without changing the code does not fit into
|
||||
the architecture of many build systems.
|
||||
- Figuring out whether things have changed is often an IO bound
|
||||
process; this makes it hard to build low latency end user tools based
|
||||
on the build system.
|
||||
- Build systems are inherently sequential in the build graph, for
|
||||
example due to generated source code. While tools that run
|
||||
independently of the build still need the generated source code to
|
||||
exist, running tools multiple times over unchanging source does not
|
||||
require serialization of the runs according to the build dependency
|
||||
graph.
|
||||
|
||||
Supported Systems
|
||||
=================
|
||||
|
||||
Currently `CMake <http://cmake.org>`_ (since 2.8.5) supports generation
|
||||
of compilation databases for Unix Makefile builds (Ninja builds in the
|
||||
works) with the option ``CMAKE_EXPORT_COMPILE_COMMANDS``.
|
||||
|
||||
Clang's tooling interface supports reading compilation databases; see
|
||||
the `LibTooling documentation <LibTooling.html>`_. libclang and its
|
||||
python bindings also support this (since clang 3.2); see
|
||||
`CXCompilationDatabase.h </doxygen/group__COMPILATIONDB.html>`_.
|
||||
|
||||
Format
|
||||
======
|
||||
|
||||
A compilation database is a JSON file, which consists of an array of
|
||||
"command objects", where each command object specifies one way a
|
||||
translation unit is compiled in the project.
|
||||
|
||||
Each command object contains the translation unit's main file, the
|
||||
working directory of the compile run and the actual compile command.
|
||||
|
||||
Example:
|
||||
|
||||
::
|
||||
|
||||
[
|
||||
{ "directory": "/home/user/llvm/build",
|
||||
"command": "/usr/bin/clang++ -Irelative -DSOMEDEF='\"With spaces and quotes.\"' -c -o file.o file.cc",
|
||||
"file": "file.cc" },
|
||||
...
|
||||
]
|
||||
|
||||
The contracts for each field in the command object are:
|
||||
|
||||
- **directory:** The working directory of the compilation. All paths
|
||||
specified in the **command** or **file** fields must be either
|
||||
absolute or relative to this directory.
|
||||
- **file:** The main translation unit source processed by this
|
||||
compilation step. This is used by tools as the key into the
|
||||
compilation database. There can be multiple command objects for the
|
||||
same file, for example if the same source file is compiled with
|
||||
different configurations.
|
||||
- **command:** The compile command executed. After JSON unescaping,
|
||||
this must be a valid command to rerun the exact compilation step for
|
||||
the translation unit in the environment the build system uses.
|
||||
Parameters use shell quoting and shell escaping of quotes, with '"'
|
||||
and '\\' being the only special characters. Shell expansion is not
|
||||
supported.
|
||||
|
||||
Build System Integration
|
||||
========================
|
||||
|
||||
The convention is to name the file ``compile_commands.json`` and put it at
|
||||
the top of the build directory. Clang tools are pointed to the top of
|
||||
the build directory to detect the file and use the compilation database
|
||||
to parse C++ code in the source tree.
|
|
@ -1,533 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Tutorial for building tools using LibTooling and LibASTMatchers</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css" />
|
||||
<link type="text/css" rel="stylesheet" href="../content.css" />
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>Tutorial for building tools using LibTooling and LibASTMatchers</h1>
|
||||
<p>This document is intended to show how to build a useful source-to-source
|
||||
translation tool based on Clang's <a href="LibTooling.html">LibTooling</a>. It
|
||||
is explicitly aimed at people who are new to Clang, so all you should need is a
|
||||
working knowledge of C++ and the command line.</p>
|
||||
|
||||
<p>In order to work on the compiler, you need some basic knowledge of the
|
||||
abstract syntax tree (AST). To this end, the reader is encouraged to skim the
|
||||
<a href="http://clang.llvm.org/docs/IntroductionToTheClangAST.html">Introduction
|
||||
to the Clang AST</a></p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="obtainingclang">Step 0: Obtaining Clang</h2>
|
||||
<!-- ======================================================================= -->
|
||||
As Clang is part of the LLVM project, you'll need to download LLVM's source code
|
||||
first. Both Clang and LLVM are maintained as Subversion repositories, but we'll
|
||||
be accessing them through the git mirror. For further information, see the
|
||||
<a href="http://llvm.org/docs/GettingStarted.html">getting started guide</a>.
|
||||
|
||||
<pre class="doc_code">
|
||||
mkdir ~/clang-llvm && cd ~/clang-llvm
|
||||
git clone http://llvm.org/git/llvm.git
|
||||
cd llvm/tools
|
||||
git clone http://llvm.org/git/clang.git
|
||||
</pre>
|
||||
|
||||
Next you need to obtain the CMake build system and Ninja build tool. You may
|
||||
already have CMake installed, but current binary versions of CMake aren't built
|
||||
with Ninja support.
|
||||
|
||||
<pre class="doc_code">
|
||||
cd ~/clang-llvm
|
||||
git clone https://github.com/martine/ninja.git
|
||||
cd ninja
|
||||
git checkout release
|
||||
./bootstrap.py
|
||||
sudo cp ninja /usr/bin/
|
||||
|
||||
cd ~/clang-llvm
|
||||
git clone git://cmake.org/stage/cmake.git
|
||||
cd cmake
|
||||
git checkout next
|
||||
./bootstrap
|
||||
make
|
||||
sudo make install
|
||||
</pre>
|
||||
|
||||
<p>Okay. Now we'll build Clang!</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
cd ~/clang-llvm
|
||||
mkdir build && cd build
|
||||
cmake -G Ninja ../llvm -DLLVM_BUILD_TESTS=ON # Enable tests; default is off.
|
||||
ninja
|
||||
ninja check # Test LLVM only.
|
||||
ninja clang-test # Test Clang only.
|
||||
ninja install
|
||||
</pre>
|
||||
|
||||
<p>And we're live.</p>
|
||||
|
||||
<p>All of the tests should pass, though there is a (very) small chance that you
|
||||
can catch LLVM and Clang out of sync. Running <tt>'git svn rebase'</tt> in both
|
||||
the llvm and clang directories should fix any problems.</p>
|
||||
|
||||
<p>Finally, we want to set Clang as its own compiler.</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
cd ~/clang-llvm/build
|
||||
ccmake ../llvm
|
||||
</pre>
|
||||
|
||||
<p>The second command will bring up a GUI for configuring Clang. You need to set
|
||||
the entry for <tt>CMAKE_CXX_COMPILER</tt>. Press <tt>'t'</tt> to turn on
|
||||
advanced mode. Scroll down to <tt>CMAKE_CXX_COMPILER</tt>, and set it to
|
||||
<tt>/usr/bin/clang++</tt>, or wherever you installed it. Press <tt>'c'</tt> to
|
||||
configure, then <tt>'g'</tt> to generate CMake's files.</p>
|
||||
|
||||
<p>Finally, run ninja one last time, and you're done.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="clangtool">Step 1: Create a ClangTool</h2>
|
||||
<!-- ======================================================================= -->
|
||||
<p>Now that we have enough background knowledge, it's time to create the
|
||||
simplest productive ClangTool in existence: a syntax checker. While this already
|
||||
exists as <tt>clang-check</tt>, it's important to understand what's going
|
||||
on.</p>
|
||||
|
||||
<p>First, we'll need to create a new directory for our tool and tell CMake that
|
||||
it exists. As this is not going to be a core clang tool, it will live in the
|
||||
<tt>tools/extra</tt> repository.</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
cd ~/clang-llvm/llvm/tools/clang
|
||||
mkdir tools/extra/loop-convert
|
||||
echo 'add_subdirectory(loop-convert)' >> tools/extra/CMakeLists.txt
|
||||
vim tools/extra/loop-convert/CMakeLists.txt
|
||||
</pre>
|
||||
|
||||
CMakeLists.txt should have the following contents:
|
||||
<pre class="doc_code">
|
||||
set(LLVM_LINK_COMPONENTS support)
|
||||
set(LLVM_USED_LIBS clangTooling clangBasic clangAST)
|
||||
|
||||
add_clang_executable(loop-convert
|
||||
LoopConvert.cpp
|
||||
)
|
||||
target_link_libraries(loop-convert
|
||||
clangTooling
|
||||
clangBasic
|
||||
clangASTMatchers
|
||||
)
|
||||
</pre>
|
||||
|
||||
<p>With that done, Ninja will be able to compile our tool. Let's give it
|
||||
something to compile! Put the following into
|
||||
<tt>tools/extra/loop-convert/LoopConvert.cpp</tt>. A detailed explanation of why
|
||||
the different parts are needed can be found in the
|
||||
<a href="LibTooling.html">LibTooling documentation</a>.</p>
|
||||
|
||||
<pre>
|
||||
// Declares clang::SyntaxOnlyAction.
|
||||
#include "clang/Frontend/FrontendActions.h"
|
||||
#include "clang/Tooling/CommonOptionsParser.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
// Declares llvm::cl::extrahelp.
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace clang::tooling;
|
||||
using namespace llvm;
|
||||
|
||||
// CommonOptionsParser declares HelpMessage with a description of the common
|
||||
// command-line options related to the compilation database and input files.
|
||||
// It's nice to have this help message in all tools.
|
||||
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
|
||||
|
||||
// A help message for this specific tool can be added afterwards.
|
||||
static cl::extrahelp MoreHelp("\nMore help text...");
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
CommonOptionsParser OptionsParser(argc, argv);
|
||||
ClangTool Tool(OptionsParser.GetCompilations(),
|
||||
OptionsParser.GetSourcePathList());
|
||||
return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>());
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>And that's it! You can compile our new tool by running ninja from the
|
||||
<tt>build</tt> directory.</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
cd ~/clang-llvm/build
|
||||
ninja
|
||||
</pre>
|
||||
|
||||
<p>You should now be able to run the syntax checker, which is located in
|
||||
<tt>~/clang-llvm/build/bin</tt>, on any source file. Try it!</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
echo "int main() {}" > test.cpp
|
||||
bin/loop-convert test.cpp --
|
||||
</pre>
|
||||
|
||||
<p>Note the two dashes after we specify the source file. The additional options
|
||||
for the compiler are passed after the dashes rather than loading them from a
|
||||
compilation database - there just aren't any options needed right now.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="learnastmatchers">Intermezzo: Learn AST matcher basics</h2>
|
||||
<!-- ======================================================================= -->
|
||||
<p>Clang recently introduced the <a href="LibASTMatchers.html">ASTMatcher
|
||||
library</a> to provide a simple, powerful, and concise way to describe specific
|
||||
patterns in the AST. Implemented as a DSL powered by macros and templates (see
|
||||
<a href="../doxygen/ASTMatchers_8h_source.html">ASTMatchers.h</a> if you're
|
||||
curious), matchers offer the feel of algebraic data types common to functional
|
||||
programming languages.</p>
|
||||
|
||||
<p>For example, suppose you wanted to examine only binary operators. There is a
|
||||
matcher to do exactly that, conveniently named <tt>binaryOperator</tt>. I'll
|
||||
give you one guess what this matcher does:</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0))))
|
||||
</pre>
|
||||
|
||||
<p>Shockingly, it will match against addition expressions whose left hand side
|
||||
is exactly the literal 0. It will not match against other forms of 0, such as
|
||||
<tt>'\0'</tt> or <tt>NULL</tt>, but it will match against macros that expand to
|
||||
0. The matcher will also not match against calls to the overloaded operator
|
||||
<tt>'+'</tt>, as there is a separate <tt>operatorCallExpr</tt> matcher to handle
|
||||
overloaded operators.</p>
|
||||
|
||||
<p>There are AST matchers to match all the different nodes of the AST, narrowing
|
||||
matchers to only match AST nodes fulfilling specific criteria, and traversal
|
||||
matchers to get from one kind of AST node to another. For a complete list of AST
|
||||
matchers, take a look at the <a href="LibASTMatchersReference.html">AST Matcher
|
||||
References</a></p>
|
||||
|
||||
<p>All matchers that are nouns describe entities in the AST and can be bound,
|
||||
so that they can be referred to whenever a match is found. To do so, simply call
|
||||
the method <tt>bind</tt> on these matchers, e.g.:</p>
|
||||
<pre class="doc_code">
|
||||
variable(hasType(isInteger())).bind("intvar")
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="usingastmatchers">Step 2: Using AST matchers</h2>
|
||||
<!-- ======================================================================= -->
|
||||
<p>Okay, on to using matchers for real. Let's start by defining a matcher which
|
||||
will capture all <tt>for</tt> statements that define a new variable
|
||||
initialized to zero. Let's start with matching all <tt>for</tt> loops:</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
forStmt()
|
||||
</pre>
|
||||
|
||||
<p>Next, we want to specify that a single variable is declared in the first
|
||||
portion of the loop, so we can extend the matcher to</p>
|
||||
<pre class="doc_code">
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl()))))
|
||||
</pre>
|
||||
|
||||
<p>Finally, we can add the condition that the variable is initialized to
|
||||
zero.</p>
|
||||
<pre class="doc_code">
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
|
||||
hasInitializer(integerLiteral(equals(0))))))))
|
||||
</pre>
|
||||
|
||||
<p>It is fairly easy to read and understand the matcher definition ("match
|
||||
loops whose init portion declares a single variable which is initialized to the
|
||||
integer literal 0"), but deciding that every piece is necessary is more
|
||||
difficult. Note that this matcher will not match loops whose variables are
|
||||
initialized to <tt>'\0'</tt>, <tt>0.0</tt>, <tt>NULL</tt>, or any form of zero
|
||||
besides the integer 0.</p>
|
||||
|
||||
<p>The last step is giving the matcher a name and binding the <tt>ForStmt</tt>
|
||||
as we will want to do something with it:</p>
|
||||
<pre class="doc_code">
|
||||
StatementMatcher LoopMatcher =
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
|
||||
hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
|
||||
</pre>
|
||||
|
||||
<p>Once you have defined your matchers, you will need to add a little more
|
||||
scaffolding in order to run them. Matchers are paired with a
|
||||
<tt>MatchCallback</tt> and registered with a <tt>MatchFinder</tt> object, then
|
||||
run from a <tt>ClangTool</tt>. More code!</p>
|
||||
|
||||
Add the following to <tt>LoopConvert.cpp</tt>:
|
||||
<pre class="doc_code">
|
||||
StatementMatcher LoopMatcher =
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
|
||||
hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
|
||||
|
||||
class LoopPrinter : public MatchFinder::MatchCallback {
|
||||
public :
|
||||
virtual void run(const MatchFinder::MatchResult &Result) {
|
||||
if (const ForStmt *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
|
||||
FS->dump();
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
|
||||
And change <tt>main()</tt> to:
|
||||
<pre class="doc_code">
|
||||
int main(int argc, const char **argv) {
|
||||
CommonOptionsParser OptionsParser(argc, argv);
|
||||
ClangTool Tool(OptionsParser.GetCompilations(),
|
||||
OptionsParser.GetSourcePathList());
|
||||
|
||||
LoopPrinter Printer;
|
||||
MatchFinder Finder;
|
||||
Finder.addMatcher(LoopMatcher, &Printer);
|
||||
|
||||
return Tool.run(newFrontendActionFactory(&Finder));
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>Now, you should be able to recompile and run the code to discover for loops.
|
||||
Create a new file with a few examples, and test out our new handiwork:</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
cd ~/clang-llvm/llvm/llvm_build/
|
||||
ninja loop-convert
|
||||
vim ~/test-files/simple-loops.cc
|
||||
bin/loop-convert ~/test-files/simple-loops.cc
|
||||
</pre>
|
||||
|
||||
<!-- FIXME: add example step-2a -->
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="morematchers">Step 3.5: More Complicated Matchers</h2>
|
||||
<!-- ======================================================================= -->
|
||||
<p>Our simple matcher is capable of discovering for loops, but we would still
|
||||
need to filter out many more ourselves. We can do a good portion of the
|
||||
remaining work with some cleverly chosen matchers, but first we need to decide
|
||||
exactly which properties we want to allow.</p>
|
||||
|
||||
<p>How can we characterize for loops over arrays which would be eligible for
|
||||
translation to range-based syntax? Range based loops over arrays of size
|
||||
<tt>N</tt> that:</p>
|
||||
<ul>
|
||||
<li>start at index <tt>0</tt></li>
|
||||
<li>iterate consecutively</li>
|
||||
<li>end at index <tt>N-1</tt></li>
|
||||
</ul>
|
||||
|
||||
<p>We already check for (1), so all we need to add is a check to the loop's
|
||||
condition to ensure that the loop's index variable is compared against
|
||||
<tt>N</tt> and another check to ensure that the increment step just increments
|
||||
this same variable. The matcher for (2) is straightforward: require a pre- or
|
||||
post-increment of the same variable declared in the init portion.</p>
|
||||
|
||||
<p>Unfortunately, such a matcher is impossible to write. Matchers contain no
|
||||
logic for comparing two arbitrary AST nodes and determining whether or not they
|
||||
are equal, so the best we can do is matching more than we would like to allow,
|
||||
and punting extra comparisons to the callback.</p>
|
||||
|
||||
<p>In any case, we can start building this sub-matcher. We can require that the
|
||||
increment step be a unary increment like this:</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
hasIncrement(unaryOperator(hasOperatorName("++")))
|
||||
</pre>
|
||||
|
||||
<p>Specifying what is incremented introduces another quirk of Clang's AST:
|
||||
Usages of variables are represented as <tt>DeclRefExpr</tt>'s ("declaration
|
||||
reference expressions") because they are expressions which refer to variable
|
||||
declarations. To find a <tt>unaryOperator</tt> that refers to a specific
|
||||
declaration, we can simply add a second condition to it:</p>
|
||||
<pre class="doc_code">
|
||||
hasIncrement(unaryOperator(
|
||||
hasOperatorName("++"),
|
||||
hasUnaryOperand(declRefExpr())))
|
||||
</pre>
|
||||
|
||||
<p>Furthermore, we can restrict our matcher to only match if the incremented
|
||||
variable is an integer:</p>
|
||||
<pre class="doc_code">
|
||||
hasIncrement(unaryOperator(
|
||||
hasOperatorName("++"),
|
||||
hasUnaryOperand(declRefExpr(to(varDecl(hasType(isInteger())))))))
|
||||
</pre>
|
||||
|
||||
<p>And the last step will be to attach an identifier to this variable, so that
|
||||
we can retrieve it in the callback:</p>
|
||||
<pre class="doc_code">
|
||||
hasIncrement(unaryOperator(
|
||||
hasOperatorName("++"),
|
||||
hasUnaryOperand(declRefExpr(to(
|
||||
varDecl(hasType(isInteger())).bind("incrementVariable"))))))
|
||||
</pre>
|
||||
|
||||
<p>We can add this code to the definition of <tt>LoopMatcher</tt> and make sure
|
||||
that our program, outfitted with the new matcher, only prints out loops that
|
||||
declare a single variable initialized to zero and have an increment step
|
||||
consisting of a unary increment of some variable.</p>
|
||||
|
||||
<!-- FIXME: add example step-2b -->
|
||||
|
||||
<p>Now, we just need to add a matcher to check if the condition part of the
|
||||
<tt>for</tt> loop compares a variable against the size of the array. There is
|
||||
only one problem - we don't know which array we're iterating over without
|
||||
looking at the body of the loop! We are again restricted to approximating the
|
||||
result we want with matchers, filling in the details in the callback. So we
|
||||
start with:</p>
|
||||
<pre class="doc_code">
|
||||
hasCondition(binaryOperator(hasOperatorName("<")))
|
||||
</pre>
|
||||
|
||||
<p>It makes sense to ensure that the left-hand side is a reference to a
|
||||
variable, and that the right-hand side has integer type.</p>
|
||||
<pre class="doc_code">
|
||||
hasCondition(binaryOperator(
|
||||
hasOperatorName("<"),
|
||||
hasRHS(expr(hasType(isInteger()))),
|
||||
hasLHS(declRefExpr(to(varDecl(hasType(isInteger())))))))
|
||||
</pre>
|
||||
|
||||
<!-- FIXME: add example step-2c -->
|
||||
|
||||
<p>Why? Because it doesn't work. Of the three loops provided in
|
||||
<tt>test-files/simple.cpp</tt>, zero of them have a matching condition. A quick
|
||||
look at the AST dump of the first for loop, produced by the previous iteration
|
||||
of loop-convert, shows us the answer:</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
(ForStmt 0x173b240
|
||||
(DeclStmt 0x173afc8
|
||||
0x173af50 "int i =
|
||||
(IntegerLiteral 0x173afa8 'int' 0)")
|
||||
<<<NULL>>>
|
||||
(BinaryOperator 0x173b060 '_Bool' '<'
|
||||
(ImplicitCastExpr 0x173b030 'int' <LValueToRValue>
|
||||
(DeclRefExpr 0x173afe0 'int' lvalue Var 0x173af50 'i' 'int'))
|
||||
(ImplicitCastExpr 0x173b048 'int' <LValueToRValue>
|
||||
(DeclRefExpr 0x173b008 'const int' lvalue Var 0x170fa80 'N' 'const int')))
|
||||
(UnaryOperator 0x173b0b0 'int' lvalue prefix '++'
|
||||
(DeclRefExpr 0x173b088 'int' lvalue Var 0x173af50 'i' 'int'))
|
||||
(CompoundStatement …
|
||||
</pre>
|
||||
|
||||
<p>We already know that the declaration and increments both match, or this loop
|
||||
wouldn't have been dumped. The culprit lies in the implicit cast applied to the
|
||||
first operand (i.e. the LHS) of the less-than operator, an L-value to R-value
|
||||
conversion applied to the expression referencing <tt>i</tt>. Thankfully, the
|
||||
matcher library offers a solution to this problem in the form of
|
||||
<tt>ignoringParenImpCasts</tt>, which instructs the matcher to ignore implicit
|
||||
casts and parentheses before continuing to match. Adjusting the condition
|
||||
operator will restore the desired match.</p>
|
||||
|
||||
<pre class="doc_code">
|
||||
hasCondition(binaryOperator(
|
||||
hasOperatorName("<"),
|
||||
hasLHS(ignoringParenImpCasts(declRefExpr(
|
||||
to(varDecl(hasType(isInteger())))))),
|
||||
hasRHS(expr(hasType(isInteger())))))
|
||||
</pre>
|
||||
|
||||
<p>After adding binds to the expressions we wished to capture and extracting the
|
||||
identifier strings into variables, we have array-step-2 completed.</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="retrievingmatchednodes">Step 4: Retrieving Matched Nodes</h2>
|
||||
<!-- ======================================================================= -->
|
||||
<p>So far, the matcher callback isn't very interesting: it just dumps the loop's
|
||||
AST. At some point, we will need to make changes to the input source code. Next,
|
||||
we'll work on using the nodes we bound in the previous step.</p>
|
||||
|
||||
<p>The <tt>MatchFinder::run()</tt> callback takes a
|
||||
<tt>MatchFinder::MatchResult&</tt> as its parameter. We're most interested in
|
||||
its <tt>Context</tt> and <tt>Nodes</tt> members. Clang uses the
|
||||
<tt>ASTContext</tt> class to represent contextual information about the AST, as
|
||||
the name implies, though the most functionally important detail is that several
|
||||
operations require an <tt>ASTContext*</tt> parameter. More immediately useful is
|
||||
the set of matched nodes, and how we retrieve them.</p>
|
||||
|
||||
<!-- FIXME: Where is this binding described? -->
|
||||
|
||||
<p>Since we bind three variables (identified by ConditionVarName,
|
||||
InitVarName, and IncrementVarName), we can obtain the matched nodes by using the
|
||||
<tt>getNodeAs()</tt> member function.</p>
|
||||
|
||||
<p>In <tt>LoopActions.cpp</tt>:</p>
|
||||
<pre class="doc_code">
|
||||
#include "clang/AST/ASTContext.h"
|
||||
|
||||
void LoopPrinter::run(const MatchFinder::MatchResult &Result) {
|
||||
ASTContext *Context = Result.Context;
|
||||
const ForStmt *FS = Result.Nodes.getStmtAs<ForStmt>(LoopName);
|
||||
// We do not want to convert header files!
|
||||
if (!FS || !Context->getSourceManager().isFromMainFile(FS->getForLoc()))
|
||||
return;
|
||||
const VarDecl *IncVar = Result.Nodes.getNodeAs<VarDecl>(IncrementVarName);
|
||||
const VarDecl *CondVar = Result.Nodes.getNodeAs<VarDecl>(ConditionVarName);
|
||||
const VarDecl *InitVar = Result.Nodes.getNodeAs<VarDecl>(InitVarName);
|
||||
</pre>
|
||||
|
||||
<p>Now that we have the three variables, represented by their respective
|
||||
declarations, let's make sure that they're all the same, using a helper function
|
||||
I call <tt>areSameVariable()</tt>.</p>
|
||||
<pre class="doc_code">
|
||||
if (!areSameVariable(IncVar, CondVar) || !areSameVariable(IncVar, InitVar))
|
||||
return;
|
||||
llvm::outs() << "Potential array-based loop discovered.\n";
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>If execution reaches the end of <tt>LoopPrinter::run()</tt>, we know that the
|
||||
loop shell looks like</p>
|
||||
<pre class="doc_code">
|
||||
for (int i= 0; i < expr(); ++i) { ... }
|
||||
</pre>
|
||||
|
||||
<p>For now, we will just print a message explaining that we found a loop. The
|
||||
next section will deal with recursively traversing the AST to discover all
|
||||
changes needed.</p>
|
||||
|
||||
<p>As a side note, here is the implementation of <tt>areSameVariable</tt>. Clang
|
||||
associates a <tt>VarDecl</tt> with each variable to represent the variable's
|
||||
declaration. Since the "canonical" form of each declaration is unique by
|
||||
address, all we need to do is make sure neither <tt>ValueDecl</tt> (base class
|
||||
of <tt>VarDecl</tt>) is <tt>NULL</tt> and compare the canonical Decls.</p>
|
||||
<pre class="doc_code">
|
||||
static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) {
|
||||
return First && Second &&
|
||||
First->getCanonicalDecl() == Second->getCanonicalDecl();
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>It's not as trivial to test if two expressions are the same, though Clang has
|
||||
already done the hard work for us by providing a way to canonicalize
|
||||
expressions:</p>
|
||||
<pre class="doc_code">
|
||||
static bool areSameExpr(ASTContext* Context, const Expr *First,
|
||||
const Expr *Second) {
|
||||
if (!First || !Second)
|
||||
return false;
|
||||
llvm::FoldingSetNodeID FirstID, SecondID;
|
||||
First->Profile(FirstID, *Context, true);
|
||||
Second->Profile(SecondID, *Context, true);
|
||||
return FirstID == SecondID;
|
||||
}
|
||||
</pre>
|
||||
|
||||
<!-- FIXME: Add code example. -->
|
||||
|
||||
<p>This code relies on the comparison between two
|
||||
<tt>llvm::FoldingSetNodeIDs</tt>. As the documentation for
|
||||
<tt>Stmt::Profile()</tt> indicates, the <tt>Profile()</tt> member function
|
||||
builds a description of a node in the AST, based on its properties, along with
|
||||
those of its children. <tt>FoldingSetNodeID</tt> then serves as a hash we can
|
||||
use to compare expressions. We will need <tt>areSameExpr</tt> later. Before you
|
||||
run the new code on the additional loops added to test-files/simple.cpp, try to
|
||||
figure out which ones will be considered potentially convertible.</p>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,532 @@
|
|||
===============================================================
|
||||
Tutorial for building tools using LibTooling and LibASTMatchers
|
||||
===============================================================
|
||||
|
||||
This document is intended to show how to build a useful source-to-source
|
||||
translation tool based on Clang's `LibTooling <LibTooling.html>`_. It is
|
||||
explicitly aimed at people who are new to Clang, so all you should need
|
||||
is a working knowledge of C++ and the command line.
|
||||
|
||||
In order to work on the compiler, you need some basic knowledge of the
|
||||
abstract syntax tree (AST). To this end, the reader is encouraged to
|
||||
skim the :doc:`Introduction to the Clang
|
||||
AST <IntroductionToTheClangAST>`.
|
||||
|
||||
Step 0: Obtaining Clang
|
||||
=======================
|
||||
|
||||
As Clang is part of the LLVM project, you'll need to download LLVM's
|
||||
source code first. Both Clang and LLVM are maintained as Subversion
|
||||
repositories, but we'll be accessing them through the git mirror. For
|
||||
further information, see the `getting started
|
||||
guide <http://llvm.org/docs/GettingStarted.html>`_.
|
||||
|
||||
::
|
||||
|
||||
mkdir ~/clang-llvm && cd ~/clang-llvm
|
||||
git clone http://llvm.org/git/llvm.git
|
||||
cd llvm/tools
|
||||
git clone http://llvm.org/git/clang.git
|
||||
|
||||
Next you need to obtain the CMake build system and Ninja build tool. You
|
||||
may already have CMake installed, but current binary versions of CMake
|
||||
aren't built with Ninja support.
|
||||
|
||||
::
|
||||
|
||||
cd ~/clang-llvm
|
||||
git clone https://github.com/martine/ninja.git
|
||||
cd ninja
|
||||
git checkout release
|
||||
./bootstrap.py
|
||||
sudo cp ninja /usr/bin/
|
||||
|
||||
cd ~/clang-llvm
|
||||
git clone git://cmake.org/stage/cmake.git
|
||||
cd cmake
|
||||
git checkout next
|
||||
./bootstrap
|
||||
make
|
||||
sudo make install
|
||||
|
||||
Okay. Now we'll build Clang!
|
||||
|
||||
::
|
||||
|
||||
cd ~/clang-llvm
|
||||
mkdir build && cd build
|
||||
cmake -G Ninja ../llvm -DLLVM_BUILD_TESTS=ON # Enable tests; default is off.
|
||||
ninja
|
||||
ninja check # Test LLVM only.
|
||||
ninja clang-test # Test Clang only.
|
||||
ninja install
|
||||
|
||||
And we're live.
|
||||
|
||||
All of the tests should pass, though there is a (very) small chance that
|
||||
you can catch LLVM and Clang out of sync. Running ``'git svn rebase'``
|
||||
in both the llvm and clang directories should fix any problems.
|
||||
|
||||
Finally, we want to set Clang as its own compiler.
|
||||
|
||||
::
|
||||
|
||||
cd ~/clang-llvm/build
|
||||
ccmake ../llvm
|
||||
|
||||
The second command will bring up a GUI for configuring Clang. You need
|
||||
to set the entry for ``CMAKE_CXX_COMPILER``. Press ``'t'`` to turn on
|
||||
advanced mode. Scroll down to ``CMAKE_CXX_COMPILER``, and set it to
|
||||
``/usr/bin/clang++``, or wherever you installed it. Press ``'c'`` to
|
||||
configure, then ``'g'`` to generate CMake's files.
|
||||
|
||||
Finally, run ninja one last time, and you're done.
|
||||
|
||||
Step 1: Create a ClangTool
|
||||
==========================
|
||||
|
||||
Now that we have enough background knowledge, it's time to create the
|
||||
simplest productive ClangTool in existence: a syntax checker. While this
|
||||
already exists as ``clang-check``, it's important to understand what's
|
||||
going on.
|
||||
|
||||
First, we'll need to create a new directory for our tool and tell CMake
|
||||
that it exists. As this is not going to be a core clang tool, it will
|
||||
live in the ``tools/extra`` repository.
|
||||
|
||||
::
|
||||
|
||||
cd ~/clang-llvm/llvm/tools/clang
|
||||
mkdir tools/extra/loop-convert
|
||||
echo 'add_subdirectory(loop-convert)' >> tools/extra/CMakeLists.txt
|
||||
vim tools/extra/loop-convert/CMakeLists.txt
|
||||
|
||||
CMakeLists.txt should have the following contents:
|
||||
|
||||
::
|
||||
|
||||
set(LLVM_LINK_COMPONENTS support)
|
||||
set(LLVM_USED_LIBS clangTooling clangBasic clangAST)
|
||||
|
||||
add_clang_executable(loop-convert
|
||||
LoopConvert.cpp
|
||||
)
|
||||
target_link_libraries(loop-convert
|
||||
clangTooling
|
||||
clangBasic
|
||||
clangASTMatchers
|
||||
)
|
||||
|
||||
With that done, Ninja will be able to compile our tool. Let's give it
|
||||
something to compile! Put the following into
|
||||
``tools/extra/loop-convert/LoopConvert.cpp``. A detailed explanation of
|
||||
why the different parts are needed can be found in the `LibTooling
|
||||
documentation <LibTooling.html>`_.
|
||||
|
||||
::
|
||||
|
||||
// Declares clang::SyntaxOnlyAction.
|
||||
#include "clang/Frontend/FrontendActions.h"
|
||||
#include "clang/Tooling/CommonOptionsParser.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
// Declares llvm::cl::extrahelp.
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace clang::tooling;
|
||||
using namespace llvm;
|
||||
|
||||
// CommonOptionsParser declares HelpMessage with a description of the common
|
||||
// command-line options related to the compilation database and input files.
|
||||
// It's nice to have this help message in all tools.
|
||||
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
|
||||
|
||||
// A help message for this specific tool can be added afterwards.
|
||||
static cl::extrahelp MoreHelp("\nMore help text...");
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
CommonOptionsParser OptionsParser(argc, argv);
|
||||
ClangTool Tool(OptionsParser.GetCompilations(),
|
||||
OptionsParser.GetSourcePathList());
|
||||
return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>());
|
||||
}
|
||||
|
||||
And that's it! You can compile our new tool by running ninja from the
|
||||
``build`` directory.
|
||||
|
||||
::
|
||||
|
||||
cd ~/clang-llvm/build
|
||||
ninja
|
||||
|
||||
You should now be able to run the syntax checker, which is located in
|
||||
``~/clang-llvm/build/bin``, on any source file. Try it!
|
||||
|
||||
::
|
||||
|
||||
echo "int main() {}" > test.cpp
|
||||
bin/loop-convert test.cpp --
|
||||
|
||||
Note the two dashes after we specify the source file. The additional
|
||||
options for the compiler are passed after the dashes rather than loading
|
||||
them from a compilation database - there just aren't any options needed
|
||||
right now.
|
||||
|
||||
Intermezzo: Learn AST matcher basics
|
||||
====================================
|
||||
|
||||
Clang recently introduced the :doc:`ASTMatcher
|
||||
library <LibASTMatchers>` to provide a simple, powerful, and
|
||||
concise way to describe specific patterns in the AST. Implemented as a
|
||||
DSL powered by macros and templates (see
|
||||
`ASTMatchers.h <../doxygen/ASTMatchers_8h_source.html>`_ if you're
|
||||
curious), matchers offer the feel of algebraic data types common to
|
||||
functional programming languages.
|
||||
|
||||
For example, suppose you wanted to examine only binary operators. There
|
||||
is a matcher to do exactly that, conveniently named ``binaryOperator``.
|
||||
I'll give you one guess what this matcher does:
|
||||
|
||||
::
|
||||
|
||||
binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0))))
|
||||
|
||||
Shockingly, it will match against addition expressions whose left hand
|
||||
side is exactly the literal 0. It will not match against other forms of
|
||||
0, such as ``'\0'`` or ``NULL``, but it will match against macros that
|
||||
expand to 0. The matcher will also not match against calls to the
|
||||
overloaded operator ``'+'``, as there is a separate ``operatorCallExpr``
|
||||
matcher to handle overloaded operators.
|
||||
|
||||
There are AST matchers to match all the different nodes of the AST,
|
||||
narrowing matchers to only match AST nodes fulfilling specific criteria,
|
||||
and traversal matchers to get from one kind of AST node to another. For
|
||||
a complete list of AST matchers, take a look at the `AST Matcher
|
||||
Reference <LibASTMatchersReference.html>`_.
|
||||
|
||||
All matchers that are nouns describe entities in the AST and can be
|
||||
bound, so that they can be referred to whenever a match is found. To do
|
||||
so, simply call the method ``bind`` on these matchers, e.g.:
|
||||
|
||||
::
|
||||
|
||||
varDecl(hasType(isInteger())).bind("intvar")
|
||||
|
||||
Step 2: Using AST matchers
|
||||
==========================
|
||||
|
||||
Okay, on to using matchers for real. Let's start by defining a matcher
|
||||
which will capture all ``for`` statements that define a new variable
|
||||
initialized to zero. Let's start with matching all ``for`` loops:
|
||||
|
||||
::
|
||||
|
||||
forStmt()
|
||||
|
||||
Next, we want to specify that a single variable is declared in the first
|
||||
portion of the loop, so we can extend the matcher to
|
||||
|
||||
::
|
||||
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl()))))
|
||||
|
||||
Finally, we can add the condition that the variable is initialized to
|
||||
zero.
|
||||
|
||||
::
|
||||
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
|
||||
hasInitializer(integerLiteral(equals(0))))))))
|
||||
|
||||
It is fairly easy to read and understand the matcher definition ("match
|
||||
loops whose init portion declares a single variable which is initialized
|
||||
to the integer literal 0"), but deciding that every piece is necessary
|
||||
is more difficult. Note that this matcher will not match loops whose
|
||||
variables are initialized to ``'\0'``, ``0.0``, ``NULL``, or any form of
|
||||
zero besides the integer 0.
|
||||
|
||||
The last step is giving the matcher a name and binding the ``ForStmt``
|
||||
as we will want to do something with it:
|
||||
|
||||
::
|
||||
|
||||
StatementMatcher LoopMatcher =
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
|
||||
hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
|
||||
|
||||
Once you have defined your matchers, you will need to add a little more
|
||||
scaffolding in order to run them. Matchers are paired with a
|
||||
``MatchCallback`` and registered with a ``MatchFinder`` object, then run
|
||||
from a ``ClangTool``. More code!
|
||||
|
||||
Add the following to ``LoopConvert.cpp``:
|
||||
|
||||
::
|
||||
|
||||
StatementMatcher LoopMatcher =
|
||||
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
|
||||
hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
|
||||
|
||||
class LoopPrinter : public MatchFinder::MatchCallback {
|
||||
public :
|
||||
virtual void run(const MatchFinder::MatchResult &Result) {
|
||||
if (const ForStmt *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
|
||||
FS->dump();
}
|
||||
};
|
||||
|
||||
And change ``main()`` to:
|
||||
|
||||
::
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
CommonOptionsParser OptionsParser(argc, argv);
|
||||
ClangTool Tool(OptionsParser.GetCompilations(),
|
||||
OptionsParser.GetSourcePathList());
|
||||
|
||||
LoopPrinter Printer;
|
||||
MatchFinder Finder;
|
||||
Finder.addMatcher(LoopMatcher, &Printer);
|
||||
|
||||
return Tool.run(newFrontendActionFactory(&Finder));
|
||||
}
|
||||
|
||||
Now, you should be able to recompile and run the code to discover for
|
||||
loops. Create a new file with a few examples, and test out our new
|
||||
handiwork:
|
||||
|
||||
::
|
||||
|
||||
cd ~/clang-llvm/build/
|
||||
ninja loop-convert
|
||||
vim ~/test-files/simple-loops.cc
|
||||
bin/loop-convert ~/test-files/simple-loops.cc
|
||||
|
||||
Step 3.5: More Complicated Matchers
|
||||
===================================
|
||||
|
||||
Our simple matcher is capable of discovering for loops, but we would
|
||||
still need to filter out many more ourselves. We can do a good portion
|
||||
of the remaining work with some cleverly chosen matchers, but first we
|
||||
need to decide exactly which properties we want to allow.
|
||||
|
||||
How can we characterize for loops over arrays which would be eligible
|
||||
for translation to range-based syntax? Range based loops over arrays of
|
||||
size ``N`` that:
|
||||
|
||||
1. start at index ``0``
|
||||
2. iterate consecutively
|
||||
3. end at index ``N-1``
|
||||
|
||||
We already check for (1), so all we need to add is a check to the loop's
|
||||
condition to ensure that the loop's index variable is compared against
|
||||
``N`` and another check to ensure that the increment step just
|
||||
increments this same variable. The matcher for (2) is straightforward:
|
||||
require a pre- or post-increment of the same variable declared in the
|
||||
init portion.
|
||||
|
||||
Unfortunately, such a matcher is impossible to write. Matchers contain
|
||||
no logic for comparing two arbitrary AST nodes and determining whether
|
||||
or not they are equal, so the best we can do is matching more than we
|
||||
would like to allow, and punting extra comparisons to the callback.
|
||||
|
||||
In any case, we can start building this sub-matcher. We can require that
|
||||
the increment step be a unary increment like this:
|
||||
|
||||
::
|
||||
|
||||
hasIncrement(unaryOperator(hasOperatorName("++")))
|
||||
|
||||
Specifying what is incremented introduces another quirk of Clang's AST:
|
||||
Usages of variables are represented as ``DeclRefExpr``'s ("declaration
|
||||
reference expressions") because they are expressions which refer to
|
||||
variable declarations. To find a ``unaryOperator`` that refers to a
|
||||
specific declaration, we can simply add a second condition to it:
|
||||
|
||||
::
|
||||
|
||||
hasIncrement(unaryOperator(
|
||||
hasOperatorName("++"),
|
||||
hasUnaryOperand(declRefExpr())))
|
||||
|
||||
Furthermore, we can restrict our matcher to only match if the
|
||||
incremented variable is an integer:
|
||||
|
||||
::
|
||||
|
||||
hasIncrement(unaryOperator(
|
||||
hasOperatorName("++"),
|
||||
hasUnaryOperand(declRefExpr(to(varDecl(hasType(isInteger())))))))
|
||||
|
||||
And the last step will be to attach an identifier to this variable, so
|
||||
that we can retrieve it in the callback:
|
||||
|
||||
::
|
||||
|
||||
hasIncrement(unaryOperator(
|
||||
hasOperatorName("++"),
|
||||
hasUnaryOperand(declRefExpr(to(
|
||||
varDecl(hasType(isInteger())).bind("incrementVariable"))))))
|
||||
|
||||
We can add this code to the definition of ``LoopMatcher`` and make sure
|
||||
that our program, outfitted with the new matcher, only prints out loops
|
||||
that declare a single variable initialized to zero and have an increment
|
||||
step consisting of a unary increment of some variable.
|
||||
|
||||
Now, we just need to add a matcher to check if the condition part of the
|
||||
``for`` loop compares a variable against the size of the array. There is
|
||||
only one problem - we don't know which array we're iterating over
|
||||
without looking at the body of the loop! We are again restricted to
|
||||
approximating the result we want with matchers, filling in the details
|
||||
in the callback. So we start with:
|
||||
|
||||
::
|
||||
|
||||
hasCondition(binaryOperator(hasOperatorName("<")))
|
||||
|
||||
It makes sense to ensure that the left-hand side is a reference to a
|
||||
variable, and that the right-hand side has integer type.
|
||||
|
||||
::
|
||||
|
||||
hasCondition(binaryOperator(
|
||||
hasOperatorName("<"),
|
||||
hasRHS(expr(hasType(isInteger()))),
|
||||
hasLHS(declRefExpr(to(varDecl(hasType(isInteger())))))))
|
||||
|
||||
Why? Because it doesn't work. Of the three loops provided in
|
||||
``test-files/simple.cpp``, zero of them have a matching condition. A
|
||||
quick look at the AST dump of the first for loop, produced by the
|
||||
previous iteration of loop-convert, shows us the answer:
|
||||
|
||||
::
|
||||
|
||||
(ForStmt 0x173b240
|
||||
(DeclStmt 0x173afc8
|
||||
0x173af50 "int i =
|
||||
(IntegerLiteral 0x173afa8 'int' 0)")
|
||||
<<<NULL>>>
|
||||
(BinaryOperator 0x173b060 '_Bool' '<'
|
||||
(ImplicitCastExpr 0x173b030 'int'
|
||||
(DeclRefExpr 0x173afe0 'int' lvalue Var 0x173af50 'i' 'int'))
|
||||
(ImplicitCastExpr 0x173b048 'int'
|
||||
(DeclRefExpr 0x173b008 'const int' lvalue Var 0x170fa80 'N' 'const int')))
|
||||
(UnaryOperator 0x173b0b0 'int' lvalue prefix '++'
|
||||
(DeclRefExpr 0x173b088 'int' lvalue Var 0x173af50 'i' 'int'))
|
||||
(CompoundStatement …
|
||||
|
||||
We already know that the declaration and increments both match, or this
|
||||
loop wouldn't have been dumped. The culprit lies in the implicit cast
|
||||
applied to the first operand (i.e. the LHS) of the less-than operator,
|
||||
an L-value to R-value conversion applied to the expression referencing
|
||||
``i``. Thankfully, the matcher library offers a solution to this problem
|
||||
in the form of ``ignoringParenImpCasts``, which instructs the matcher to
|
||||
ignore implicit casts and parentheses before continuing to match.
|
||||
Adjusting the condition operator will restore the desired match.
|
||||
|
||||
::
|
||||
|
||||
hasCondition(binaryOperator(
|
||||
hasOperatorName("<"),
|
||||
hasRHS(expr(hasType(isInteger()))),
|
||||
hasLHS(ignoringParenImpCasts(declRefExpr(
|
||||
to(varDecl(hasType(isInteger()))))))))
|
||||
|
||||
After adding binds to the expressions we wished to capture and
|
||||
extracting the identifier strings into variables, we have array-step-2
|
||||
completed.
|
||||
|
||||
Step 4: Retrieving Matched Nodes
|
||||
================================
|
||||
|
||||
So far, the matcher callback isn't very interesting: it just dumps the
|
||||
loop's AST. At some point, we will need to make changes to the input
|
||||
source code. Next, we'll work on using the nodes we bound in the
|
||||
previous step.
|
||||
|
||||
The ``MatchFinder::MatchCallback::run()`` callback takes a
|
||||
``MatchFinder::MatchResult&`` as its parameter. We're most interested in
|
||||
its ``Context`` and ``Nodes`` members. Clang uses the ``ASTContext``
|
||||
class to represent contextual information about the AST, as the name
|
||||
implies, though the most functionally important detail is that several
|
||||
operations require an ``ASTContext*`` parameter. More immediately useful
|
||||
is the set of matched nodes, and how we retrieve them.
|
||||
|
||||
Since we bind three variables (identified by ConditionVarName,
|
||||
InitVarName, and IncrementVarName), we can obtain the matched nodes by
|
||||
using the ``getNodeAs()`` member function.
|
||||
|
||||
In ``LoopActions.cpp``:
|
||||
|
||||
::
|
||||
|
||||
#include "clang/AST/ASTContext.h"
|
||||
|
||||
void LoopPrinter::run(const MatchFinder::MatchResult &Result) {
|
||||
ASTContext *Context = Result.Context;
|
||||
const ForStmt *FS = Result.Nodes.getStmtAs<ForStmt>(LoopName);
|
||||
// We do not want to convert header files!
|
||||
if (!FS || !Context->getSourceManager().isFromMainFile(FS->getForLoc()))
|
||||
return;
|
||||
const VarDecl *IncVar = Result.Nodes.getNodeAs<VarDecl>(IncrementVarName);
|
||||
const VarDecl *CondVar = Result.Nodes.getNodeAs<VarDecl>(ConditionVarName);
|
||||
const VarDecl *InitVar = Result.Nodes.getNodeAs<VarDecl>(InitVarName);
|
||||
|
||||
Now that we have the three variables, represented by their respective
|
||||
declarations, let's make sure that they're all the same, using a helper
|
||||
function I call ``areSameVariable()``.
|
||||
|
||||
::
|
||||
|
||||
if (!areSameVariable(IncVar, CondVar) || !areSameVariable(IncVar, InitVar))
|
||||
return;
|
||||
llvm::outs() << "Potential array-based loop discovered.\n";
|
||||
}
|
||||
|
||||
If execution reaches the end of ``LoopPrinter::run()``, we know that the
|
||||
loop shell looks like
|
||||
|
||||
::
|
||||
|
||||
for (int i= 0; i < expr(); ++i) { ... }
|
||||
|
||||
For now, we will just print a message explaining that we found a loop.
|
||||
The next section will deal with recursively traversing the AST to
|
||||
discover all changes needed.
|
||||
|
||||
As a side note, here is the implementation of ``areSameVariable``. Clang
|
||||
associates a ``VarDecl`` with each variable to represent the variable's
|
||||
declaration. Since the "canonical" form of each declaration is unique by
|
||||
address, all we need to do is make sure neither ``ValueDecl`` (base
|
||||
class of ``VarDecl``) is ``NULL`` and compare the canonical Decls.
|
||||
|
||||
::
|
||||
|
||||
static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) {
|
||||
return First && Second &&
|
||||
First->getCanonicalDecl() == Second->getCanonicalDecl();
|
||||
}
|
||||
|
||||
It's not as trivial to test if two expressions are the same, though
|
||||
Clang has already done the hard work for us by providing a way to
|
||||
canonicalize expressions:
|
||||
|
||||
::
|
||||
|
||||
static bool areSameExpr(ASTContext* Context, const Expr *First,
|
||||
const Expr *Second) {
|
||||
if (!First || !Second)
|
||||
return false;
|
||||
llvm::FoldingSetNodeID FirstID, SecondID;
|
||||
First->Profile(FirstID, *Context, true);
|
||||
Second->Profile(SecondID, *Context, true);
|
||||
return FirstID == SecondID;
|
||||
}
|
||||
|
||||
This code relies on the comparison between two
|
||||
``llvm::FoldingSetNodeIDs``. As the documentation for
|
||||
``Stmt::Profile()`` indicates, the ``Profile()`` member function builds
|
||||
a description of a node in the AST, based on its properties, along with
|
||||
those of its children. ``FoldingSetNodeID`` then serves as a hash we can
|
||||
use to compare expressions. We will need ``areSameExpr`` later. Before
|
||||
you run the new code on the additional loops added to
|
||||
test-files/simple.cpp, try to figure out which ones will be considered
|
||||
potentially convertible.
|
|
@ -1,179 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Pretokenized Headers (PTH)</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
<style type="text/css">
|
||||
td {
|
||||
vertical-align: top;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>Pretokenized Headers (PTH)</h1>
|
||||
|
||||
<p>This document first describes the low-level
|
||||
interface for using PTH and then briefly elaborates on its design and
|
||||
implementation. If you are interested in the end-user view, please see the
|
||||
<a href="UsersManual.html#precompiledheaders">User's Manual</a>.</p>
|
||||
|
||||
|
||||
<h2>Using Pretokenized Headers with <tt>clang</tt> (Low-level Interface)</h2>
|
||||
|
||||
<p>The Clang compiler frontend, <tt>clang -cc1</tt>, supports three command line
|
||||
options for generating and using PTH files.</p>
|
||||
|
||||
<p>To generate PTH files using <tt>clang -cc1</tt>, use the option
|
||||
<b><tt>-emit-pth</tt></b>:
|
||||
|
||||
<pre> $ clang -cc1 test.h -emit-pth -o test.h.pth </pre>
|
||||
|
||||
<p>This option is transparently used by <tt>clang</tt> when generating PTH
|
||||
files. Similarly, PTH files can be used as prefix headers using the
|
||||
<b><tt>-include-pth</tt></b> option:</p>
|
||||
|
||||
<pre>
|
||||
$ clang -cc1 -include-pth test.h.pth test.c -o test.s
|
||||
</pre>
|
||||
|
||||
<p>Alternatively, Clang's PTH files can be used as a raw "token-cache"
|
||||
(or "content" cache) of the source included by the original header
|
||||
file. This means that the contents of the PTH file are searched as substitutes
|
||||
for <em>any</em> source files that are used by <tt>clang -cc1</tt> to process a
|
||||
source file. This is done by specifying the <b><tt>-token-cache</tt></b>
|
||||
option:</p>
|
||||
|
||||
<pre>
|
||||
$ cat test.h
|
||||
#include <stdio.h>
|
||||
$ clang -cc1 -emit-pth test.h -o test.h.pth
|
||||
$ cat test.c
|
||||
#include "test.h"
|
||||
$ clang -cc1 test.c -o test -token-cache test.h.pth
|
||||
</pre>
|
||||
|
||||
<p>In this example the contents of <tt>stdio.h</tt> (and the files it includes)
|
||||
will be retrieved from <tt>test.h.pth</tt>, as the PTH file is being used in
|
||||
this case as a raw cache of the contents of <tt>test.h</tt>. This is a low-level
|
||||
interface used to both implement the high-level PTH interface as well as to
|
||||
provide alternative means to use PTH-style caching.</p>
|
||||
|
||||
<h2>PTH Design and Implementation</h2>
|
||||
|
||||
<p>Unlike GCC's precompiled headers, which cache the full ASTs and preprocessor
|
||||
state of a header file, Clang's pretokenized header files mainly cache the raw
|
||||
lexer <em>tokens</em> that are needed to segment the stream of characters in a
|
||||
source file into keywords, identifiers, and operators. Consequently, PTH serves
|
||||
to mainly directly speed up the lexing and preprocessing of a source file, while
|
||||
parsing and type-checking must be completely redone every time a PTH file is
|
||||
used.</p>
|
||||
|
||||
<h3>Basic Design Tradeoffs</h3>
|
||||
|
||||
<p>In the long term there are plans to provide an alternate PCH implementation
|
||||
for Clang that also caches the work for parsing and type checking the contents
|
||||
of header files. The current implementation of PCH in Clang as pretokenized
|
||||
header files was motivated by the following factors:</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><p><b>Language independence</b>: PTH files work with any language that
|
||||
Clang's lexer can handle, including C, Objective-C, and (in the early stages)
|
||||
C++. This means development on language features at the parsing level or above
|
||||
(which is basically almost all interesting pieces) does not require PTH to be
|
||||
modified.</p></li>
|
||||
|
||||
<li><b>Simple design</b>: Relatively speaking, PTH has a simple design and
|
||||
implementation, making it easy to test. Further, because the machinery for PTH
|
||||
resides at the lower-levels of the Clang library stack it is fairly
|
||||
straightforward to profile and optimize.</li>
|
||||
</ul>
|
||||
|
||||
<p>Further, compared to GCC's PCH implementation (which is the dominate
|
||||
precompiled header file implementation that Clang can be directly compared
|
||||
against) the PTH design in Clang yields several attractive features:</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><p><b>Architecture independence</b>: In contrast to GCC's PCH files (and
|
||||
those of several other compilers), Clang's PTH files are architecture
|
||||
independent, requiring only a single PTH file when building a program for
|
||||
multiple architectures.</p>
|
||||
|
||||
<p>For example, on Mac OS X one may wish to
|
||||
compile a "universal binary" that runs on PowerPC, 32-bit Intel
|
||||
(i386), and 64-bit Intel architectures. In contrast, GCC requires a PCH file for
|
||||
each architecture, as the definitions of types in the AST are
|
||||
architecture-specific. Since a Clang PTH file essentially represents a lexical
|
||||
cache of header files, a single PTH file can be safely used when compiling for
|
||||
multiple architectures. This can also reduce compile times because only a single
|
||||
PTH file needs to be generated during a build instead of several.</p></li>
|
||||
|
||||
<li><p><b>Reduced memory pressure</b>: Similar to GCC,
|
||||
Clang reads PTH files via the use of memory mapping (i.e., <tt>mmap</tt>).
|
||||
Clang, however, memory maps PTH files as read-only, meaning that multiple
|
||||
invocations of <tt>clang -cc1</tt> can share the same pages in memory from a
|
||||
memory-mapped PTH file. In comparison, GCC also memory maps its PCH files but
|
||||
also modifies those pages in memory, incurring the copy-on-write costs. The
|
||||
read-only nature of PTH can greatly reduce memory pressure for builds involving
|
||||
multiple cores, thus improving overall scalability.</p></li>
|
||||
|
||||
<li><p><b>Fast generation</b>: PTH files can be generated in a small fraction
|
||||
of the time needed to generate GCC's PCH files. Since PTH/PCH generation is a
|
||||
serial operation that typically blocks progress during a build, faster
|
||||
generation time leads to improved processor utilization with parallel builds on
|
||||
multicore machines.</p></li>
|
||||
|
||||
</ul>
|
||||
|
||||
<p>Despite these strengths, PTH's simple design suffers some algorithmic
|
||||
handicaps compared to other PCH strategies such as those used by GCC. While PTH
|
||||
can greatly speed up the processing time of a header file, the amount of work
|
||||
required to process a header file is still roughly linear in the size of the
|
||||
header file. In contrast, the amount of work done by GCC to process a
|
||||
precompiled header is (theoretically) constant (the ASTs for the header are
|
||||
literally memory mapped into the compiler). This means that only the pieces of
|
||||
the header file that are referenced by the source file including the header are
|
||||
the only ones the compiler needs to process during actual compilation. While
|
||||
GCC's particular implementation of PCH mitigates some of these algorithmic
|
||||
strengths via the use of copy-on-write pages, the approach itself can
|
||||
fundamentally dominate at an algorithmic level, especially when one considers
|
||||
header files of arbitrary size.</p>
|
||||
|
||||
<p>There are plans to potentially implement an complementary PCH implementation
|
||||
for Clang based on the lazy deserialization of ASTs. This approach would
|
||||
theoretically have the same constant-time algorithmic advantages just mentioned
|
||||
but would also retain some of the strengths of PTH such as reduced memory
|
||||
pressure (ideal for multi-core builds).</p>
|
||||
|
||||
<h3>Internal PTH Optimizations</h3>
|
||||
|
||||
<p>While the main optimization employed by PTH is to reduce lexing time of
|
||||
header files by caching pre-lexed tokens, PTH also employs several other
|
||||
optimizations to speed up the processing of header files:</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><p><em><tt>stat</tt> caching</em>: PTH files cache information obtained via
|
||||
calls to <tt>stat</tt> that <tt>clang -cc1</tt> uses to resolve which files are
|
||||
included by <tt>#include</tt> directives. This greatly reduces the overhead
|
||||
involved in context-switching to the kernel to resolve included files.</p></li>
|
||||
|
||||
<li><p><em>Fasting skipping of <tt>#ifdef</tt>...<tt>#endif</tt> chains</em>:
|
||||
PTH files record the basic structure of nested preprocessor blocks. When the
|
||||
condition of the preprocessor block is false, all of its tokens are immediately
|
||||
skipped instead of requiring them to be handled by Clang's
|
||||
preprocessor.</p></li>
|
||||
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,164 @@
|
|||
==========================
|
||||
Pretokenized Headers (PTH)
|
||||
==========================
|
||||
|
||||
This document first describes the low-level interface for using PTH and
|
||||
then briefly elaborates on its design and implementation. If you are
|
||||
interested in the end-user view, please see the `User's
|
||||
Manual <UsersManual.html#precompiledheaders>`_.
|
||||
|
||||
Using Pretokenized Headers with ``clang`` (Low-level Interface)
|
||||
===============================================================
|
||||
|
||||
The Clang compiler frontend, ``clang -cc1``, supports three command line
|
||||
options for generating and using PTH files.
|
||||
|
||||
To generate PTH files using ``clang -cc1``, use the option
|
||||
``-emit-pth``:
|
||||
|
||||
::
|
||||
|
||||
$ clang -cc1 test.h -emit-pth -o test.h.pth
|
||||
|
||||
This option is transparently used by ``clang`` when generating PTH
|
||||
files. Similarly, PTH files can be used as prefix headers using the
|
||||
``-include-pth`` option:
|
||||
|
||||
::
|
||||
|
||||
$ clang -cc1 -include-pth test.h.pth test.c -o test.s
|
||||
|
||||
Alternatively, Clang's PTH files can be used as a raw "token-cache" (or
|
||||
"content" cache) of the source included by the original header file.
|
||||
This means that the contents of the PTH file are searched as substitutes
|
||||
for *any* source files that are used by ``clang -cc1`` to process a
|
||||
source file. This is done by specifying the ``-token-cache`` option:
|
||||
|
||||
::
|
||||
|
||||
$ cat test.h
|
||||
#include <stdio.h>
|
||||
$ clang -cc1 -emit-pth test.h -o test.h.pth
|
||||
$ cat test.c
|
||||
#include "test.h"
|
||||
$ clang -cc1 test.c -o test -token-cache test.h.pth
|
||||
|
||||
In this example the contents of ``stdio.h`` (and the files it includes)
|
||||
will be retrieved from ``test.h.pth``, as the PTH file is being used in
|
||||
this case as a raw cache of the contents of ``test.h``. This is a
|
||||
low-level interface used to both implement the high-level PTH interface
|
||||
as well as to provide alternative means to use PTH-style caching.
|
||||
|
||||
PTH Design and Implementation
|
||||
=============================
|
||||
|
||||
Unlike GCC's precompiled headers, which cache the full ASTs and
|
||||
preprocessor state of a header file, Clang's pretokenized header files
|
||||
mainly cache the raw lexer *tokens* that are needed to segment the
|
||||
stream of characters in a source file into keywords, identifiers, and
|
||||
operators. Consequently, PTH mainly serves to directly speed up the
|
||||
lexing and preprocessing of a source file, while parsing and
|
||||
type-checking must be completely redone every time a PTH file is used.
|
||||
|
||||
Basic Design Tradeoffs
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In the long term there are plans to provide an alternate PCH
|
||||
implementation for Clang that also caches the work for parsing and type
|
||||
checking the contents of header files. The current implementation of PCH
|
||||
in Clang as pretokenized header files was motivated by the following
|
||||
factors:
|
||||
|
||||
**Language independence**
|
||||
PTH files work with any language that
|
||||
Clang's lexer can handle, including C, Objective-C, and (in the early
|
||||
stages) C++. This means development on language features at the
|
||||
parsing level or above (which is basically almost all interesting
|
||||
pieces) does not require PTH to be modified.
|
||||
|
||||
**Simple design**
|
||||
Relatively speaking, PTH has a simple design and
|
||||
implementation, making it easy to test. Further, because the
|
||||
machinery for PTH resides at the lower levels of the Clang library
|
||||
stack it is fairly straightforward to profile and optimize.
|
||||
|
||||
Further, compared to GCC's PCH implementation (which is the dominant
|
||||
precompiled header file implementation that Clang can be directly
|
||||
compared against) the PTH design in Clang yields several attractive
|
||||
features:
|
||||
|
||||
**Architecture independence**
|
||||
In contrast to GCC's PCH files (and
|
||||
those of several other compilers), Clang's PTH files are architecture
|
||||
independent, requiring only a single PTH file when building an
|
||||
program for multiple architectures.
|
||||
|
||||
For example, on Mac OS X one may wish to compile a "universal binary"
|
||||
that runs on PowerPC, 32-bit Intel (i386), and 64-bit Intel
|
||||
architectures. In contrast, GCC requires a PCH file for each
|
||||
architecture, as the definitions of types in the AST are
|
||||
architecture-specific. Since a Clang PTH file essentially represents
|
||||
a lexical cache of header files, a single PTH file can be safely used
|
||||
when compiling for multiple architectures. This can also reduce
|
||||
compile times because only a single PTH file needs to be generated
|
||||
during a build instead of several.
|
||||
|
||||
**Reduced memory pressure**
|
||||
Similar to GCC, Clang reads PTH files
|
||||
via the use of memory mapping (i.e., ``mmap``). Clang, however,
|
||||
memory maps PTH files as read-only, meaning that multiple invocations
|
||||
of ``clang -cc1`` can share the same pages in memory from a
|
||||
memory-mapped PTH file. In comparison, GCC also memory maps its PCH
|
||||
files but also modifies those pages in memory, incurring the
|
||||
copy-on-write costs. The read-only nature of PTH can greatly reduce
|
||||
memory pressure for builds involving multiple cores, thus improving
|
||||
overall scalability.
|
||||
|
||||
**Fast generation**
|
||||
PTH files can be generated in a small fraction
|
||||
of the time needed to generate GCC's PCH files. Since PTH/PCH
|
||||
generation is a serial operation that typically blocks progress
|
||||
during a build, faster generation time leads to improved processor
|
||||
utilization with parallel builds on multicore machines.
|
||||
|
||||
Despite these strengths, PTH's simple design suffers some algorithmic
|
||||
handicaps compared to other PCH strategies such as those used by GCC.
|
||||
While PTH can greatly speed up the processing time of a header file, the
|
||||
amount of work required to process a header file is still roughly linear
|
||||
in the size of the header file. In contrast, the amount of work done by
|
||||
GCC to process a precompiled header is (theoretically) constant (the
|
||||
ASTs for the header are literally memory mapped into the compiler). This
|
||||
means that the pieces of the header file that are referenced by the
|
||||
source file including the header are the only ones the compiler needs to
|
||||
process during actual compilation. While GCC's particular implementation
|
||||
of PCH mitigates some of these algorithmic strengths via the use of
|
||||
copy-on-write pages, the approach itself can fundamentally dominate at
|
||||
an algorithmic level, especially when one considers header files of
|
||||
arbitrary size.
|
||||
|
||||
There are plans to potentially implement a complementary PCH
|
||||
implementation for Clang based on the lazy deserialization of ASTs. This
|
||||
approach would theoretically have the same constant-time algorithmic
|
||||
advantages just mentioned but would also retain some of the strengths of
|
||||
PTH such as reduced memory pressure (ideal for multi-core builds).
|
||||
|
||||
Internal PTH Optimizations
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
While the main optimization employed by PTH is to reduce lexing time of
|
||||
header files by caching pre-lexed tokens, PTH also employs several other
|
||||
optimizations to speed up the processing of header files:
|
||||
|
||||
- ``stat`` caching: PTH files cache information obtained via calls to
|
||||
``stat`` that ``clang -cc1`` uses to resolve which files are included
|
||||
by ``#include`` directives. This greatly reduces the overhead
|
||||
involved in context-switching to the kernel to resolve included
|
||||
files.
|
||||
|
||||
- Fast skipping of ``#ifdef``... ``#endif`` chains: PTH files
|
||||
record the basic structure of nested preprocessor blocks. When the
|
||||
condition of the preprocessor block is false, all of its tokens are
|
||||
immediately skipped instead of requiring them to be handled by
|
||||
Clang's preprocessor.
|
||||
|
||||
|
|
@ -1,224 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>How to write RecursiveASTVisitor based ASTFrontendActions.</title>
|
||||
<link type="text/css" rel="stylesheet" href="../menu.css">
|
||||
<link type="text/css" rel="stylesheet" href="../content.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!--#include virtual="../menu.html.incl"-->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h1>How to write RecursiveASTVisitor based ASTFrontendActions.</h1>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="intro">Introduction</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
In this tutorial you will learn how to create a FrontendAction that uses
|
||||
a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified name.
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="action">Creating a FrontendAction</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>When writing a clang based tool like a Clang Plugin or a standalone tool
|
||||
based on LibTooling, the common entry point is the FrontendAction.
|
||||
FrontendAction is an interface that allows execution of user specific actions
|
||||
as part of the compilation. To run tools over the AST clang provides the
|
||||
convenience interface ASTFrontendAction, which takes care of executing the
|
||||
action. The only part left is to implement the CreateASTConsumer method that
|
||||
returns an ASTConsumer per translation unit.</p>
|
||||
<pre>
|
||||
class FindNamedClassAction : public clang::ASTFrontendAction {
|
||||
public:
|
||||
virtual clang::ASTConsumer *CreateASTConsumer(
|
||||
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
|
||||
return new FindNamedClassConsumer;
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="consumer">Creating an ASTConsumer</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>ASTConsumer is an interface used to write generic actions on an AST,
|
||||
regardless of how the AST was produced. ASTConsumer provides many different
|
||||
entry points, but for our use case the only one needed is HandleTranslationUnit,
|
||||
which is called with the ASTContext for the translation unit.</p>
|
||||
<pre>
|
||||
class FindNamedClassConsumer : public clang::ASTConsumer {
|
||||
public:
|
||||
virtual void HandleTranslationUnit(clang::ASTContext &Context) {
|
||||
// Traversing the translation unit decl via a RecursiveASTVisitor
|
||||
// will visit all nodes in the AST.
|
||||
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
|
||||
}
|
||||
private:
|
||||
// A RecursiveASTVisitor implementation.
|
||||
FindNamedClassVisitor Visitor;
|
||||
};
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="rav">Using the RecursiveASTVisitor</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Now that everything is hooked up, the next step is to implement a
|
||||
RecursiveASTVisitor to extract the relevant information from the AST.</p>
|
||||
<p>The RecursiveASTVisitor provides hooks of the form
|
||||
bool VisitNodeType(NodeType *) for most AST nodes; the exception are TypeLoc
|
||||
nodes, which are passed by-value. We only need to implement the methods for the
|
||||
relevant node types.
|
||||
</p>
|
||||
<p>Let's start by writing a RecursiveASTVisitor that visits all CXXRecordDecl's.
|
||||
<pre>
|
||||
class FindNamedClassVisitor
|
||||
: public RecursiveASTVisitor<FindNamedClassVisitor> {
|
||||
public:
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
// For debugging, dumping the AST nodes will show which nodes are already
|
||||
// being visited.
|
||||
Declaration->dump();
|
||||
|
||||
// The return value indicates whether we want the visitation to proceed.
|
||||
// Return false to stop the traversal of the AST.
|
||||
return true;
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
</p>
|
||||
<p>In the methods of our RecursiveASTVisitor we can now use the full power of
|
||||
the Clang AST to drill through to the parts that are interesting for us. For
|
||||
example, to find all class declaration with a certain name, we can check for a
|
||||
specific qualified name:
|
||||
<pre>
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
if (Declaration->getQualifiedNameAsString() == "n::m::C")
|
||||
Declaration->dump();
|
||||
return true;
|
||||
}
|
||||
</pre>
|
||||
</p>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="context">Accessing the SourceManager and ASTContext</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Some of the information about the AST, like source locations and global
|
||||
identifier information, are not stored in the AST nodes themselves, but in
|
||||
the ASTContext and its associated source manager. To retrieve them we need to
|
||||
hand the ASTContext into our RecursiveASTVisitor implementation.</p>
|
||||
<p>The ASTContext is available from the CompilerInstance during the call
|
||||
to CreateASTConsumer. We can thus extract it there and hand it into our
|
||||
freshly created FindNamedClassConsumer:</p>
|
||||
<pre>
|
||||
virtual clang::ASTConsumer *CreateASTConsumer(
|
||||
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
|
||||
return new FindNamedClassConsumer(<b>&Compiler.getASTContext()</b>);
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>Now that the ASTContext is available in the RecursiveASTVisitor, we can do
|
||||
more interesting things with AST nodes, like looking up their source
|
||||
locations:</p>
|
||||
<pre>
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
|
||||
// getFullLoc uses the ASTContext's SourceManager to resolve the source
|
||||
// location and break it up into its line and column parts.
|
||||
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
|
||||
if (FullLocation.isValid())
|
||||
llvm::outs() << "Found declaration at "
|
||||
<< FullLocation.getSpellingLineNumber() << ":"
|
||||
<< FullLocation.getSpellingColumnNumber() << "\n";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
</pre>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
<h2 id="full">Putting it all together</h2>
|
||||
<!-- ======================================================================= -->
|
||||
|
||||
<p>Now we can combine all of the above into a small example program:</p>
|
||||
<pre>
|
||||
#include "clang/AST/ASTConsumer.h"
|
||||
#include "clang/AST/RecursiveASTVisitor.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Frontend/FrontendAction.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
|
||||
using namespace clang;
|
||||
|
||||
class FindNamedClassVisitor
|
||||
: public RecursiveASTVisitor<FindNamedClassVisitor> {
|
||||
public:
|
||||
explicit FindNamedClassVisitor(ASTContext *Context)
|
||||
: Context(Context) {}
|
||||
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
|
||||
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
|
||||
if (FullLocation.isValid())
|
||||
llvm::outs() << "Found declaration at "
|
||||
<< FullLocation.getSpellingLineNumber() << ":"
|
||||
<< FullLocation.getSpellingColumnNumber() << "\n";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
ASTContext *Context;
|
||||
};
|
||||
|
||||
class FindNamedClassConsumer : public clang::ASTConsumer {
|
||||
public:
|
||||
explicit FindNamedClassConsumer(ASTContext *Context)
|
||||
: Visitor(Context) {}
|
||||
|
||||
virtual void HandleTranslationUnit(clang::ASTContext &Context) {
|
||||
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
|
||||
}
|
||||
private:
|
||||
FindNamedClassVisitor Visitor;
|
||||
};
|
||||
|
||||
class FindNamedClassAction : public clang::ASTFrontendAction {
|
||||
public:
|
||||
virtual clang::ASTConsumer *CreateASTConsumer(
|
||||
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
|
||||
return new FindNamedClassConsumer(&Compiler.getASTContext());
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc > 1) {
|
||||
clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]);
|
||||
}
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>We store this into a file called FindClassDecls.cpp and create the following
|
||||
CMakeLists.txt to link it:</p>
|
||||
<pre>
|
||||
set(LLVM_USED_LIBS clangTooling)
|
||||
|
||||
add_clang_executable(find-class-decls FindClassDecls.cpp)
|
||||
</pre>
|
||||
|
||||
<p>When running this tool over a small code snippet it will output all
|
||||
declarations of a class n::m::C it found:</p>
|
||||
<pre>
|
||||
$ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }"
|
||||
Found declaration at 1:29
|
||||
</pre>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,216 @@
|
|||
==========================================================
|
||||
How to write RecursiveASTVisitor based ASTFrontendActions.
|
||||
==========================================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
In this tutorial you will learn how to create a FrontendAction that uses
|
||||
a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified
|
||||
name.
|
||||
|
||||
Creating a FrontendAction
|
||||
=========================
|
||||
|
||||
When writing a clang based tool like a Clang Plugin or a standalone tool
|
||||
based on LibTooling, the common entry point is the FrontendAction.
|
||||
FrontendAction is an interface that allows execution of user specific
|
||||
actions as part of the compilation. To run tools over the AST clang
|
||||
provides the convenience interface ASTFrontendAction, which takes care
|
||||
of executing the action. The only part left is to implement the
|
||||
CreateASTConsumer method that returns an ASTConsumer per translation
|
||||
unit.
|
||||
|
||||
::
|
||||
|
||||
class FindNamedClassAction : public clang::ASTFrontendAction {
|
||||
public:
|
||||
virtual clang::ASTConsumer *CreateASTConsumer(
|
||||
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
|
||||
return new FindNamedClassConsumer;
|
||||
}
|
||||
};
|
||||
|
||||
Creating an ASTConsumer
|
||||
=======================
|
||||
|
||||
ASTConsumer is an interface used to write generic actions on an AST,
|
||||
regardless of how the AST was produced. ASTConsumer provides many
|
||||
different entry points, but for our use case the only one needed is
|
||||
HandleTranslationUnit, which is called with the ASTContext for the
|
||||
translation unit.
|
||||
|
||||
::
|
||||
|
||||
class FindNamedClassConsumer : public clang::ASTConsumer {
|
||||
public:
|
||||
virtual void HandleTranslationUnit(clang::ASTContext &Context) {
|
||||
// Traversing the translation unit decl via a RecursiveASTVisitor
|
||||
// will visit all nodes in the AST.
|
||||
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
|
||||
}
|
||||
private:
|
||||
// A RecursiveASTVisitor implementation.
|
||||
FindNamedClassVisitor Visitor;
|
||||
};
|
||||
|
||||
Using the RecursiveASTVisitor
|
||||
=============================
|
||||
|
||||
Now that everything is hooked up, the next step is to implement a
|
||||
RecursiveASTVisitor to extract the relevant information from the AST.
|
||||
|
||||
The RecursiveASTVisitor provides hooks of the form bool
|
||||
VisitNodeType(NodeType \*) for most AST nodes; the exceptions are TypeLoc
|
||||
nodes, which are passed by-value. We only need to implement the methods
|
||||
for the relevant node types.
|
||||
|
||||
Let's start by writing a RecursiveASTVisitor that visits all
|
||||
CXXRecordDecl's.
|
||||
|
||||
::
|
||||
|
||||
class FindNamedClassVisitor
|
||||
: public RecursiveASTVisitor<FindNamedClassVisitor> {
|
||||
public:
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
// For debugging, dumping the AST nodes will show which nodes are already
|
||||
// being visited.
|
||||
Declaration->dump();
|
||||
|
||||
// The return value indicates whether we want the visitation to proceed.
|
||||
// Return false to stop the traversal of the AST.
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
In the methods of our RecursiveASTVisitor we can now use the full power
|
||||
of the Clang AST to drill through to the parts that are interesting for
|
||||
us. For example, to find all class declarations with a certain name, we
|
||||
can check for a specific qualified name:
|
||||
|
||||
::
|
||||
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
if (Declaration->getQualifiedNameAsString() == "n::m::C")
|
||||
Declaration->dump();
|
||||
return true;
|
||||
}
|
||||
|
||||
Accessing the SourceManager and ASTContext
|
||||
==========================================
|
||||
|
||||
Some of the information about the AST, like source locations and global
|
||||
identifier information, is not stored in the AST nodes themselves, but
|
||||
in the ASTContext and its associated source manager. To retrieve them we
|
||||
need to hand the ASTContext into our RecursiveASTVisitor implementation.
|
||||
|
||||
The ASTContext is available from the CompilerInstance during the call to
|
||||
CreateASTConsumer. We can thus extract it there and hand it into our
|
||||
freshly created FindNamedClassConsumer:
|
||||
|
||||
::
|
||||
|
||||
virtual clang::ASTConsumer *CreateASTConsumer(
|
||||
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
|
||||
return new FindNamedClassConsumer(&Compiler.getASTContext());
|
||||
}
|
||||
|
||||
Now that the ASTContext is available in the RecursiveASTVisitor, we can
|
||||
do more interesting things with AST nodes, like looking up their source
|
||||
locations:
|
||||
|
||||
::
|
||||
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
|
||||
// getFullLoc uses the ASTContext's SourceManager to resolve the source
|
||||
// location and break it up into its line and column parts.
|
||||
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
|
||||
if (FullLocation.isValid())
|
||||
llvm::outs() << "Found declaration at "
|
||||
<< FullLocation.getSpellingLineNumber() << ":"
|
||||
<< FullLocation.getSpellingColumnNumber() << "\n";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Putting it all together
|
||||
=======================
|
||||
|
||||
Now we can combine all of the above into a small example program:
|
||||
|
||||
::
|
||||
|
||||
#include "clang/AST/ASTConsumer.h"
|
||||
#include "clang/AST/RecursiveASTVisitor.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Frontend/FrontendAction.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
|
||||
using namespace clang;
|
||||
|
||||
class FindNamedClassVisitor
|
||||
: public RecursiveASTVisitor<FindNamedClassVisitor> {
|
||||
public:
|
||||
explicit FindNamedClassVisitor(ASTContext *Context)
|
||||
: Context(Context) {}
|
||||
|
||||
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
|
||||
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
|
||||
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
|
||||
if (FullLocation.isValid())
|
||||
llvm::outs() << "Found declaration at "
|
||||
<< FullLocation.getSpellingLineNumber() << ":"
|
||||
<< FullLocation.getSpellingColumnNumber() << "\n";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
ASTContext *Context;
|
||||
};
|
||||
|
||||
class FindNamedClassConsumer : public clang::ASTConsumer {
|
||||
public:
|
||||
explicit FindNamedClassConsumer(ASTContext *Context)
|
||||
: Visitor(Context) {}
|
||||
|
||||
virtual void HandleTranslationUnit(clang::ASTContext &Context) {
|
||||
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
|
||||
}
|
||||
private:
|
||||
FindNamedClassVisitor Visitor;
|
||||
};
|
||||
|
||||
class FindNamedClassAction : public clang::ASTFrontendAction {
|
||||
public:
|
||||
virtual clang::ASTConsumer *CreateASTConsumer(
|
||||
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
|
||||
return new FindNamedClassConsumer(&Compiler.getASTContext());
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc > 1) {
|
||||
clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]);
|
||||
}
|
||||
}
|
||||
|
||||
We store this into a file called FindClassDecls.cpp and create the
|
||||
following CMakeLists.txt to link it:
|
||||
|
||||
::
|
||||
|
||||
set(LLVM_USED_LIBS clangTooling)
|
||||
|
||||
add_clang_executable(find-class-decls FindClassDecls.cpp)
|
||||
|
||||
When running this tool over a small code snippet it will output all
|
||||
declarations of a class n::m::C it found:
|
||||
|
||||
::
|
||||
|
||||
$ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }"
|
||||
Found declaration at 1:29
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -18,6 +18,17 @@ progress. This page will get filled out with docs soon...
|
|||
PCHInternals
|
||||
ThreadSanitizer
|
||||
Tooling
|
||||
AddressSanitizer
|
||||
AnalyzerRegions
|
||||
ClangPlugins
|
||||
ClangTools
|
||||
HowToSetupToolingForLLVM
|
||||
IntroductionToTheClangAST
|
||||
JSONCompilationDatabase
|
||||
LibASTMatchersTutorial
|
||||
PTHInternals
|
||||
RAVFrontendAction
|
||||
UsersManual
|
||||
|
||||
|
||||
Indices and tables
|
||||
|
|
Loading…
Reference in New Issue