Integrate Kostya's clang-proto-fuzzer with LLVM.

Summary:
The clang-proto-fuzzer models a subset of C++ as a protobuf and
uses libprotobuf-mutator to generate interesting mutations of C++
programs.  Clang-proto-fuzzer has already found several bugs in
Clang (e.g., https://bugs.llvm.org/show_bug.cgi?id=33747,
https://bugs.llvm.org/show_bug.cgi?id=33749).

As with clang-fuzzer, clang-proto-fuzzer requires the following
cmake flags:
- CMAKE_C_COMPILER=clang
- CMAKE_CXX_COMPILER=clang++
- LLVM_USE_SANITIZE_COVERAGE=YES  // needed for libFuzzer
- LLVM_USE_SANITIZER=Address  // needed for libFuzzer

In addition, clang-proto-fuzzer requires:
- CLANG_ENABLE_PROTO_FUZZER=ON

clang-proto-fuzzer also requires the following dependencies:
- binutils  // needed for libprotobuf-mutator
- liblzma-dev  // needed for libprotobuf-mutator
- libz-dev  // needed for libprotobuf-mutator
- docbook2x  // needed for libprotobuf-mutator
- Recent version of protobuf [3.3.0 is known to work]

A working version of libprotobuf-mutator will automatically be
downloaded and built as an external project.

Implementation of clang-proto-fuzzer provided by Kostya
Serebryany.

https://bugs.llvm.org/show_bug.cgi?id=33829

Reviewers: kcc, vitalybuka, bogner

Reviewed By: kcc, vitalybuka

Subscribers: thakis, mgorny, cfe-commits

Differential Revision: https://reviews.llvm.org/D36324

llvm-svn: 310408
This commit is contained in:
Matt Morehouse 2017-08-08 20:15:04 +00:00
parent 364359e4fc
commit f051f5d1e6
14 changed files with 529 additions and 43 deletions

View File

@ -377,6 +377,8 @@ option(CLANG_ENABLE_STATIC_ANALYZER "Build static analyzer." ON)
option(CLANG_ANALYZER_BUILD_Z3
"Build the static analyzer with the Z3 constraint manager." OFF)
# Opt-in switch (OFF by default) for building the protobuf-based Clang fuzzer.
option(CLANG_ENABLE_PROTO_FUZZER "Build Clang protobuf fuzzer." OFF)
# ARCMT and the Z3 constraint manager may not be enabled while the static
# analyzer is disabled, so reject that combination at configure time.
if(NOT CLANG_ENABLE_STATIC_ANALYZER AND (CLANG_ENABLE_ARCMT OR CLANG_ANALYZER_BUILD_Z3))
message(FATAL_ERROR "Cannot disable static analyzer while enabling ARCMT or Z3")
endif()

View File

@ -0,0 +1,24 @@
# Downloads and builds libprotobuf-mutator as an external project.
#
# Variables set for the includer:
#   ProtobufMutator_INCLUDE_DIRS - root of the libprotobuf-mutator checkout
#   ProtobufMutator_LIBRARIES    - static libraries to link; the libFuzzer
#                                  glue library is listed before the core
#                                  mutator library

# ExternalProject_Add() is only defined after this module has been included.
include(ExternalProject)

set(PBM_PREFIX protobuf_mutator)
# With PREFIX set, ExternalProject places the checkout in <prefix>/src/<name>.
set(PBM_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PBM_PREFIX}/src/${PBM_PREFIX})
set(PBM_LIB_PATH ${PBM_PATH}/src/libprotobuf-mutator.a)
set(PBM_FUZZ_LIB_PATH ${PBM_PATH}/src/libfuzzer/libprotobuf-mutator-libfuzzer.a)

# The project is built in its source tree (BUILD_IN_SOURCE), so the configure
# step runs inside the checkout; "." names that directory explicitly instead of
# relying on CMake's deprecated "no path given" fallback.
ExternalProject_Add(${PBM_PREFIX}
  PREFIX ${PBM_PREFIX}
  GIT_REPOSITORY https://github.com/google/libprotobuf-mutator.git
  GIT_TAG 34287f8
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR}
    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
    .
  BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
  BUILD_BYPRODUCTS ${PBM_LIB_PATH} ${PBM_FUZZ_LIB_PATH}
  BUILD_IN_SOURCE 1
  INSTALL_COMMAND ""
  LOG_DOWNLOAD 1
  LOG_CONFIGURE 1
  LOG_BUILD 1
  )

set(ProtobufMutator_INCLUDE_DIRS ${PBM_PATH})
set(ProtobufMutator_LIBRARIES ${PBM_FUZZ_LIB_PATH} ${PBM_LIB_PATH})

View File

@ -1,21 +1,60 @@
# Build rules for the Clang fuzzers. Nothing in this file is built unless
# LLVM_USE_SANITIZE_COVERAGE is set.
if( LLVM_USE_SANITIZE_COVERAGE )
set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
# The protobuf-based fuzzer is opt-in via CLANG_ENABLE_PROTO_FUZZER.
if(CLANG_ENABLE_PROTO_FUZZER)
# Create protobuf .h and .cc files, and put them in a library for use by
# clang-proto-fuzzer components.
find_package(Protobuf REQUIRED)
add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI)
include_directories(${PROTOBUF_INCLUDE_DIRS})
# The generated cxx_proto.pb.h is written to the binary directory.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS cxx_proto.proto)
# Hack to bypass LLVM's cmake sources check and allow multiple libraries and
# executables from this directory.
set(LLVM_OPTIONAL_SOURCES
ClangFuzzer.cpp
ExampleClangProtoFuzzer.cpp
${PROTO_SRCS}
)
# Library holding the generated protobuf classes.
add_clang_library(clangCXXProto
${PROTO_SRCS}
${PROTO_HDRS}
LINK_LIBS
${PROTOBUF_LIBRARIES}
)
# Build and include libprotobuf-mutator
include(ProtobufMutator)
include_directories(${ProtobufMutator_INCLUDE_DIRS})
# Build the protobuf->C++ translation library and driver.
add_clang_subdirectory(proto-to-cxx)
# Build the protobuf fuzzer
add_clang_executable(clang-proto-fuzzer ExampleClangProtoFuzzer.cpp)
target_link_libraries(clang-proto-fuzzer
${ProtobufMutator_LIBRARIES}
clangCXXProto
clangHandleCXX
clangProtoToCXX
LLVMFuzzer
)
else()
# Hack to bypass LLVM's cmake sources check and allow multiple libraries and
# executables from this directory.
set(LLVM_OPTIONAL_SOURCES ClangFuzzer.cpp ExampleClangProtoFuzzer.cpp)
endif()
# clangHandleCXX is used by both fuzzers, so it is added unconditionally.
add_clang_subdirectory(handle-cxx)
# The plain-text fuzzer; EXCLUDE_FROM_ALL means it must be requested by name.
add_clang_executable(clang-fuzzer
EXCLUDE_FROM_ALL
ClangFuzzer.cpp
)
target_link_libraries(clang-fuzzer
${CLANG_FORMAT_LIB_DEPS}
clangAST
clangBasic
clangCodeGen
clangDriver
clangFrontend
clangRewriteFrontend
clangStaticAnalyzerFrontend
clangTooling
clangHandleCXX
LLVMFuzzer
)
endif()

View File

@ -13,43 +13,12 @@
///
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Tooling.h"
#include "clang/CodeGen/CodeGenAction.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/TargetSelect.h"
#include "handle-cxx/handle_cxx.h"
using namespace clang;
using namespace clang_fuzzer;
/// libFuzzer entry point: interprets the raw input bytes as C++ source text
/// and compiles it with -O2 through HandleCXX.
///
/// All compilation work lives in HandleCXX; this wrapper only converts the
/// byte buffer into a string, so each input is compiled exactly once.
///
/// \param data  Pointer to the fuzzer-provided input bytes.
/// \param size  Number of bytes available at \p data.
/// \returns 0 always.
extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
  std::string s(reinterpret_cast<const char *>(data), size);
  HandleCXX(s, {"-O2"});
  return 0;
}

View File

@ -0,0 +1,28 @@
//===-- ExampleClangProtoFuzzer.cpp - Fuzz Clang --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a function that runs Clang on a single
/// input and uses libprotobuf-mutator to find new inputs. This function is
/// then linked into the Fuzzer library.
///
//===----------------------------------------------------------------------===//
#include "cxx_proto.pb.h"
#include "handle-cxx/handle_cxx.h"
#include "proto-to-cxx/proto_to_cxx.h"
#include "src/libfuzzer/libfuzzer_macro.h"
using namespace clang_fuzzer;
/// Fuzz target body for clang-proto-fuzzer. The macro is provided by
/// libprotobuf-mutator's libfuzzer_macro.h (presumably it decodes the fuzzer
/// input into \p input -- confirm against that header). The Function message
/// is rendered as C++ text and compiled with -O2.
DEFINE_BINARY_PROTO_FUZZER(const Function& input) {
  HandleCXX(FunctionToString(input), {"-O2"});
}

View File

@ -0,0 +1,73 @@
This directory contains two utilities for fuzzing Clang: clang-fuzzer and
clang-proto-fuzzer. Both use libFuzzer to generate inputs to clang via
coverage-guided mutation.
The two utilities differ, however, in how they structure inputs to Clang.
clang-fuzzer makes no attempt to generate valid C++ programs and is therefore
primarily useful for stressing the surface layers of Clang (i.e. lexer, parser).
clang-proto-fuzzer uses a protobuf class to describe a subset of the C++
language and then uses libprotobuf-mutator to mutate instantiations of that
class, producing valid C++ programs in the process. As a result,
clang-proto-fuzzer is better at stressing deeper layers of Clang and LLVM.
===================================
Building clang-fuzzer
===================================
Within your LLVM build directory, run CMake with the following variable
definitions:
- CMAKE_C_COMPILER=clang
- CMAKE_CXX_COMPILER=clang++
- LLVM_USE_SANITIZE_COVERAGE=YES
- LLVM_USE_SANITIZER=Address
Then build the clang-fuzzer target.
Example:
cd $LLVM_SOURCE_DIR
mkdir build && cd build
cmake .. -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DLLVM_USE_SANITIZE_COVERAGE=YES -DLLVM_USE_SANITIZER=Address
ninja clang-fuzzer
=======================================================
Building clang-proto-fuzzer (Linux-only instructions)
=======================================================
Install the necessary dependencies:
- binutils // needed for libprotobuf-mutator
- liblzma-dev // needed for libprotobuf-mutator
- libz-dev // needed for libprotobuf-mutator
- docbook2x // needed for libprotobuf-mutator
- Recent version of protobuf [3.3.0 is known to work]
Within your LLVM build directory, run CMake with the following variable
definitions:
- CMAKE_C_COMPILER=clang
- CMAKE_CXX_COMPILER=clang++
- LLVM_USE_SANITIZE_COVERAGE=YES
- LLVM_USE_SANITIZER=Address
- CLANG_ENABLE_PROTO_FUZZER=ON
Then build the clang-proto-fuzzer and clang-proto-to-cxx targets. Optionally,
you may also build clang-fuzzer with this setup.
Example:
cd $LLVM_SOURCE_DIR
mkdir build && cd build
cmake .. -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DLLVM_USE_SANITIZE_COVERAGE=YES -DLLVM_USE_SANITIZER=Address \
-DCLANG_ENABLE_PROTO_FUZZER=ON
ninja clang-proto-fuzzer clang-proto-to-cxx
=====================
Running the fuzzers
=====================
clang-fuzzer:
bin/clang-fuzzer CORPUS_DIR
clang-proto-fuzzer:
bin/clang-proto-fuzzer CORPUS_DIR
Translating a clang-proto-fuzzer corpus output to C++:
bin/clang-proto-to-cxx CORPUS_OUTPUT_FILE

View File

@ -0,0 +1,93 @@
//===-- cxx_proto.proto - Protobuf description of C++ ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file describes a subset of C++ as a protobuf. It is used to
/// more easily find interesting inputs for fuzzing Clang.
///
//===----------------------------------------------------------------------===//
syntax = "proto2";
// A reference to a variable, identified by an integer. proto_to_cxx.cpp
// renders it as an element of the array parameter: a[varnum % 100], with
// varnum treated as unsigned.
message VarRef {
required int32 varnum = 1;
}
// The target of an assignment; it always wraps exactly one VarRef.
message Lvalue {
required VarRef varref = 1;
}
// A 32-bit integer constant; proto_to_cxx.cpp emits it wrapped in
// parentheses.
message Const {
required int32 val = 1;
}
// A binary expression: two Rvalue operands joined by an operator.
message BinaryOp {
// Operator selector. proto_to_cxx.cpp maps these to the C++ operators
// + - * / % ^ & | == != <= >= < > respectively (AND and OR are the bitwise
// forms).
enum Op {
PLUS = 0;
MINUS = 1;
MUL = 2;
DIV = 3;
MOD = 4;
XOR = 5;
AND = 6;
OR = 7;
EQ = 8;
NE = 9;
LE = 10;
GE = 11;
LT = 12;
GT = 13;
};
required Op op = 1;
required Rvalue left = 2;
required Rvalue right = 3;
}
// An expression that yields a value: a variable reference, a constant, or a
// nested binary operation. When no alternative is set, proto_to_cxx.cpp emits
// the literal 1.
message Rvalue {
oneof rvalue_oneof {
VarRef varref = 1;
Const cons = 2;
BinaryOp binop = 3;
}
}
// An assignment of the form "lvalue = rvalue;".
message AssignmentStatement {
required Lvalue lvalue = 1;
required Rvalue rvalue = 2;
}
// An if/else statement. Both branches are required, though either
// StatementSeq may be empty.
message IfElse {
required Rvalue cond = 1;
required StatementSeq if_body = 2;
required StatementSeq else_body = 3;
}
// A while loop with a condition expression and a body.
message While {
required Rvalue cond = 1;
required StatementSeq body = 2;
}
// A single statement: an assignment, an if/else, or a while loop. When no
// alternative is set, proto_to_cxx.cpp emits the no-op "(void)0;".
message Statement {
oneof stmt_oneof {
AssignmentStatement assignment = 1;
IfElse ifelse = 2;
While while_loop = 3;
}
}
// An ordered, possibly empty, list of statements.
message StatementSeq {
repeated Statement statements = 1;
}
// Top-level message that is fuzzed. proto_to_cxx.cpp emits its statements as
// the body of "void foo(int *a)".
message Function {
required StatementSeq statements = 1;
}
package clang_fuzzer;

View File

@ -0,0 +1,11 @@
# Request the LLVM components for every target being built.
set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
# clangHandleCXX provides HandleCXX(), the compile helper used by the Clang
# fuzzers.
add_clang_library(clangHandleCXX
handle_cxx.cpp
LINK_LIBS
clangCodeGen
clangFrontend
clangLex
clangTooling
)

View File

@ -0,0 +1,58 @@
//==-- handle_cxx.cpp - Helper function for Clang fuzzers ------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements HandleCXX for use by the Clang fuzzers.
//
//===----------------------------------------------------------------------===//
#include "handle_cxx.h"
#include "clang/CodeGen/CodeGenAction.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/TargetSelect.h"
using namespace clang;
void clang_fuzzer::HandleCXX(const std::string &S,
const std::vector<const char *> &ExtraArgs) {
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::opt::ArgStringList CC1Args;
CC1Args.push_back("-cc1");
for (auto &A : ExtraArgs)
CC1Args.push_back(A);
CC1Args.push_back("./test.cc");
llvm::IntrusiveRefCntPtr<FileManager> Files(
new FileManager(FileSystemOptions()));
IgnoringDiagConsumer Diags;
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
DiagnosticsEngine Diagnostics(
IntrusiveRefCntPtr<clang::DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
&Diags, false);
std::unique_ptr<clang::CompilerInvocation> Invocation(
tooling::newInvocation(&Diagnostics, CC1Args));
std::unique_ptr<llvm::MemoryBuffer> Input =
llvm::MemoryBuffer::getMemBuffer(S);
Invocation->getPreprocessorOpts().addRemappedFile("./test.cc",
Input.release());
std::unique_ptr<tooling::ToolAction> action(
tooling::newFrontendActionFactory<clang::EmitObjAction>());
std::shared_ptr<PCHContainerOperations> PCHContainerOps =
std::make_shared<PCHContainerOperations>();
action->runInvocation(std::move(Invocation), Files.get(), PCHContainerOps,
&Diags);
}

View File

@ -0,0 +1,25 @@
//==-- handle_cxx.h - Helper function for Clang fuzzers --------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines HandleCXX for use by the Clang fuzzers.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_CLANG_FUZZER_HANDLE_CXX_HANDLECXX_H
#define LLVM_CLANG_TOOLS_CLANG_FUZZER_HANDLE_CXX_HANDLECXX_H
#include <string>
#include <vector>
namespace clang_fuzzer {
// Runs Clang's -cc1 frontend on the C++ source text S. ExtraArgs are
// additional -cc1 arguments; the implementation places them ahead of the
// input file name on the command line.
void HandleCXX(const std::string &S,
const std::vector<const char *> &ExtraArgs);
} // namespace clang_fuzzer
#endif

View File

@ -0,0 +1,10 @@
# Request the LLVM components for every target being built.
set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
# Hack to bypass LLVM's CMake source checks so we can have both a library and
# an executable built from this directory.
set(LLVM_OPTIONAL_SOURCES proto_to_cxx.cpp proto_to_cxx_main.cpp)
# Library that turns Function protobufs into C++ text.
add_clang_library(clangProtoToCXX proto_to_cxx.cpp LINK_LIBS clangCXXProto)
# Command-line driver that prints the C++ program for each protobuf file it
# is given.
add_clang_executable(clang-proto-to-cxx proto_to_cxx_main.cpp)
target_link_libraries(clang-proto-to-cxx clangProtoToCXX)

View File

@ -0,0 +1,102 @@
//==-- proto_to_cxx.cpp - Protobuf-C++ conversion --------------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements functions for converting between protobufs and C++.
//
//===----------------------------------------------------------------------===//
#include "proto_to_cxx.h"
#include "cxx_proto.pb.h"
#include <ostream>
#include <sstream>
namespace clang_fuzzer {
// Forward decls.
std::ostream &operator<<(std::ostream &os, const BinaryOp &x);
std::ostream &operator<<(std::ostream &os, const StatementSeq &x);
// Proto to C++.
std::ostream &operator<<(std::ostream &os, const Const &x) {
return os << "(" << x.val() << ")";
}
std::ostream &operator<<(std::ostream &os, const VarRef &x) {
return os << "a[" << (static_cast<uint32_t>(x.varnum()) % 100) << "]";
}
std::ostream &operator<<(std::ostream &os, const Lvalue &x) {
return os << x.varref();
}
std::ostream &operator<<(std::ostream &os, const Rvalue &x) {
if (x.has_varref()) return os << x.varref();
if (x.has_cons()) return os << x.cons();
if (x.has_binop()) return os << x.binop();
return os << "1";
}
std::ostream &operator<<(std::ostream &os, const BinaryOp &x) {
os << "(" << x.left();
switch (x.op()) {
case BinaryOp::PLUS: os << "+"; break;
case BinaryOp::MINUS: os << "-"; break;
case BinaryOp::MUL: os << "*"; break;
case BinaryOp::DIV: os << "/"; break;
case BinaryOp::MOD: os << "%"; break;
case BinaryOp::XOR: os << "^"; break;
case BinaryOp::AND: os << "&"; break;
case BinaryOp::OR: os << "|"; break;
case BinaryOp::EQ: os << "=="; break;
case BinaryOp::NE: os << "!="; break;
case BinaryOp::LE: os << "<="; break;
case BinaryOp::GE: os << ">="; break;
case BinaryOp::LT: os << "<"; break;
case BinaryOp::GT: os << ">"; break;
}
return os << x.right() << ")";
}
std::ostream &operator<<(std::ostream &os, const AssignmentStatement &x) {
return os << x.lvalue() << "=" << x.rvalue() << ";\n";
}
std::ostream &operator<<(std::ostream &os, const IfElse &x) {
return os << "if (" << x.cond() << "){\n"
<< x.if_body() << "} else { \n"
<< x.else_body() << "}\n";
}
std::ostream &operator<<(std::ostream &os, const While &x) {
return os << "while (" << x.cond() << "){\n" << x.body() << "}\n";
}
std::ostream &operator<<(std::ostream &os, const Statement &x) {
if (x.has_assignment()) return os << x.assignment();
if (x.has_ifelse()) return os << x.ifelse();
if (x.has_while_loop()) return os << x.while_loop();
return os << "(void)0;\n";
}
std::ostream &operator<<(std::ostream &os, const StatementSeq &x) {
for (auto &st : x.statements()) os << st;
return os;
}
std::ostream &operator<<(std::ostream &os, const Function &x) {
return os << "void foo(int *a) {\n" << x.statements() << "}\n";
}
// ---------------------------------
std::string FunctionToString(const Function &input) {
std::ostringstream os;
os << input;
return os.str();
}
std::string ProtoToCxx(const uint8_t *data, size_t size) {
Function message;
if (!message.ParseFromArray(data, size))
return "#error invalid proto\n";
return FunctionToString(message);
}
} // namespace clang_fuzzer

View File

@ -0,0 +1,22 @@
//==-- proto_to_cxx.h - Protobuf-C++ conversion ----------------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines functions for converting between protobufs and C++.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_CLANG_FUZZER_PROTO_TO_CXX_PROTO_TO_CXX_H
#define LLVM_CLANG_TOOLS_CLANG_FUZZER_PROTO_TO_CXX_PROTO_TO_CXX_H

#include <cstddef>
#include <cstdint>
#include <string>

namespace clang_fuzzer {
// Protobuf message describing a function. Only forward-declared here so that
// users of this header do not need the generated cxx_proto.pb.h.
class Function;

// Renders `input` as C++ source text.
std::string FunctionToString(const Function &input);

// Parses `size` bytes at `data` as a serialized Function and renders it as
// C++ source text. If the bytes cannot be parsed, the result is the single
// line "#error invalid proto".
std::string ProtoToCxx(const uint8_t *data, size_t size);
} // namespace clang_fuzzer

#endif // LLVM_CLANG_TOOLS_CLANG_FUZZER_PROTO_TO_CXX_PROTO_TO_CXX_H

View File

@ -0,0 +1,30 @@
//==-- proto_to_cxx_main.cpp - Driver for protobuf-C++ conversion ----------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements a simple driver to print a C++ program from a protobuf.
//
//===----------------------------------------------------------------------===//
#include <fstream>
#include <iostream>
#include <streambuf>
#include <string>
#include "proto_to_cxx.h"
// For every file named on the command line, prints a "// <path>" header
// followed by the C++ program that ProtoToCxx produces from the file's bytes.
//
// Returns 0 if every file could be opened, 1 if at least one could not (the
// failure is reported on stderr and the remaining files are still processed).
int main(int argc, char **argv) {
  int status = 0;
  for (int i = 1; i < argc; i++) {
    // Corpus entries are serialized protobufs. Open them read-only
    // (std::fstream's default in|out mode fails on files that are not
    // writable) and in binary mode so that no bytes are translated.
    std::ifstream in(argv[i], std::ios::in | std::ios::binary);
    if (!in) {
      std::cerr << "error: cannot open " << argv[i] << std::endl;
      status = 1;
      continue;
    }
    const std::string str((std::istreambuf_iterator<char>(in)),
                          std::istreambuf_iterator<char>());
    std::cout << "// " << argv[i] << std::endl;
    std::cout << clang_fuzzer::ProtoToCxx(
        reinterpret_cast<const uint8_t *>(str.data()), str.size());
  }
  return status;
}