Implemented proto to LLVM conversion and LLVM fuzz target

Differential Revision: https://reviews.llvm.org/D48106

llvm-svn: 335374
This commit is contained in:
Emmett Neyman 2018-06-22 18:05:00 +00:00
parent cd18bb523c
commit e5f4a9ff76
14 changed files with 430 additions and 36 deletions

View File

@ -15,6 +15,7 @@ set(LLVM_OPTIONAL_SOURCES
DummyClangFuzzer.cpp
ExampleClangProtoFuzzer.cpp
ExampleClangLoopProtoFuzzer.cpp
ExampleClangLLVMProtoFuzzer.cpp
)
if(CLANG_ENABLE_PROTO_FUZZER)
@ -49,6 +50,9 @@ if(CLANG_ENABLE_PROTO_FUZZER)
# Build the protobuf->C++ translation library and driver.
add_clang_subdirectory(proto-to-cxx)
# Build the protobuf->LLVM IR translation library and driver.
add_clang_subdirectory(proto-to-llvm)
# Build the fuzzer initialization library.
add_clang_subdirectory(fuzzer-initialize)
@ -65,29 +69,45 @@ if(CLANG_ENABLE_PROTO_FUZZER)
ExampleClangLoopProtoFuzzer.cpp
)
# Build the llvm protobuf fuzzer
add_clang_executable(clang-llvm-proto-fuzzer
${DUMMY_MAIN}
ExampleClangLLVMProtoFuzzer.cpp
)
set(COMMON_PROTO_FUZZ_LIBRARIES
${ProtobufMutator_LIBRARIES}
${PROTOBUF_LIBRARIES}
${LLVM_LIB_FUZZING_ENGINE}
clangFuzzerInitialize
clangHandleCXX
)
target_link_libraries(clang-proto-fuzzer
PRIVATE
${COMMON_PROTO_FUZZ_LIBRARIES}
clangHandleCXX
clangCXXProto
clangProtoToCXX
)
target_link_libraries(clang-loop-proto-fuzzer
PRIVATE
${COMMON_PROTO_FUZZ_LIBRARIES}
clangHandleCXX
clangCXXLoopProto
clangLoopProtoToCXX
)
target_link_libraries(clang-llvm-proto-fuzzer
PRIVATE
${COMMON_PROTO_FUZZ_LIBRARIES}
clangHandleLLVM
clangCXXLoopProto
clangLoopProtoToLLVM
)
endif()
add_clang_subdirectory(handle-cxx)
add_clang_subdirectory(handle-llvm)
add_clang_executable(clang-fuzzer
EXCLUDE_FROM_ALL

View File

@ -0,0 +1,28 @@
//===-- ExampleClangLLVMProtoFuzzer.cpp - Fuzz Clang ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a function that converts a protobuf-described loop
/// function into an LLVM IR string and compiles that string, using
/// libprotobuf-mutator to find new inputs. This function is then linked into
/// the Fuzzer library.
///
//===----------------------------------------------------------------------===//
#include "cxx_loop_proto.pb.h"
#include "fuzzer-initialize/fuzzer_initialize.h"
#include "handle-llvm/handle_llvm.h"
#include "proto-to-llvm/loop_proto_to_llvm.h"
#include "src/libfuzzer/libfuzzer_macro.h"
using namespace clang_fuzzer;
// Fuzzer entry point. Converts the mutated LoopFunction proto into LLVM IR
// text and passes that text, together with the arguments returned by
// GetCLArgs(), to HandleLLVM.
DEFINE_BINARY_PROTO_FUZZER(const LoopFunction &input) {
  const std::string IR = LoopFunctionToLLVMString(input);
  HandleLLVM(IR, GetCLArgs());
}

View File

@ -37,17 +37,15 @@ message BinaryOp {
PLUS = 0;
MINUS = 1;
MUL = 2;
DIV = 3;
MOD = 4;
XOR = 5;
AND = 6;
OR = 7;
EQ = 8;
NE = 9;
LE = 10;
GE = 11;
LT = 12;
GT = 13;
XOR = 3;
AND = 4;
OR = 5;
EQ = 6;
NE = 7;
LE = 8;
GE = 9;
LT = 10;
GT = 11;
};
required Op op = 1;
required Rvalue left = 2;
@ -67,12 +65,6 @@ message AssignmentStatement {
required Rvalue rvalue = 2;
}
message IfElse {
required Rvalue cond = 1;
required StatementSeq if_body = 2;
required StatementSeq else_body = 3;
}
message Statement {
required AssignmentStatement assignment = 1;
}

View File

@ -15,6 +15,8 @@
//===----------------------------------------------------------------------===//
#include "fuzzer_initialize.h"
#include "llvm/Support/TargetSelect.h"
#include <cstring>
using namespace clang_fuzzer;
@ -31,6 +33,11 @@ const std::vector<const char *>& GetCLArgs() {
}
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
CLArgs.push_back("-O2");
for (int I = 1; I < *argc; I++) {
if (strcmp((*argv)[I], "-ignore_remaining_args=1") == 0) {

View File

@ -18,17 +18,11 @@
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/TargetSelect.h"
using namespace clang;
void clang_fuzzer::HandleCXX(const std::string &S,
const std::vector<const char *> &ExtraArgs) {
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::opt::ArgStringList CC1Args;
CC1Args.push_back("-cc1");
for (auto &A : ExtraArgs)

View File

@ -0,0 +1,5 @@
# handle_llvm.cpp parses IR, verifies the module and runs the code generator,
# so list the LLVM components it uses directly in addition to the target
# backends. Relying on them being pulled in transitively breaks builds that
# link every library separately (e.g. BUILD_SHARED_LIBS=ON).
set(LLVM_LINK_COMPONENTS
  ${LLVM_TARGETS_TO_BUILD}
  Analysis
  CodeGen
  Core
  IRReader
  MC
  Support
  Target
  )

add_clang_library(clangHandleLLVM
  handle_llvm.cpp
  )

View File

@ -0,0 +1,111 @@
//==-- handle_llvm.cpp - Helper function for Clang fuzzers -----------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements HandleLLVM for use by the Clang fuzzers. Mimics the llc tool to
// compile an LLVM IR string to object code for the default target triple.
//
//===----------------------------------------------------------------------===//
#include "handle_llvm.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/CommandFlags.inc"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
using namespace llvm;
// Derives the code generation optimization level from ExtraArgs.
//
// OLvl starts out as CodeGenOpt::Default. Every argument that begins with
// "-O" is inspected and its third character selects the level ('0'..'3');
// when several such arguments are present the last one wins. Any other third
// character prints an error and terminates the process with exit status 1.
static void getOptLevel(const std::vector<const char *> &ExtraArgs,
                        CodeGenOpt::Level &OLvl) {
  OLvl = CodeGenOpt::Default;
  for (const char *Arg : ExtraArgs) {
    // Skip everything that is not of the form "-O...".
    if (Arg[0] != '-' || Arg[1] != 'O')
      continue;

    const char Digit = Arg[2];
    if (Digit == '0') {
      OLvl = CodeGenOpt::None;
    } else if (Digit == '1') {
      OLvl = CodeGenOpt::Less;
    } else if (Digit == '2') {
      OLvl = CodeGenOpt::Default;
    } else if (Digit == '3') {
      OLvl = CodeGenOpt::Aggressive;
    } else {
      errs() << "error: opt level must be between 0 and 3.\n";
      std::exit(1);
    }
  }
}
void clang_fuzzer::HandleLLVM(const std::string &S,
const std::vector<const char *> &ExtraArgs) {
// Parse ExtraArgs to set the optimization level
CodeGenOpt::Level OLvl;
getOptLevel(ExtraArgs, OLvl);
// Set the Module to include the the IR code to be compiled
SMDiagnostic Err;
LLVMContext Context;
std::unique_ptr<Module> M = parseIR(MemoryBufferRef(S, "IR"), Err, Context);
if (!M) {
errs() << "error: could not parse IR!\n";
std::exit(1);
}
// Create a new Target
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(
sys::getDefaultTargetTriple(), Error);
if (!TheTarget) {
errs() << Error;
std::exit(1);
}
TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
// Create a new Machine
std::string CPUStr = getCPUStr();
std::string FeaturesStr = getFeaturesStr();
std::unique_ptr<TargetMachine> Target(TheTarget->createTargetMachine(
sys::getDefaultTargetTriple(), CPUStr, FeaturesStr, Options,
getRelocModel(), getCodeModel(), OLvl));
// Create a new PassManager
legacy::PassManager PM;
TargetLibraryInfoImpl TLII(Triple(M->getTargetTriple()));
PM.add(new TargetLibraryInfoWrapperPass(TLII));
M->setDataLayout(Target->createDataLayout());
// Make sure the Module has no errors
if (verifyModule(*M, &errs())) {
errs() << "error: input module is broken!\n";
std::exit(1);
}
setFunctionAttributes(CPUStr, FeaturesStr, *M);
raw_null_ostream OS;
Target->addPassesToEmitFile(PM, OS, nullptr, TargetMachine::CGFT_ObjectFile,
false);
PM.run(*M);
return;
}

View File

@ -0,0 +1,25 @@
//==-- handle_llvm.h - Helper function for Clang fuzzers -------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines HandleLLVM for use by the Clang fuzzers.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_CLANG_FUZZER_HANDLE_LLVM_HANDLELLVM_H
#define LLVM_CLANG_TOOLS_CLANG_FUZZER_HANDLE_LLVM_HANDLELLVM_H
#include <string>
#include <vector>
namespace clang_fuzzer {
// Handles the LLVM IR text in S on behalf of the Clang fuzzers.
// ExtraArgs carries additional command line arguments; the implementation in
// handle_llvm.cpp uses the "-O<n>" entries to choose the optimization level.
void HandleLLVM(const std::string &S,
const std::vector<const char *> &ExtraArgs);
} // namespace clang_fuzzer
#endif

View File

@ -67,12 +67,6 @@ std::ostream &operator<<(std::ostream &os, const BinaryOp &x) {
case BinaryOp::MUL:
os << "*";
break;
case BinaryOp::DIV:
os << "/";
break;
case BinaryOp::MOD:
os << "%";
break;
case BinaryOp::XOR:
os << "^";
break;
@ -106,11 +100,6 @@ std::ostream &operator<<(std::ostream &os, const BinaryOp &x) {
// Prints an assignment as "<varref>=<rvalue>;" followed by a newline.
std::ostream &operator<<(std::ostream &os, const AssignmentStatement &x) {
  os << x.varref();
  os << "=";
  os << x.rvalue();
  os << ";\n";
  return os;
}
// Prints an if/else statement: the condition in parentheses, then the "if"
// body and the "else" body, each wrapped in braces.
std::ostream &operator<<(std::ostream &os, const IfElse &x) {
  os << "if (" << x.cond() << "){\n";
  os << x.if_body() << "} else { \n";
  os << x.else_body() << "}\n";
  return os;
}
// Prints a Statement by printing the assignment it holds.
std::ostream &operator<<(std::ostream &os, const Statement &x) {
  os << x.assignment();
  return os;
}

View File

@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
// This is a copy and will be updated later to introduce changes
#include <fstream>
#include <iostream>

View File

@ -0,0 +1,14 @@
# LLVM components linked into the targets defined in this directory.
set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD})
# Replace the C++ flags for this directory with CXX_FLAGS_NOFUZZ
# (presumably the flag set without fuzzer instrumentation -- confirm against
# the parent CMakeLists.txt).
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ})
# Needed by LLVM's CMake checks because this file defines multiple targets.
set(LLVM_OPTIONAL_SOURCES loop_proto_to_llvm.cpp loop_proto_to_llvm_main.cpp)
# Library holding the protobuf -> LLVM IR conversion; it depends on and links
# against the clangCXXLoopProto target and the protobuf libraries.
add_clang_library(clangLoopProtoToLLVM loop_proto_to_llvm.cpp
DEPENDS clangCXXLoopProto
LINK_LIBS clangCXXLoopProto ${PROTOBUF_LIBRARIES}
)
# Stand-alone driver executable built from loop_proto_to_llvm_main.cpp and
# linked against the conversion library.
add_clang_executable(clang-loop-proto-to-llvm loop_proto_to_llvm_main.cpp)
target_link_libraries(clang-loop-proto-to-llvm PRIVATE clangLoopProtoToLLVM)

View File

@ -0,0 +1,156 @@
//==-- loop_proto_to_llvm.cpp - Protobuf-LLVM conversion -------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements functions for converting between protobufs and LLVM IR.
//
//
//===----------------------------------------------------------------------===//
#include "loop_proto_to_llvm.h"
#include "cxx_loop_proto.pb.h"
// The following is needed to convert protos in human-readable form
#include <google/protobuf/text_format.h>
#include <ostream>
#include <sstream>
namespace clang_fuzzer {
// Forward declaration: RvalueToString and BinopToString call each other, so
// BinopToString has to be declared before RvalueToString is defined.
std::string BinopToString(std::ostream &os, const BinaryOp &x);
// Returns a fresh LLVM IR value name of the form "%var<N>".
//
// N comes from a counter that lives for the whole process and is never reset,
// so names stay unique across every conversion performed by the process. The
// counter is an unsigned 64-bit integer: a signed int would eventually
// overflow (undefined behaviour) during a long fuzzing run.
std::string get_var() {
  static unsigned long long ctr = 0;
  return "%var" + std::to_string(ctr++);
}
// Proto to LLVM.
// Renders a Const as the decimal text of its value, usable as an immediate
// operand in the generated IR.
std::string ConstToString(const Const &x) {
  const auto value = x.val();
  return std::to_string(value);
}
// Emits a getelementptr instruction that addresses element %ct of the array
// selected by x (%a, %b or %c) and returns the name of the resulting pointer.
std::string VarRefToString(std::ostream &os, const VarRef &x) {
  // Map the array selector onto the matching function argument.
  std::string base;
  const auto which = x.arr();
  if (which == VarRef::ARR_A)
    base = "%a";
  else if (which == VarRef::ARR_B)
    base = "%b";
  else if (which == VarRef::ARR_C)
    base = "%c";

  std::string element_ptr = get_var();
  os << element_ptr << " = getelementptr i32, i32* " << base << ", i64 %ct\n";
  return element_ptr;
}
// Lowers an Rvalue and returns the operand text that holds its value.
//
// A constant becomes its literal text, a binary operation is lowered through
// BinopToString, and a variable reference emits an address computation
// followed by a load. When none of the three is set the literal "1" is used.
std::string RvalueToString(std::ostream &os, const Rvalue &x) {
  if (x.has_cons()) {
    return ConstToString(x.cons());
  } else if (x.has_binop()) {
    return BinopToString(os, x.binop());
  } else if (!x.has_varref()) {
    return "1";
  }

  // Variable reference: compute the element address first, then load from it.
  const std::string address = VarRefToString(os, x.varref());
  std::string loaded = get_var();
  os << loaded << " = load i32, i32* " << address << "\n";
  return loaded;
}
// Lowers a BinaryOp: both operands are lowered first (left, then right), then
// a single i32 instruction combining them is emitted. Returns the name of the
// value that holds the result.
std::string BinopToString(std::ostream &os, const BinaryOp &x) {
  const std::string lhs = RvalueToString(os, x.left());
  const std::string rhs = RvalueToString(os, x.right());

  // Stays empty when the operator matches none of the cases below.
  const char *mnemonic = "";
  switch (x.op()) {
  case BinaryOp::MINUS:
    mnemonic = "sub";
    break;
  case BinaryOp::MUL:
    mnemonic = "mul";
    break;
  case BinaryOp::XOR:
    mnemonic = "xor";
    break;
  case BinaryOp::AND:
    mnemonic = "and";
    break;
  case BinaryOp::OR:
    mnemonic = "or";
    break;
  case BinaryOp::PLUS:
  // Support for Boolean operators will be added later; until then the
  // comparison operators are lowered to "add" as well.
  case BinaryOp::EQ:
  case BinaryOp::NE:
  case BinaryOp::LE:
  case BinaryOp::GE:
  case BinaryOp::LT:
  case BinaryOp::GT:
    mnemonic = "add";
    break;
  }

  std::string result = get_var();
  os << result << " = " << mnemonic << " i32 " << lhs << ", " << rhs << "\n";
  return result;
}
// Lowers an assignment: the right-hand side is computed first, then the
// destination address, and finally a store writes the value to that address.
std::ostream &operator<<(std::ostream &os, const AssignmentStatement &x) {
  const std::string value = RvalueToString(os, x.rvalue());
  const std::string address = VarRefToString(os, x.varref());
  os << "store i32 " << value << ", i32* " << address << "\n";
  return os;
}
// Lowers a Statement by lowering the assignment it holds.
std::ostream &operator<<(std::ostream &os, const Statement &x) {
  os << x.assignment();
  return os;
}
std::ostream &operator<<(std::ostream &os, const StatementSeq &x) {
for (auto &st : x.statements()) {
os << st;
}
return os;
}
// Emits the complete IR function @foo: a counter %i stored on the stack, a
// "loop" block that compares the counter against %s, a "body" block holding
// the lowered statements followed by the counter increment, and an "endloop"
// block that returns.
std::ostream &operator<<(std::ostream &os, const LoopFunction &x) {
  // Function header and counter initialisation.
  os << "define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) {\n"
        "%i = alloca i64\n"
        "store i64 0, i64* %i\n"
        "br label %loop\n\n";

  // Loop condition: leave once the counter equals %s.
  os << "loop:\n"
        "%ct = load i64, i64* %i\n"
        "%comp = icmp eq i64 %ct, %s\n"
        "br i1 %comp, label %endloop, label %body\n\n";

  // Loop body: the generated statements, then the counter increment.
  os << "body:\n";
  os << x.statements();
  os << "%z = add i64 1, %ct\n"
        "store i64 %z, i64* %i\n"
        "br label %loop\n\n";

  os << "endloop:\n"
        "ret void\n}\n";
  return os;
}
// ---------------------------------
// Returns the LLVM IR text generated for input.
std::string LoopFunctionToLLVMString(const LoopFunction &input) {
  // Stream the function into an in-memory buffer and hand back its contents.
  std::ostringstream buffer;
  buffer << input;
  return buffer.str();
}
// Parses size bytes at data as a serialized LoopFunction and returns the LLVM
// IR text generated for it. When parsing fails the fixed marker string
// "#error invalid proto\n" is returned instead.
std::string LoopProtoToLLVM(const uint8_t *data, size_t size) {
  LoopFunction message;
  const bool parsed = message.ParsePartialFromArray(data, size);
  return parsed ? LoopFunctionToLLVMString(message)
                : std::string("#error invalid proto\n");
}
} // namespace clang_fuzzer

View File

@ -0,0 +1,23 @@
//==-- loop_proto_to_llvm.h - Protobuf-LLVM conversion ---------------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines functions for converting between protobufs and LLVM IR.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_CLANG_FUZZER_PROTO_TO_LLVM_LOOP_PROTO_TO_LLVM_H
#define LLVM_CLANG_TOOLS_CLANG_FUZZER_PROTO_TO_LLVM_LOOP_PROTO_TO_LLVM_H

#include <cstddef>
#include <cstdint>
#include <string>

namespace clang_fuzzer {
// Protobuf message describing the loop function; only a forward declaration
// is needed here because it is passed by reference.
class LoopFunction;

// Returns the LLVM IR text generated for input.
std::string LoopFunctionToLLVMString(const LoopFunction &input);

// Interprets the size bytes at data as a serialized LoopFunction and returns
// the LLVM IR text generated for it.
std::string LoopProtoToLLVM(const uint8_t *data, size_t size);
} // namespace clang_fuzzer

#endif // LLVM_CLANG_TOOLS_CLANG_FUZZER_PROTO_TO_LLVM_LOOP_PROTO_TO_LLVM_H

View File

@ -0,0 +1,31 @@
//==-- loop_proto_to_llvm_main.cpp - Driver for protobuf-LLVM conversion----==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements a simple driver to print an LLVM program from a protobuf with loops
//
//===----------------------------------------------------------------------===//
#include <fstream>
#include <iostream>
#include <streambuf>
#include <string>
#include "loop_proto_to_llvm.h"
// Driver: for every file named on the command line, prints a ";; <path>"
// banner followed by the LLVM IR generated from the serialized protobuf stored
// in that file. Files that cannot be opened are reported on stderr and
// skipped. Returns 1 if any file could not be opened, 0 otherwise.
int main(int argc, char **argv) {
  bool failed = false;
  for (int i = 1; i < argc; i++) {
    // Serialized protobufs are binary data, and the file is only read: open it
    // read-only (std::fstream's default in|out mode fails on read-only files)
    // and in binary mode so no newline translation alters the bytes.
    std::ifstream in(argv[i], std::ios::in | std::ios::binary);
    if (!in) {
      std::cerr << "error: cannot open " << argv[i] << "\n";
      failed = true;
      continue;
    }
    const std::string str((std::istreambuf_iterator<char>(in)),
                          std::istreambuf_iterator<char>());
    std::cout << ";; " << argv[i] << std::endl;
    std::cout << clang_fuzzer::LoopProtoToLLVM(
        reinterpret_cast<const uint8_t *>(str.data()), str.size());
  }
  return failed ? 1 : 0;
}
}