From 980e45fe55091739600f1fa42fce707197debc2a Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Thu, 31 May 2018 01:27:07 +0000 Subject: [PATCH] [libFuzzer] add collect_data_flow.py that allows to run the data-flow tracer several times on subsets of inputs bytes, to overcome DFSan out-of-label failures llvm-svn: 333616 --- .../lib/fuzzer/scripts/collect_data_flow.py | 56 +++++++++++++++++++ compiler-rt/test/fuzzer/dataflow.test | 5 ++ 2 files changed, 61 insertions(+) create mode 100755 compiler-rt/lib/fuzzer/scripts/collect_data_flow.py diff --git a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py new file mode 100755 index 000000000000..d13f6dcc4110 --- /dev/null +++ b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# +# Runs the data-flow tracer several times on the same input in order to collect +# the complete trace for all input bytes (running it on all bytes at once +# may fail if DFSan runs out of labels). 
+# Usage:
+#   collect_data_flow.py BINARY INPUT [RESULT]
+#===------------------------------------------------------------------------===#
+from __future__ import print_function
+
+import atexit
+import sys
+import os
+import subprocess
+import tempfile
+import shutil
+
+# Not read by this script: main() keeps its directory in a local variable.
+tmpdir = ""
+
+def cleanup(d):
+  """Deletes the directory tree rooted at d, announcing it on stdout."""
+  print("removing: ", d)
+  shutil.rmtree(d)
+
+def main(argv):
+  """Collects the data-flow trace of one input by bisecting its byte range.
+
+  argv[1] is the data-flow tracer binary, argv[2] the input file and the
+  optional argv[3] the file that receives the merged trace (stdout if absent).
+  The binary is run as `BINARY BEG END INPUT OUTPUT`; whenever a run fails on
+  a range of two or more bytes the range is halved and both halves are
+  retried. The per-range traces are then combined by merge_data_flow.py,
+  which is looked up in the directory of argv[0].
+
+  Returns the exit status to report: 1 on a usage error or when no range
+  could be traced, otherwise the exit status of merge_data_flow.py.
+  """
+  if len(argv) < 3:
+    print("usage: collect_data_flow.py BINARY INPUT [RESULT]", file=sys.stderr)
+    return 1
+  exe = argv[1]
+  inp = argv[2]
+  size = os.path.getsize(inp)
+  workdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
+  # The per-range traces are only needed until the merge below has run.
+  atexit.register(cleanup, workdir)
+  print("tmpdir: ", workdir)
+  # Stack of [begin, end] byte ranges that still have to be traced.
+  q = [[0, size]]
+  outputs = []
+  while q:
+    beg, end = q.pop()
+    print("******* Trying: ", [beg, end])
+    tmpfile = os.path.join(workdir, "%d-%d" % (beg, end))
+    ret = subprocess.call([exe, str(beg), str(end), inp, tmpfile])
+    if ret == 0:
+      outputs.append(tmpfile)
+      print("******* Success: ", [beg, end])
+    elif end - beg >= 2:
+      # The tracer failed on this range; retry on each half. Floor division
+      # keeps the offsets integers on Python 3 as well as on Python 2.
+      mid = (beg + end) // 2
+      q.append([beg, mid])
+      q.append([mid, end])
+    else:
+      # The range cannot be split any further, so it contributes no trace.
+      print("******* Failed: ", [beg, end], file=sys.stderr)
+  if not outputs:
+    print("no data-flow trace was collected", file=sys.stderr)
+    return 1
+  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
+  # Flush our own buffered messages first: when RESULT is not given the merge
+  # script writes to the very same stdout.
+  sys.stdout.flush()
+  if len(argv) >= 4:
+    with open(argv[3], "w") as f:
+      return subprocess.call([merge] + outputs, stdout=f)
+  return subprocess.call([merge] + outputs, stdout=sys.stdout)
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))
diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test
index 46e5c5d3d1c7..7162b06f6d25 100644
--- a/compiler-rt/test/fuzzer/dataflow.test
+++ b/compiler-rt/test/fuzzer/dataflow.test
@@ -54,6 +54,9 @@ RUN:%t-ThreeFunctionsTestDF 2 4 %t/IN/FUZZMU > %t-merge-2
 RUN:%t-ThreeFunctionsTestDF 4 6 %t/IN/FUZZMU > %t-merge-3
 RUN:%libfuzzer_src/scripts/merge_data_flow.py %t-merge-* | FileCheck %s --check-prefix=IN_FUZZMU
 
+# Test collect_data_flow
+RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/FUZZMU | FileCheck %s --check-prefix=IN_FUZZMU
+
 IN_FUZZMU-DAG: F{{[012]}} 0000100
 IN_FUZZMU-DAG: F{{[012]}} 1111001
 IN_FUZZMU-DAG: F{{[012]}} 0000011
@@ -65,3 +68,5 @@ OUT_OF_LABELS: ==FATAL: DataFlowSanitizer: out of labels
 RUN: %t-ExplodeDFSanLabelsTestDF 0 2 %t/IN/1234567890123456
 RUN: %t-ExplodeDFSanLabelsTestDF 2
4 %t/IN/1234567890123456 RUN: %t-ExplodeDFSanLabelsTestDF 4 6 %t/IN/1234567890123456 +# Or we can use collect_data_flow +RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ExplodeDFSanLabelsTestDF %t/IN/1234567890123456