[libFuzzer] improve -merge functionality

llvm-svn: 263769
This commit is contained in:
Kostya Serebryany 2016-03-18 00:23:29 +00:00
parent 01864a2d01
commit 945761b8c2
6 changed files with 101 additions and 73 deletions

View File

@ -284,8 +284,7 @@ static int FuzzerDriver(const std::vector<std::string> &Args,
Options.UseTraces = Flags.use_traces;
Options.UseMemcmp = Flags.use_memcmp;
Options.ShuffleAtStartUp = Flags.shuffle;
Options.PreferSmallDuringInitialShuffle =
Flags.prefer_small_during_initial_shuffle;
Options.PreferSmall = Flags.prefer_small;
Options.Reload = Flags.reload;
Options.OnlyASCII = Flags.only_ascii;
Options.OutputCSV = Flags.output_csv;

View File

@ -21,10 +21,8 @@ FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
FUZZER_FLAG_INT(mutate_depth, 5,
"Apply this number of consecutive mutations to each input.")
FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup")
FUZZER_FLAG_INT(
prefer_small_during_initial_shuffle, -1,
"If 1, always prefer smaller inputs during the initial corpus shuffle."
" If 0, never do that. If -1, do it sometimes.")
FUZZER_FLAG_INT(prefer_small, 1,
"If 1, always prefer smaller inputs during the corpus shuffle.")
FUZZER_FLAG_INT(
timeout, 1200,
"Timeout in seconds (if positive). "

View File

@ -29,6 +29,7 @@
namespace fuzzer {
using namespace std::chrono;
typedef std::vector<uint8_t> Unit;
typedef std::vector<Unit> UnitVector;
// A simple POD sized array of bytes.
template <size_t kMaxSize> class FixedWord {
@ -288,7 +289,7 @@ public:
bool UseFullCoverageSet = false;
bool Reload = true;
bool ShuffleAtStartUp = true;
int PreferSmallDuringInitialShuffle = -1;
bool PreferSmall = true;
size_t MaxNumberOfRuns = ULONG_MAX;
int ReportSlowUnits = 10;
bool OnlyASCII = false;
@ -342,6 +343,8 @@ public:
// Merge Corpora[1:] into Corpora[0].
void Merge(const std::vector<std::string> &Corpora);
// Returns a subset of 'Extra' that adds coverage to 'Initial'.
UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra);
MutationDispatcher &GetMD() { return MD; }
void PrintFinalStats();
void SetMaxLen(size_t MaxLen);
@ -359,6 +362,8 @@ private:
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n");
void PrintStatusForNewUnit(const Unit &U);
void ShuffleCorpus(UnitVector *V);
// Updates the probability distribution for the units in the corpus.
// Must be called whenever the corpus or unit weights are changed.
void UpdateCorpusDistribution();
@ -367,6 +372,7 @@ private:
size_t RecordCallerCalleeCoverage();
void PrepareCoverageBeforeRun();
bool CheckCoverageAfterRun();
void ResetCoverage();
// Trace-based fuzzing: we run a unit with some kind of tracing
// enabled and record potentially useful mutations. Then

View File

@ -243,34 +243,25 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
}
}
void Fuzzer::ShuffleCorpus(UnitVector *V) {
std::random_shuffle(V->begin(), V->end(), MD.GetRand());
if (Options.PreferSmall)
std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
return A.size() < B.size();
});
}
void Fuzzer::ShuffleAndMinimize() {
bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 ||
(Options.PreferSmallDuringInitialShuffle == -1 &&
MD.GetRand().RandBool()));
if (Options.Verbosity)
Printf("INFO: PreferSmall: %d\n", PreferSmall);
PrintStats("READ ");
std::vector<Unit> NewCorpus;
if (Options.ShuffleAtStartUp) {
std::random_shuffle(Corpus.begin(), Corpus.end(), MD.GetRand());
if (PreferSmall)
std::stable_sort(
Corpus.begin(), Corpus.end(),
[](const Unit &A, const Unit &B) { return A.size() < B.size(); });
}
Unit U;
for (const auto &C : Corpus) {
for (size_t First = 0; First < 1; First++) {
U.clear();
size_t Last = std::min(First + Options.MaxLen, C.size());
U.insert(U.begin(), C.begin() + First, C.begin() + Last);
if (Options.OnlyASCII)
ToASCII(U.data(), U.size());
if (RunOne(U)) {
NewCorpus.push_back(U);
if (Options.Verbosity >= 2)
Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size());
}
if (Options.ShuffleAtStartUp)
ShuffleCorpus(&Corpus);
for (const auto &U : Corpus) {
if (RunOne(U)) {
NewCorpus.push_back(U);
if (Options.Verbosity >= 2)
Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size());
}
}
Corpus = NewCorpus;
@ -438,37 +429,65 @@ void Fuzzer::ReportNewCoverage(const Unit &U) {
NumberOfNewUnitsAdded++;
}
// Finds minimal number of units in 'Extra' that add coverage to 'Initial'.
// We do it by actually executing the units, sometimes more than once,
// because we may be using different coverage-like signals and the only
// common thing between them is that we can say "this unit found new stuff".
UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial,
const UnitVector &Extra) {
UnitVector Res = Extra;
size_t OldSize = Res.size();
for (int Iter = 0; Iter < 10; Iter++) {
ShuffleCorpus(&Res);
ResetCoverage();
for (auto &U : Initial)
RunOne(U);
Corpus.clear();
for (auto &U : Res)
if (RunOne(U))
Corpus.push_back(U);
char Stat[7] = "MIN ";
Stat[3] = '0' + Iter;
PrintStats(Stat);
size_t NewSize = Corpus.size();
Res.swap(Corpus);
if (NewSize == OldSize)
break;
OldSize = NewSize;
}
return Res;
}
void Fuzzer::Merge(const std::vector<std::string> &Corpora) {
if (Corpora.size() <= 1) {
Printf("Merge requires two or more corpus dirs\n");
return;
}
auto InitialCorpusDir = Corpora[0];
assert(Options.MaxLen > 0);
ReadDir(InitialCorpusDir, nullptr, Options.MaxLen);
Printf("Merge: running the initial corpus '%s' of %d units\n",
InitialCorpusDir.c_str(), Corpus.size());
for (auto &U : Corpus)
RunOne(U);
std::vector<std::string> ExtraCorpora(Corpora.begin() + 1, Corpora.end());
size_t NumTried = 0;
size_t NumMerged = 0;
for (auto &C : ExtraCorpora) {
Corpus.clear();
ReadDir(C, nullptr, Options.MaxLen);
Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(),
Corpus.size());
for (auto &U : Corpus) {
NumTried++;
if (RunOne(U)) {
WriteToOutputCorpus(U);
NumMerged++;
}
}
assert(Options.MaxLen > 0);
UnitVector Initial, Extra;
ReadDirToVectorOfUnits(Corpora[0].c_str(), &Initial, nullptr, Options.MaxLen);
for (auto &C : ExtraCorpora)
ReadDirToVectorOfUnits(C.c_str(), &Extra, nullptr, Options.MaxLen);
if (!Initial.empty()) {
Printf("=== Minimizing the initial corpus of %zd units\n", Initial.size());
Initial = FindExtraUnits({}, Initial);
}
Printf("Merge: written %zd out of %zd units\n", NumMerged, NumTried);
Printf("=== Merging extra %zd units\n", Extra.size());
auto Res = FindExtraUnits(Initial, Extra);
for (auto &U: Res)
WriteToOutputCorpus(U);
Printf("=== Merge: written %zd units\n", Res.size());
}
void Fuzzer::MutateAndTestOne() {
@ -507,6 +526,12 @@ size_t Fuzzer::ChooseUnitIdxToMutate() {
return Idx;
}
void Fuzzer::ResetCoverage() {
CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage);
__sanitizer_reset_coverage();
CounterBitmap.clear();
}
// Experimental search heuristic: drilling.
// - Read, shuffle, execute and minimize the corpus.
// - Choose one random unit.
@ -521,8 +546,7 @@ void Fuzzer::Drill() {
Unit U = ChooseUnitToMutate();
CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage);
__sanitizer_reset_coverage();
ResetCoverage();
std::vector<Unit> SavedCorpus;
SavedCorpus.swap(Corpus);
@ -535,7 +559,7 @@ void Fuzzer::Drill() {
SavedOutputCorpusPath.swap(Options.OutputCorpus);
Loop();
__sanitizer_reset_coverage();
ResetCoverage();
PrintStats("REINIT");
SavedOutputCorpusPath.swap(Options.OutputCorpus);

View File

@ -7,19 +7,19 @@ RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=10000001 2>&1 | F
RUN: not LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=100000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-MemcmpTest -use_memcmp=0 -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-StrncmpTest -seed=1 -runs=100000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-StrncmpTest -seed=2 -runs=100000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=3 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-StrcmpTest -seed=1 -runs=200000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-StrcmpTest -seed=4 -runs=200000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=5 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=1 -runs=1000002 2>&1 | FileCheck %s
RUN: LLVMFuzzer-SwitchTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=6 -runs=1000002 2>&1 | FileCheck %s
RUN: LLVMFuzzer-SwitchTest -seed=7 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=10000000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-SimpleHashTest -seed=1 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000
RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=8 -runs=10000000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-SimpleHashTest -seed=9 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000
RUN: LLVMFuzzer-RepeatedMemcmp -seed=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
RUN: LLVMFuzzer-RepeatedMemcmp -seed=10 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
RECOMMENDED_DICT:###### Recommended dictionary. ######
RECOMMENDED_DICT-DAG: "foo"
RECOMMENDED_DICT-DAG: "bar"

View File

@ -8,8 +8,8 @@ RUN: echo ..Z... > %tmp/T1/3
# T1 has 3 elements, T2 is empty.
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
CHECK1: Merge: running the initial corpus {{.*}} of 3 units
CHECK1: Merge: written 0 out of 0 units
CHECK1: === Minimizing the initial corpus of 3 units
CHECK1: === Merge: written 0 units
RUN: echo ...Z.. > %tmp/T2/1
RUN: echo ....E. > %tmp/T2/2
@ -20,10 +20,11 @@ RUN: echo ..Z... > %tmp/T2/c
# T1 has 3 elements, T2 has 6 elements, only 3 are new.
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
CHECK2: Merge: running the initial corpus {{.*}} of 3 units
CHECK2: Merge: written 3 out of 6 units
CHECK2: === Minimizing the initial corpus of 3 units
CHECK2: === Merging extra 6 units
CHECK2: === Merge: written 3 units
# Now, T1 has 6 units and T2 has no new interesting units.
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK3
CHECK3: Merge: running the initial corpus {{.*}} of 6 units
CHECK3: Merge: written 0 out of 6 units
CHECK3: === Minimizing the initial corpus of 6 units
CHECK3: === Merge: written 0 units