From e9f2eda7abc381804c7de76eb4940ec351005fc0 Mon Sep 17 00:00:00 2001 From: Xinhao Yuan Date: Mon, 20 Oct 2025 06:55:55 -0700 Subject: [PATCH] Track the mutant origin. This is to enable input reduction (replacing corpus input with smaller mutants if the coverage matches) and corpus mutation stats for new scheduling methods. PiperOrigin-RevId: 821630139 --- centipede/BUILD | 40 +++-- centipede/byte_array_mutator.cc | 18 +- centipede/byte_array_mutator.h | 8 +- centipede/byte_array_mutator_test.cc | 42 ++--- centipede/centipede.cc | 105 +++++++---- centipede/centipede.h | 22 +-- centipede/centipede_callbacks.cc | 4 +- centipede/centipede_callbacks.h | 16 +- centipede/centipede_callbacks_test.cc | 2 +- centipede/centipede_default_callbacks.cc | 10 +- centipede/centipede_default_callbacks.h | 10 +- centipede/centipede_test.cc | 163 ++++++++++-------- centipede/corpus.cc | 12 +- centipede/corpus.h | 10 +- centipede/corpus_test.cc | 15 +- centipede/crash_deduplication_test.cc | 2 +- centipede/dispatcher.cc | 11 +- centipede/fuzztest_mutator.cc | 72 +++++--- centipede/fuzztest_mutator.h | 6 +- centipede/fuzztest_mutator_test.cc | 133 +++++++------- centipede/minimize_crash.cc | 12 +- centipede/minimize_crash_test.cc | 6 +- .../{mutation_input.h => mutation_data.h} | 44 ++++- ...on_input_test.cc => mutation_data_test.cc} | 2 +- centipede/runner.cc | 22 +-- centipede/runner_cmp_trace.h | 60 +++++-- centipede/runner_cmp_trace_test.cc | 17 +- centipede/runner_interface.h | 6 +- centipede/runner_request.cc | 10 +- centipede/runner_request.h | 6 +- centipede/runner_result.cc | 19 +- centipede/runner_result.h | 9 +- centipede/runner_result_test.cc | 13 +- centipede/test_coverage_util.cc | 3 +- centipede/test_coverage_util.h | 10 +- centipede/testing/BUILD | 2 +- centipede/testing/async_failing_target.cc | 2 +- .../fuzz_target_with_custom_mutator.cc | 11 +- centipede/util.cc | 6 +- centipede/util.h | 6 +- common/remote_file.cc | 26 ++- common/remote_file.h | 11 +- common/remote_file_oss.cc | 12 +- fuzztest/internal/BUILD | 2 +- fuzztest/internal/centipede_adaptor.cc | 17 +- fuzztest/internal/table_of_recent_compares.h | 1 - 46 files changed, 612 insertions(+), 424 deletions(-) rename centipede/{mutation_input.h => mutation_data.h} (53%) rename centipede/{mutation_input_test.cc => mutation_data_test.cc} (96%) diff --git a/centipede/BUILD b/centipede/BUILD index db3134503..3254bb1e8 100644 --- a/centipede/BUILD +++ b/centipede/BUILD @@ -366,7 +366,7 @@ cc_library( deps = [ ":centipede_callbacks", ":environment", - ":mutation_input", + ":mutation_data", ":runner_result", ":stop", ":thread_pool", @@ -439,6 +439,7 @@ cc_library( # used in centipede_runner. ":feature", ":execution_metadata", + ":mutation_data", ":shared_memory_blob_sequence", "@com_google_fuzztest//common:defs", ], @@ -454,14 +455,14 @@ cc_library( # used in centipede_runner. ":shared_memory_blob_sequence", ":execution_metadata", - ":mutation_input", + ":mutation_data", "@com_google_fuzztest//common:defs", ], ) cc_library( - name = "mutation_input", - hdrs = ["mutation_input.h"], + name = "mutation_data", + hdrs = ["mutation_data.h"], copts = DISABLE_SANCOV_COPTS, deps = [ # This target must have a minimal set of dependencies since it is @@ -480,7 +481,7 @@ cc_library( deps = [ ":execution_metadata", ":knobs", - ":mutation_input", + ":mutation_data", "@abseil-cpp//absl/base:nullability", "@com_google_fuzztest//common:defs", ], @@ -624,7 +625,7 @@ cc_library( ":control_flow", ":environment", ":fuzztest_mutator", - ":mutation_input", + ":mutation_data", ":runner_request", ":runner_result", ":shared_memory_blob_sequence", @@ -716,7 +717,7 @@ cc_library( ":environment", ":feature", ":feature_set", - ":mutation_input", + ":mutation_data", ":pc_info", ":runner_result", ":rusage_profiler", @@ -852,7 +853,7 @@ cc_library( deps = [ ":centipede_callbacks", ":environment", - ":mutation_input", + ":mutation_data", ":runner_result", ":stop", "@abseil-cpp//absl/status", @@ -870,7 +871,7 @@ cc_library( ":byte_array_mutator", ":execution_metadata", ":knobs", - ":mutation_input", + ":mutation_data", "@abseil-cpp//absl/random", "@abseil-cpp//absl/types:span", "@com_google_fuzztest//common:defs", @@ -938,6 +939,7 @@ cc_library( name = "runner_cmp_trace", hdrs = ["runner_cmp_trace.h"], copts = DISABLE_SANCOV_COPTS, + deps = ["@abseil-cpp//absl/base:core_headers"], ) # Library for manipulating centipede runner flags. This is not used by the @@ -955,6 +957,7 @@ cc_library( hdrs = ["dispatcher.h"], deps = [ ":execution_metadata", + ":mutation_data", ":runner_request", ":runner_result", ":shared_memory_blob_sequence", @@ -1030,7 +1033,7 @@ RUNNER_DEPS = [ ":foreach_nonzero", ":int_utils", ":knobs", - ":mutation_input", + ":mutation_data", ":rolling_hash", ":runner_cmp_trace", ":runner_fork_server", @@ -1095,7 +1098,7 @@ cc_library( linkstatic = True, # Must be linked statically even when dynamic_mode=on. deps = [ ":centipede_runner_no_main", - ":mutation_input", + ":mutation_data", "@abseil-cpp//absl/base:nullability", "@com_google_fuzztest//common:defs", ], @@ -1234,7 +1237,7 @@ cc_library( ":corpus", ":environment", ":feature", - ":mutation_input", + ":mutation_data", ":runner_result", ":util", "@com_google_fuzztest//common:defs", @@ -1506,6 +1509,7 @@ cc_test( deps = [ ":execution_metadata", ":feature", + ":mutation_data", ":runner_result", ":shared_memory_blob_sequence", "@com_google_fuzztest//common:defs", @@ -1515,10 +1519,10 @@ cc_test( ) cc_test( - name = "mutation_input_test", - srcs = ["mutation_input_test.cc"], + name = "mutation_data_test", + srcs = ["mutation_data_test.cc"], deps = [ - ":mutation_input", + ":mutation_data", "@com_google_fuzztest//common:defs", "@googletest//:gtest_main", ], @@ -1531,7 +1535,7 @@ cc_test( ":byte_array_mutator", ":execution_metadata", ":knobs", - ":mutation_input", + ":mutation_data", ":runner_cmp_trace", "@abseil-cpp//absl/container:flat_hash_set", "@com_google_fuzztest//common:defs", @@ -1564,7 +1568,7 @@ cc_test( ":execution_metadata", ":fuzztest_mutator", ":knobs", - ":mutation_input", + ":mutation_data", "@abseil-cpp//absl/container:flat_hash_set", "@abseil-cpp//absl/strings", "@com_google_fuzztest//common:defs", @@ -1911,7 +1915,7 @@ cc_test( ":centipede_interface", ":environment", ":feature", - ":mutation_input", + ":mutation_data", ":runner_result", ":stop", ":util", diff --git a/centipede/byte_array_mutator.cc b/centipede/byte_array_mutator.cc index 9a29a432e..845226966 100644 --- a/centipede/byte_array_mutator.cc +++ b/centipede/byte_array_mutator.cc @@ -24,7 +24,7 @@ #include "./centipede/execution_metadata.h" #include "./centipede/knobs.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./common/defs.h" namespace fuzztest::internal { @@ -321,27 +321,29 @@ void ByteArrayMutator::CrossOver(ByteArray &data, const ByteArray &other) { // TODO(kcc): add tests with different values of knobs. const KnobId knob_mutate_or_crossover = Knobs::NewId("mutate_or_crossover"); -std::vector ByteArrayMutator::MutateMany( - const std::vector &inputs, size_t num_mutants) { +std::vector ByteArrayMutator::MutateMany( + const std::vector& inputs, size_t num_mutants) { if (inputs.empty()) abort(); // TODO(xinhaoyuan): Consider metadata in other inputs instead of always the // first one. SetMetadata(inputs[0].metadata != nullptr ? *inputs[0].metadata : ExecutionMetadata()); size_t num_inputs = inputs.size(); - std::vector mutants; + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { - auto mutant = inputs[rng_() % num_inputs].data; - if (mutant.size() <= max_len_ && + Mutant mutant; + mutant.origin = rng_() % num_inputs; + mutant.data = inputs[mutant.origin].data; + if (mutant.data.size() <= max_len_ && knobs_.GenerateBool(knob_mutate_or_crossover, rng_())) { // Do crossover only if the mutant is not over the max_len_. // Perform crossover with some other input. It may be the same input. const auto &other_input = inputs[rng_() % num_inputs].data; - CrossOver(mutant, other_input); + CrossOver(mutant.data, other_input); } else { // Perform mutation. - Mutate(mutant); + Mutate(mutant.data); } mutants.push_back(std::move(mutant)); } diff --git a/centipede/byte_array_mutator.h b/centipede/byte_array_mutator.h index 3c6978caa..782b2842b 100644 --- a/centipede/byte_array_mutator.h +++ b/centipede/byte_array_mutator.h @@ -25,7 +25,7 @@ #include "absl/base/nullability.h" #include "./centipede/execution_metadata.h" #include "./centipede/knobs.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./common/defs.h" namespace fuzztest::internal { @@ -33,7 +33,7 @@ namespace fuzztest::internal { // A simple class representing an array of up to kMaxEntrySize bytes. class DictEntry { public: - static constexpr uint8_t kMaxEntrySize = 16; + static constexpr uint8_t kMaxEntrySize = 128; explicit DictEntry(ByteSpan bytes) : bytes_{}, // initialize bytes_ to all zeros @@ -108,8 +108,8 @@ class ByteArrayMutator { } // Takes non-empty `inputs` and produces `num_mutants` mutants. - std::vector MutateMany(const std::vector &inputs, - size_t num_mutants); + std::vector MutateMany(const std::vector& inputs, + size_t num_mutants); using CrossOverFn = void (ByteArrayMutator::*)(ByteArray &, const ByteArray &); diff --git a/centipede/byte_array_mutator_test.cc b/centipede/byte_array_mutator_test.cc index ae35641b5..e28dff356 100644 --- a/centipede/byte_array_mutator_test.cc +++ b/centipede/byte_array_mutator_test.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "gmock/gmock.h" @@ -25,7 +26,7 @@ #include "absl/container/flat_hash_set.h" #include "./centipede/execution_metadata.h" #include "./centipede/knobs.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_cmp_trace.h" #include "./common/defs.h" @@ -93,8 +94,9 @@ TEST(ByteArrayMutator, RoundDownToRemoveCorrectly) { namespace { TEST(DictEntry, DictEntry) { - uint8_t bytes[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}; + uint8_t bytes[129]; + std::iota(bytes, bytes + 129, 0); + DictEntry a_0_10({bytes + 0, 10}); DictEntry a_0_4({bytes + 0, 4}); DictEntry a_1_8({bytes + 1, 8}); @@ -103,7 +105,7 @@ TEST(DictEntry, DictEntry) { EXPECT_LT(a_0_10, a_1_8); EXPECT_EQ(memcmp(a_0_10.begin(), bytes, a_0_10.end() - a_0_10.begin()), 0); - EXPECT_DEATH({ DictEntry a_0_10({bytes, 17}); }, ""); + EXPECT_DEATH({ DictEntry a_0_10({bytes, 129}); }, ""); } TEST(CmpDictionary, CmpDictionary) { @@ -158,11 +160,11 @@ TEST(CmpDictionary, CmpDictionary) { } TEST(CmpDictionary, CmpDictionaryIsCompatibleWithCmpTrace) { - CmpTrace<0, 13> traceN; + CmpTrace<0, 13> traceN = {}; traceN.Clear(); constexpr uint8_t long_array[20] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; - traceN.Capture(20, long_array, long_array); // will be trimmed to 16. + traceN.Capture(20, long_array, long_array); ExecutionMetadata metadata; bool append_failed = false; @@ -928,12 +930,12 @@ TEST(ByteArrayMutator, MutateManyWithAlignedInputs) { {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, }; - const std::vector mutants = + const std::vector mutants = mutator.MutateMany(GetMutationInputRefsFromDataInputs(aligned_inputs), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - EXPECT_EQ(mutant.size() % kSizeAlignment, 0); + for (const Mutant& mutant : mutants) { + EXPECT_EQ(mutant.data.size() % kSizeAlignment, 0); } } @@ -958,13 +960,13 @@ TEST(ByteArrayMutator, MutateManyWithUnalignedInputs) { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, }; - const std::vector mutants = + const std::vector mutants = mutator.MutateMany(GetMutationInputRefsFromDataInputs(unaligned_inputs), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - if (mutant.size() % kSizeAlignment != 0) { - EXPECT_LE(mutant.size(), 11); + for (const Mutant& mutant : mutants) { + if (mutant.data.size() % kSizeAlignment != 0) { + EXPECT_LE(mutant.data.size(), 11); } } } @@ -982,12 +984,12 @@ TEST(ByteArrayMutator, MutateManyWithMaxLen) { {0, 1, 2}, {0, 1, 2, 3}, }; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( GetMutationInputRefsFromDataInputs(inputs), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - EXPECT_LE(mutant.size(), kMaxLen); + for (const Mutant& mutant : mutants) { + EXPECT_LE(mutant.data.size(), kMaxLen); } } @@ -1001,16 +1003,16 @@ TEST(ByteArrayMutator, MutateManyWithMaxLenWithStartingLargeInput) { const std::vector large_input = { {0, 1, 2, 3, 4, 5, 6, 7}, {0}, {0, 1}, {0, 1, 2}, {0, 1, 2, 3}, }; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( GetMutationInputRefsFromDataInputs(large_input), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - if (mutant.size() > kMaxLen) { + for (const Mutant& mutant : mutants) { + if (mutant.data.size() > kMaxLen) { // The only mutant larger than max length should be the same large input // that mutation originally started with. All other mutants should be // within the maximum length specified. - EXPECT_EQ(mutant, large_input[0]); + EXPECT_EQ(mutant.data, large_input[0]); } } } diff --git a/centipede/centipede.cc b/centipede/centipede.cc index b94f24610..538f0e3d8 100644 --- a/centipede/centipede.cc +++ b/centipede/centipede.cc @@ -82,7 +82,7 @@ #include "./centipede/environment.h" #include "./centipede/feature.h" #include "./centipede/feature_set.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./centipede/rusage_profiler.h" #include "./centipede/rusage_stats.h" @@ -99,6 +99,21 @@ namespace fuzztest::internal { +namespace { + +std::vector InputsToMutantRefs(const std::vector& inputs) { + std::vector mutants; + mutants.reserve(inputs.size()); + for (const auto input : inputs) { + MutantRef mutant; + mutant.data = input; + mutants.push_back(mutant); + } + return mutants; +} + +} // namespace + Centipede::Centipede(const Environment& env, CentipedeCallbacks& user_callbacks, const BinaryInfo& binary_info, CoverageLogger& coverage_logger, std::atomic& stats) @@ -360,7 +375,7 @@ void Centipede::LogFeaturesAsSymbols(const FeatureVec &fv) { } } -bool Centipede::InputPassesFilter(const ByteArray &input) { +bool Centipede::InputPassesFilter(ByteSpan input) { if (env_.input_filter.empty()) return true; WriteToLocalFile(input_filter_path_, input); bool result = input_filter_cmd_.Execute() == EXIT_SUCCESS; @@ -369,8 +384,8 @@ bool Centipede::InputPassesFilter(const ByteArray &input) { } bool Centipede::ExecuteAndReportCrash(std::string_view binary, - const std::vector &input_vec, - BatchResult &batch_result) { + const std::vector& input_vec, + BatchResult& batch_result) { bool success = user_callbacks_.Execute(binary, input_vec, batch_result); if (success) return true; if (ShouldStop()) { @@ -425,20 +440,24 @@ size_t Centipede::AddPcPairFeatures(FeatureVec &fv) { } bool Centipede::RunBatch( - const std::vector &input_vec, - BlobFileWriter *absl_nullable corpus_file, - BlobFileWriter *absl_nullable features_file, - BlobFileWriter *absl_nullable unconditional_features_file) { + const std::vector& mutants, + BlobFileWriter* absl_nullable corpus_file, + BlobFileWriter* absl_nullable features_file, + BlobFileWriter* absl_nullable unconditional_features_file) { BatchResult batch_result; - bool success = ExecuteAndReportCrash(env_.binary, input_vec, batch_result); - FUZZTEST_CHECK_EQ(input_vec.size(), batch_result.results().size()); + std::vector inputs; + inputs.reserve(mutants.size()); + for (auto mutant : mutants) { + inputs.push_back({mutant.data}); + } + bool success = ExecuteAndReportCrash(env_.binary, inputs, batch_result); + FUZZTEST_CHECK_EQ(mutants.size(), batch_result.results().size()); for (const auto &extra_binary : env_.extra_binaries) { if (ShouldStop()) break; BatchResult extra_batch_result; - success = - ExecuteAndReportCrash(extra_binary, input_vec, extra_batch_result) && - success; + success = ExecuteAndReportCrash(extra_binary, inputs, extra_batch_result) && + success; } if (EarlyStopRequested()) return false; if (!success && env_.exit_on_crash) { @@ -446,9 +465,8 @@ bool Centipede::RunBatch( RequestEarlyStop(EXIT_FAILURE); return false; } - FUZZTEST_CHECK_EQ(batch_result.results().size(), input_vec.size()); bool batch_gained_new_coverage = false; - for (size_t i = 0; i < input_vec.size(); i++) { + for (size_t i = 0; i < mutants.size(); i++) { if (ShouldStop()) break; FeatureVec &fv = batch_result.results()[i].mutable_features(); bool function_filter_passed = function_filter_.filter(fv); @@ -457,28 +475,28 @@ bool Centipede::RunBatch( input_gained_new_coverage = true; if (unconditional_features_file != nullptr) { FUZZTEST_CHECK_OK(unconditional_features_file->Write( - PackFeaturesAndHash(input_vec[i], fv))); + PackFeaturesAndHash(inputs[i], fv))); } if (input_gained_new_coverage) { // TODO(kcc): [impl] add stats for filtered-out inputs. - if (!InputPassesFilter(input_vec[i])) continue; + if (!InputPassesFilter(inputs[i])) continue; fs_.MergeFeatures(fv); LogFeaturesAsSymbols(fv); batch_gained_new_coverage = true; FUZZTEST_CHECK_GT(fv.size(), 0UL); if (function_filter_passed) { - corpus_.Add(input_vec[i], fv, batch_result.results()[i].metadata(), + corpus_.Add(inputs[i], fv, batch_result.results()[i].metadata(), batch_result.results()[i].stats(), fs_, coverage_frontier_); } if (corpus_file != nullptr) { - FUZZTEST_CHECK_OK(corpus_file->Write(input_vec[i])); + FUZZTEST_CHECK_OK(corpus_file->Write(inputs[i])); } if (!env_.corpus_dir.empty() && !env_.corpus_dir[0].empty()) { - WriteToLocalHashedFileInDir(env_.corpus_dir[0], input_vec[i]); + WriteToLocalHashedFileInDir(env_.corpus_dir[0], inputs[i]); } if (features_file != nullptr) { FUZZTEST_CHECK_OK( - features_file->Write(PackFeaturesAndHash(input_vec[i], fv))); + features_file->Write(PackFeaturesAndHash(inputs[i], fv))); } } } @@ -580,11 +598,12 @@ void Centipede::Rerun(std::vector &to_rerun) { while (!to_rerun.empty()) { if (ShouldStop()) break; size_t batch_size = std::min(to_rerun.size(), env_.batch_size); - std::vector batch(to_rerun.end() - batch_size, to_rerun.end()); - to_rerun.resize(to_rerun.size() - batch_size); - if (RunBatch(batch, nullptr, nullptr, features_file.get())) { + if (RunBatch( + InputsToMutantRefs({to_rerun.end() - batch_size, to_rerun.end()}), + nullptr, nullptr, features_file.get())) { UpdateAndMaybeLogStats("rerun-old", 1); } + to_rerun.resize(to_rerun.size() - batch_size); } } @@ -776,7 +795,8 @@ void Centipede::LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file, seed_inputs.push_back({0}); } - RunBatch(seed_inputs, corpus_file, features_file, + RunBatch(InputsToMutantRefs({seed_inputs.begin(), seed_inputs.end()}), + corpus_file, features_file, /*unconditional_features_file=*/nullptr); FUZZTEST_LOG(INFO) << "Number of input seeds available: " << num_seeds_available @@ -857,22 +877,39 @@ void Centipede::FuzzingLoop() { auto remaining_runs = env_.num_runs - new_runs; auto batch_size = std::min(env_.batch_size, remaining_runs); std::vector mutation_inputs; + std::vector mutate_input_to_corpus_idx; mutation_inputs.reserve(env_.mutate_batch_size); + mutate_input_to_corpus_idx.reserve(env_.mutate_batch_size); for (size_t i = 0; i < env_.mutate_batch_size; i++) { - const auto& corpus_record = env_.use_corpus_weights - ? corpus_.WeightedRandom(rng_) - : corpus_.UniformRandom(rng_); + const size_t origin = env_.use_corpus_weights + ? corpus_.WeightedRandom(rng_) + : corpus_.UniformRandom(rng_); + mutate_input_to_corpus_idx.push_back(origin); + const auto& corpus_record = corpus_.Records()[origin]; mutation_inputs.push_back( MutationInputRef{corpus_record.data, &corpus_record.metadata}); } - const std::vector mutants = + std::vector mutants = user_callbacks_.Mutate(mutation_inputs, batch_size); if (ShouldStop()) break; + new_runs += mutants.size(); + std::vector mutant_refs; + mutant_refs.reserve(mutants.size()); + for (auto& mutant : mutants) { + MutantRef ref; + ref.data = mutant.data; + if (mutant.origin == Mutant::kOriginNone) { + ref.origin = Mutant::kOriginNone; + } else { + FUZZTEST_CHECK_LT(mutant.origin, mutate_input_to_corpus_idx.size()); + ref.origin = mutate_input_to_corpus_idx[mutant.origin]; + } + mutant_refs.push_back(ref); + } bool gained_new_coverage = - RunBatch(mutants, corpus_file.get(), features_file.get(), nullptr); - new_runs += mutants.size(); + RunBatch(mutant_refs, corpus_file.get(), features_file.get(), nullptr); if (gained_new_coverage) { UpdateAndMaybeLogStats("new-feature", 1); @@ -915,8 +952,8 @@ void Centipede::FuzzingLoop() { } void Centipede::ReportCrash(std::string_view binary, - const std::vector &input_vec, - const BatchResult &batch_result) { + const std::vector& input_vec, + const BatchResult& batch_result) { FUZZTEST_CHECK_EQ(input_vec.size(), batch_result.results().size()); const size_t suspect_input_idx = std::clamp( @@ -1012,7 +1049,7 @@ void Centipede::ReportCrash(std::string_view binary, << "Executing inputs one-by-one, trying to find the reproducer"; for (auto input_idx : input_idxs_to_try) { if (ShouldStop()) break; - const auto &one_input = input_vec[input_idx]; + const auto one_input = input_vec[input_idx]; BatchResult one_input_batch_result; if (!user_callbacks_.Execute(binary, {one_input}, one_input_batch_result) && one_input_batch_result.IsInputFailure() && diff --git a/centipede/centipede.h b/centipede/centipede.h index 625938f3c..368adf043 100644 --- a/centipede/centipede.h +++ b/centipede/centipede.h @@ -75,7 +75,7 @@ class Centipede { std::string_view dir); private: - // Executes inputs from `input_vec`. + // Executes inputs from `mutants` and update the corpus. // For every input, its pruned features are written to // `unconditional_features_file`, (if that's non-null). // For every input that caused new features to be observed: @@ -83,11 +83,11 @@ class Centipede { // * the input is written to `corpus_file` (if that's non-null). // * its features are written to `features_file` (if that's non-null). // Returns true if new features were observed. - // Post-condition: `batch_result.results.size()` == `input_vec.size()`. - bool RunBatch(const std::vector &input_vec, - BlobFileWriter *absl_nullable corpus_file, - BlobFileWriter *absl_nullable features_file, - BlobFileWriter *absl_nullable unconditional_features_file); + // Post-condition: `batch_result.results.size()` == `mutants.size()`. + bool RunBatch(const std::vector& mutants, + BlobFileWriter* absl_nullable corpus_file, + BlobFileWriter* absl_nullable features_file, + BlobFileWriter* absl_nullable unconditional_features_file); // Loads seed inputs from the user callbacks, execute them, and store them // with the corresponding features into `corpus_file` and `features_file`. void LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file, @@ -140,13 +140,13 @@ class Centipede { size_t batch_index); // Returns true if `input` passes env_.input_filter. - bool InputPassesFilter(const ByteArray &input); + bool InputPassesFilter(ByteSpan input); // Executes `binary` with `input_vec` and `batch_result` as input/output. // If the binary crashes, calls ReportCrash(). // Returns true iff there were no crashes. bool ExecuteAndReportCrash(std::string_view binary, - const std::vector &input_vec, - BatchResult &batch_result); + const std::vector& input_vec, + BatchResult& batch_result); // Reports a crash and saves the reproducer to workdir/crashes, if possible. // `binary` is the binary causing the crash. // Prints the first `env_.max_num_crash_reports` logs. @@ -156,8 +156,8 @@ class Centipede { // as a hint when choosing which input to try first. // Stops early if `EarlyExitRequested()`. void ReportCrash(std::string_view binary, - const std::vector &input_vec, - const BatchResult &batch_result); + const std::vector& input_vec, + const BatchResult& batch_result); // Merges shard `shard_index_to_merge` of the corpus in `merge_from_dir` // into the current corpus. // Writes added inputs to the current shard. diff --git a/centipede/centipede_callbacks.cc b/centipede/centipede_callbacks.cc index 3f6f817dc..cbe1a6c20 100644 --- a/centipede/centipede_callbacks.cc +++ b/centipede/centipede_callbacks.cc @@ -48,7 +48,7 @@ #include "./centipede/binary_info.h" #include "./centipede/command.h" #include "./centipede/control_flow.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_request.h" #include "./centipede/runner_result.h" #include "./centipede/stop.h" @@ -518,7 +518,7 @@ int CentipedeCallbacks::RunBatchForBinary(std::string_view binary) { } int CentipedeCallbacks::ExecuteCentipedeSancovBinaryWithShmem( - std::string_view binary, const std::vector& inputs, + std::string_view binary, const std::vector& inputs, BatchResult& batch_result) { auto start_time = absl::Now(); batch_result.ClearAndResize(inputs.size()); diff --git a/centipede/centipede_callbacks.h b/centipede/centipede_callbacks.h index 068dfc6c4..2d54694ef 100644 --- a/centipede/centipede_callbacks.h +++ b/centipede/centipede_callbacks.h @@ -30,7 +30,7 @@ #include "./centipede/command.h" #include "./centipede/environment.h" #include "./centipede/fuzztest_mutator.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./centipede/util.h" @@ -68,12 +68,12 @@ class CentipedeCallbacks { // Post-condition: // `batch_result` has results for every `input`, even on failure. virtual bool Execute(std::string_view binary, - const std::vector &inputs, - BatchResult &batch_result) = 0; + const std::vector& inputs, + BatchResult& batch_result) = 0; // Takes non-empty `inputs` and returns at most `num_mutants` mutated inputs. - virtual std::vector Mutate( - const std::vector &inputs, size_t num_mutants) { + virtual std::vector Mutate( + const std::vector& inputs, size_t num_mutants) { return env_.use_legacy_default_mutator ? byte_array_mutator_.MutateMany(inputs, num_mutants) : fuzztest_mutator_.MutateMany(inputs, num_mutants); @@ -105,9 +105,9 @@ class CentipedeCallbacks { // Same as ExecuteCentipedeSancovBinary, but uses shared memory. // Much faster for fast targets since it uses fewer system calls. - int ExecuteCentipedeSancovBinaryWithShmem( - std::string_view binary, const std::vector &inputs, - BatchResult &batch_result); + int ExecuteCentipedeSancovBinaryWithShmem(std::string_view binary, + const std::vector& inputs, + BatchResult& batch_result); // Constructs a string CENTIPEDE_RUNNER_FLAGS=":flag1:flag2:...", // where the flags are determined by `env` and also include `extra_flags`. diff --git a/centipede/centipede_callbacks_test.cc b/centipede/centipede_callbacks_test.cc index 60fb8fc91..12e968736 100644 --- a/centipede/centipede_callbacks_test.cc +++ b/centipede/centipede_callbacks_test.cc @@ -28,7 +28,7 @@ namespace { class FakeCallbacks : public CentipedeCallbacks { public: explicit FakeCallbacks(const Environment& env) : CentipedeCallbacks(env) {} - bool Execute(std::string_view binary, const std::vector& inputs, + bool Execute(std::string_view binary, const std::vector& inputs, BatchResult& batch_result) override { return true; } diff --git a/centipede/centipede_default_callbacks.cc b/centipede/centipede_default_callbacks.cc index 73b8b8e41..9f6d0cf87 100644 --- a/centipede/centipede_default_callbacks.cc +++ b/centipede/centipede_default_callbacks.cc @@ -25,7 +25,7 @@ #include "absl/status/statusor.h" #include "./centipede/centipede_callbacks.h" #include "./centipede/environment.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./centipede/stop.h" #include "./common/defs.h" @@ -46,8 +46,8 @@ CentipedeDefaultCallbacks::CentipedeDefaultCallbacks(const Environment &env) } bool CentipedeDefaultCallbacks::Execute(std::string_view binary, - const std::vector &inputs, - BatchResult &batch_result) { + const std::vector& inputs, + BatchResult& batch_result) { return ExecuteCentipedeSancovBinaryWithShmem(binary, inputs, batch_result) == 0; } @@ -72,8 +72,8 @@ CentipedeDefaultCallbacks::GetSerializedTargetConfig() { "Failed to get serialized configuration from the target binary."); } -std::vector CentipedeDefaultCallbacks::Mutate( - const std::vector &inputs, size_t num_mutants) { +std::vector CentipedeDefaultCallbacks::Mutate( + const std::vector& inputs, size_t num_mutants) { if (num_mutants == 0) return {}; // In persistent mode, mutation could fail due to previous asynchronous // failure, thus give it one more chance to mutate in a clean state. diff --git a/centipede/centipede_default_callbacks.h b/centipede/centipede_default_callbacks.h index 0b7856261..eef27339d 100644 --- a/centipede/centipede_default_callbacks.h +++ b/centipede/centipede_default_callbacks.h @@ -28,7 +28,7 @@ #include "absl/status/statusor.h" #include "./centipede/centipede_callbacks.h" #include "./centipede/environment.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./common/defs.h" @@ -40,10 +40,10 @@ class CentipedeDefaultCallbacks : public CentipedeCallbacks { explicit CentipedeDefaultCallbacks(const Environment &env); size_t GetSeeds(size_t num_seeds, std::vector &seeds) override; absl::StatusOr GetSerializedTargetConfig() override; - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override; - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override; + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override; private: std::optional custom_mutator_is_usable_ = std::nullopt; diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc index 16e928fd1..3b4d2304c 100644 --- a/centipede/centipede_test.cc +++ b/centipede/centipede_test.cc @@ -38,7 +38,7 @@ #include "./centipede/centipede_interface.h" #include "./centipede/environment.h" #include "./centipede/feature.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./centipede/stop.h" #include "./centipede/util.h" @@ -68,8 +68,8 @@ class CentipedeMock : public CentipedeCallbacks { // Doesn't execute anything // Sets `batch_result.results()` based on the values of `inputs`: // Collects various stats about the inputs, to be checked in tests. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { batch_result.results().clear(); // For every input, we create a 256-element array `counters`, where // i-th element is the number of bytes with the value 'i' in the input. @@ -106,19 +106,20 @@ class CentipedeMock : public CentipedeCallbacks { // (the value {0} is produced by the default GetSeeds()). // Next 65536 mutations are 2-byte sequences {0,0} ... {255, 255}. // Then repeat 2-byte sequences. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { num_mutations_++; if (num_mutations_ < 256) { - mutants.push_back({static_cast(num_mutations_)}); + mutants.push_back({/*data=*/{static_cast(num_mutations_)}, + Mutant::kOriginNone}); continue; } uint8_t byte0 = (num_mutations_ - 256) / 256; uint8_t byte1 = (num_mutations_ - 256) % 256; - mutants.push_back({byte0, byte1}); + mutants.push_back({/*data=*/{byte0, byte1}, Mutant::kOriginNone}); } return mutants; } @@ -343,15 +344,15 @@ class MutateCallbacks : public CentipedeCallbacks { public: explicit MutateCallbacks(const Environment &env) : CentipedeCallbacks(env) {} // Will not be called. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { FUZZTEST_LOG(FATAL); return false; } // Will not be called. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { FUZZTEST_LOG(FATAL); } @@ -436,8 +437,9 @@ TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { GetMutationInputRefsFromDataInputs(inputs), 10000); EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); EXPECT_TRUE(result.has_custom_mutator()); - EXPECT_THAT(result.mutants(), AllOf(IsSupersetOf(all_expected_mutants), - Each(Not(IsEmpty())))); + EXPECT_THAT( + GetDataFromMutants(result.mutants()), + AllOf(IsSupersetOf(all_expected_mutants), Each(Not(IsEmpty())))); } } @@ -451,9 +453,10 @@ TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { 10000); EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); EXPECT_TRUE(result.has_custom_mutator()); - EXPECT_THAT(result.mutants(), AllOf(IsSupersetOf(all_expected_mutants), - Each(Not(IsEmpty())))); - EXPECT_THAT(result.mutants(), + const auto mutant_data = GetDataFromMutants(result.mutants()); + EXPECT_THAT(mutant_data, AllOf(IsSupersetOf(all_expected_mutants), + Each(Not(IsEmpty())))); + EXPECT_THAT(mutant_data, AllOf(IsSupersetOf(all_expected_mutants), Each(Not(IsEmpty())), // The byte_array_mutator may insert up to 20 bytes to an // input, which may push the size over the max_len. @@ -471,9 +474,10 @@ TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { binary_with_custom_mutator, GetMutationInputRefsFromDataInputs(inputs), 10000); // Must contain normal mutants, but not the ones from crossover. - EXPECT_THAT(result.mutants(), IsSupersetOf(some_of_expected_mutants)); + const auto mutant_data = GetDataFromMutants(result.mutants()); + EXPECT_THAT(mutant_data, IsSupersetOf(some_of_expected_mutants)); for (const auto &crossover_mutant : expected_crossover_mutants) { - EXPECT_THAT(result.mutants(), Not(Contains(crossover_mutant))); + EXPECT_THAT(mutant_data, Not(Contains(crossover_mutant))); } } } @@ -486,8 +490,8 @@ class MergeMock : public CentipedeCallbacks { // Doesn't execute anything. // All inputs are 1-byte long. // For an input {X}, the feature output is {X}. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { batch_result.results().resize(inputs.size()); for (size_t i = 0, n = inputs.size(); i < n; ++i) { FUZZTEST_CHECK_EQ(inputs[i].size(), 1); @@ -497,12 +501,13 @@ class MergeMock : public CentipedeCallbacks { } // Every consecutive mutation is {number_of_mutations_} (starting from 1). - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants{num_mutants}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants(num_mutants); for (auto &mutant : mutants) { - mutant.resize(1); - mutant[0] = ++number_of_mutations_; + mutant.data.resize(1); + mutant.data[0] = ++number_of_mutations_; + mutant.origin = Mutant::kOriginNone; } return mutants; } @@ -575,24 +580,25 @@ class FunctionFilterMock : public CentipedeCallbacks { } // Executes the target in the normal way. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { return ExecuteCentipedeSancovBinaryWithShmem(env_.binary, inputs, batch_result) == EXIT_SUCCESS; } // Sets the inputs to one of 3 pre-defined values. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { for (auto &input : inputs) { if (!seed_inputs_.contains(input.data)) { observed_inputs_.insert(input.data); } } - std::vector mutants; + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { - mutants.push_back(GetMutant(++number_of_mutations_)); + mutants.push_back( + {/*data=*/GetMutant(++number_of_mutations_), Mutant::kOriginNone}); } return mutants; } @@ -684,8 +690,8 @@ class ExtraBinariesMock : public CentipedeCallbacks { // Doesn't execute anything. // On certain combinations of {binary,input} returns false. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { bool res = true; for (const auto &input : inputs) { if (input.size() != 1) continue; @@ -703,12 +709,13 @@ class ExtraBinariesMock : public CentipedeCallbacks { } // Sets the mutants to different 1-byte values. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants{num_mutants}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants(num_mutants); for (auto &mutant : mutants) { - mutant.resize(1); - mutant[0] = ++number_of_mutations_; + mutant.data.resize(1); + mutant.data[0] = ++number_of_mutations_; + mutant.origin = Mutant::kOriginNone; } return mutants; } @@ -803,8 +810,8 @@ class UndetectedCrashingInputMock : public CentipedeCallbacks { // Doesn't execute anything. // Crash when 0th char of input to binary b1 equals `crashing_input_idx_`, but // only on 1st exec. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { batch_result.ClearAndResize(inputs.size()); bool res = true; if (!first_pass_) { @@ -816,7 +823,7 @@ class UndetectedCrashingInputMock : public CentipedeCallbacks { if (input[0] == crashing_input_idx_) { if (first_pass_) { first_pass_ = false; - crashing_input_ = input; + crashing_input_ = {input.begin(), input.end()}; // TODO(b/274705740): `num_outputs_read()` is the number of outputs // that Centipede engine *expects* to have been read from *the // current BatchResult* by the *particular* implementation of @@ -836,13 +843,14 @@ class UndetectedCrashingInputMock : public CentipedeCallbacks { } // Sets the mutants to different 1-byte values. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { // The contents of each mutant is simply its sequential number. - mutants.push_back({static_cast(curr_input_idx_++)}); + mutants.push_back({/*data=*/{static_cast(curr_input_idx_++)}, + Mutant::kOriginNone}); } return mutants; } @@ -971,7 +979,8 @@ TEST_F(CentipedeWithTemporaryLocalDir, CleansUpMetadataAfterStartup) { BatchResult batch_result; const std::vector inputs = {{0}}; - ASSERT_TRUE(callbacks.Execute(env.binary, inputs, batch_result)); + ASSERT_TRUE(callbacks.Execute(env.binary, {inputs.begin(), inputs.end()}, + batch_result)); ASSERT_EQ(batch_result.results().size(), 1); bool found_startup_cmp_entry = false; batch_result.results()[0].metadata().ForEachCmpEntry( @@ -988,17 +997,17 @@ class FakeCentipedeCallbacksForThreadChecking : public CentipedeCallbacks { std::thread::id execute_thread_id) : CentipedeCallbacks(env), execute_thread_id_(execute_thread_id) {} - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { batch_result.ClearAndResize(inputs.size()); thread_check_passed_ = thread_check_passed_ && std::this_thread::get_id() == execute_thread_id_; return true; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {/*data=*/{0}, Mutant::kOriginNone}}; } bool thread_check_passed() { return thread_check_passed_; } @@ -1033,7 +1042,8 @@ TEST_F(CentipedeWithTemporaryLocalDir, DetectsStackOverflow) { BatchResult batch_result; const std::vector inputs = {ByteArray{'s', 't', 'k'}}; - ASSERT_FALSE(callbacks.Execute(env.binary, inputs, batch_result)); + ASSERT_FALSE(callbacks.Execute(env.binary, {inputs.begin(), inputs.end()}, + batch_result)); EXPECT_THAT(batch_result.log(), HasSubstr("Stack limit exceeded")); EXPECT_EQ(batch_result.failure_description(), "stack-limit-exceeded"); } @@ -1042,8 +1052,8 @@ class SetupFailureCallbacks : public CentipedeCallbacks { public: using CentipedeCallbacks::CentipedeCallbacks; - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { ++execute_count_; batch_result.ClearAndResize(inputs.size()); batch_result.exit_code() = EXIT_FAILURE; @@ -1051,9 +1061,9 @@ class SetupFailureCallbacks : public CentipedeCallbacks { return false; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {/*data=*/{0}, Mutant::kOriginNone}}; } int execute_count() const { return execute_count_; } @@ -1079,8 +1089,8 @@ class SkippedTestCallbacks : public CentipedeCallbacks { public: using CentipedeCallbacks::CentipedeCallbacks; - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { ++execute_count_; batch_result.ClearAndResize(inputs.size()); batch_result.exit_code() = EXIT_FAILURE; @@ -1089,9 +1099,9 @@ class SkippedTestCallbacks : public CentipedeCallbacks { return false; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {/*data=*/{0}, Mutant::kOriginNone}}; } int execute_count() const { return execute_count_; } @@ -1117,8 +1127,8 @@ class IgnoredFailureCallbacks : public CentipedeCallbacks { public: using CentipedeCallbacks::CentipedeCallbacks; - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { ++execute_count_; batch_result.ClearAndResize(inputs.size()); batch_result.exit_code() = EXIT_FAILURE; @@ -1127,9 +1137,9 @@ class IgnoredFailureCallbacks : public CentipedeCallbacks { return false; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {/*data=*/{0}, Mutant::kOriginNone}}; } int execute_count() const { return execute_count_; } @@ -1160,11 +1170,11 @@ TEST_F(CentipedeWithTemporaryLocalDir, UsesProvidedCustomMutator) { CentipedeDefaultCallbacks callbacks(env); const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; - const std::vector mutants = callbacks.Mutate( + const std::vector mutants = callbacks.Mutate( GetMutationInputRefsFromDataInputs(inputs), inputs.size()); // The custom mutator just returns the original inputs as mutants. - EXPECT_EQ(inputs, mutants); + EXPECT_EQ(inputs, GetDataFromMutants(mutants)); } TEST_F(CentipedeWithTemporaryLocalDir, FailsOnMisbehavingCustomMutator) { @@ -1193,12 +1203,12 @@ TEST_F(CentipedeWithTemporaryLocalDir, CentipedeDefaultCallbacks callbacks(env); const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; - const std::vector mutants = callbacks.Mutate( + const std::vector mutants = callbacks.Mutate( GetMutationInputRefsFromDataInputs(inputs), inputs.size()); // The built-in mutator performs non-trivial mutations. EXPECT_EQ(inputs.size(), mutants.size()); - EXPECT_NE(inputs, mutants); + EXPECT_NE(inputs, GetDataFromMutants(mutants)); } TEST_F(CentipedeWithTemporaryLocalDir, @@ -1239,11 +1249,13 @@ TEST_F(CentipedeWithTemporaryLocalDir, ExecuteEndsAfterCustomFailure) { {'c', 'u', 's', 't', 'o', 'm'}, {'c', 'u', 's', 't', 'o', 'm'}, }; - EXPECT_FALSE(callbacks.Execute(env.binary, inputs, result)); + EXPECT_FALSE( + callbacks.Execute(env.binary, {inputs.begin(), inputs.end()}, result)); EXPECT_THAT(result.failure_description(), HasSubstr("custom 0")); EXPECT_THAT(result.log(), AllOf(HasSubstr("custom failure 0"), Not(HasSubstr("custom failure 1")))); - EXPECT_FALSE(callbacks.Execute(env.binary, inputs, result)); + EXPECT_FALSE( + callbacks.Execute(env.binary, {inputs.begin(), inputs.end()}, result)); EXPECT_THAT(result.failure_description(), HasSubstr("custom 1")); EXPECT_THAT(result.log(), AllOf(HasSubstr("custom failure 1"), Not(HasSubstr("custom failure 2")))); @@ -1259,7 +1271,8 @@ TEST_F(CentipedeWithTemporaryLocalDir, ToleratesAsyncFailureInMutation) { {'s', 'o', 'm', 'e'}, }; ClearEarlyStopRequestAndSetStopTime(absl::InfiniteFuture()); - EXPECT_TRUE(callbacks.Execute(env.binary, inputs, result)); + EXPECT_TRUE( + callbacks.Execute(env.binary, {inputs.begin(), inputs.end()}, result)); // Match the error log to check for retrying mutation. EXPECT_DEATH( [&] { diff --git a/centipede/corpus.cc b/centipede/corpus.cc index f3bc0019d..ba1df9b53 100644 --- a/centipede/corpus.cc +++ b/centipede/corpus.cc @@ -229,7 +229,7 @@ size_t Corpus::Prune(const FeatureSet &fs, return subset_to_remove.size(); } -void Corpus::Add(const ByteArray& data, const FeatureVec& fv, +void Corpus::Add(ByteSpan data, const FeatureVec& fv, const ExecutionMetadata& metadata, const ExecutionResult::Stats& stats, const FeatureSet& fs, const CoverageFrontier& coverage_frontier) { @@ -237,17 +237,17 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv, FUZZTEST_CHECK(!data.empty()) << "Got request to add empty element to corpus: ignoring"; FUZZTEST_CHECK_EQ(records_.size(), weighted_distribution_.size()); - records_.push_back({data, fv, metadata, stats}); + records_.push_back({{data.begin(), data.end()}, fv, metadata, stats}); // Will be updated by `UpdateWeights`. weighted_distribution_.AddWeight(0); } -const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const { - return records_[weighted_distribution_.RandomIndex(rng)]; +size_t Corpus::WeightedRandom(absl::BitGenRef rng) const { + return weighted_distribution_.RandomIndex(rng); } -const CorpusRecord& Corpus::UniformRandom(absl::BitGenRef rng) const { - return records_[absl::Uniform(rng, 0, records_.size())]; +size_t Corpus::UniformRandom(absl::BitGenRef rng) const { + return absl::Uniform(rng, 0, records_.size()); } void Corpus::DumpStatsToFile(const FeatureSet &fs, std::string_view filepath, diff --git a/centipede/corpus.h b/centipede/corpus.h index 1a1c4c1da..4c16ad3e0 100644 --- a/centipede/corpus.h +++ b/centipede/corpus.h @@ -121,7 +121,7 @@ class Corpus { // Adds a corpus element, consisting of 'data' (the input bytes, non-empty), // 'fv' (the features associated with this input), and execution `metadata`. // `fs` is used to compute weights of `fv`. - void Add(const ByteArray& data, const FeatureVec& fv, + void Add(ByteSpan data, const FeatureVec& fv, const ExecutionMetadata& metadata, const ExecutionResult::Stats& stats, const FeatureSet& fs, const CoverageFrontier& coverage_frontier); @@ -149,11 +149,11 @@ class Corpus { size_t NumActive() const { return records_.size(); } // Returns the max and avg sizes of the inputs. std::pair MaxAndAvgSize() const; - // Returns a random active corpus record using weighted distribution. + // Returns a random active corpus record index using weighted distribution. // See WeightedDistribution. - const CorpusRecord& WeightedRandom(absl::BitGenRef rng) const; - // Returns a random active corpus record using uniform distribution. - const CorpusRecord& UniformRandom(absl::BitGenRef rng) const; + size_t WeightedRandom(absl::BitGenRef rng) const; + // Returns a random active corpus record index using uniform distribution. + size_t UniformRandom(absl::BitGenRef rng) const; // Returns the element with index 'idx', where `idx` < NumActive(). const ByteArray &Get(size_t idx) const { return records_[idx].data; } // Returns the execution metadata for the element `idx`, `idx` < NumActive(). diff --git a/centipede/corpus_test.cc b/centipede/corpus_test.cc index 4b47d5c6d..46e4d4e37 100644 --- a/centipede/corpus_test.cc +++ b/centipede/corpus_test.cc @@ -173,8 +173,7 @@ TEST(Corpus, UniformWeightMethodsWorkAsExpected) { freq.clear(); freq.resize(corpus.NumActive()); for (int i = 0; i < kNumIter; i++) { - const auto& record = corpus.WeightedRandom(rng); - const auto id = record.data[0]; + const auto id = corpus.Records()[corpus.WeightedRandom(rng)].data[0]; ASSERT_LT(id, freq.size()); freq[id]++; } @@ -215,8 +214,7 @@ TEST(Corpus, RecencyWeightMethodsWorkAsExpected) { freq.clear(); freq.resize(corpus.NumActive()); for (int i = 0; i < kNumIter; i++) { - const auto& record = corpus.WeightedRandom(rng); - const auto id = record.data[0]; + const auto id = corpus.Records()[corpus.WeightedRandom(rng)].data[0]; ASSERT_LT(id, freq.size()); freq[id]++; } @@ -256,8 +254,7 @@ TEST(Corpus, FeatureRarityWeightMethodsWorkAsExpected) { freq.clear(); freq.resize(corpus.NumActive()); for (int i = 0; i < kNumIter; i++) { - const auto& record = corpus.WeightedRandom(rng); - const auto id = record.data[0]; + const auto id = corpus.Records()[corpus.WeightedRandom(rng)].data[0]; ASSERT_LT(id, freq.size()); freq[id]++; } @@ -300,10 +297,8 @@ TEST(Corpus, ScalesWeightsWithExecTime) { freq.clear(); freq.resize(corpus.NumActive()); for (int i = 0; i < kNumIter; i++) { - const auto& record = corpus.WeightedRandom(rng); - const auto id = record.data[0]; - ASSERT_LT(id, freq.size()); - freq[id]++; + const size_t idx = corpus.WeightedRandom(rng); + freq[idx]++; } }; diff --git a/centipede/crash_deduplication_test.cc b/centipede/crash_deduplication_test.cc index bd1966e47..afda69eb8 100644 --- a/centipede/crash_deduplication_test.cc +++ b/centipede/crash_deduplication_test.cc @@ -168,7 +168,7 @@ class FakeCentipedeCallbacks : public CentipedeCallbacks { absl::flat_hash_map crashing_inputs) : CentipedeCallbacks(env), crashing_inputs_(std::move(crashing_inputs)) {} - bool Execute(std::string_view binary, const std::vector& inputs, + bool Execute(std::string_view binary, const std::vector& inputs, BatchResult& batch_result) override { batch_result.ClearAndResize(inputs.size()); for (ByteSpan input : inputs) { diff --git a/centipede/dispatcher.cc b/centipede/dispatcher.cc index d82cd2834..9d056fc46 100644 --- a/centipede/dispatcher.cc +++ b/centipede/dispatcher.cc @@ -31,6 +31,7 @@ #include "absl/base/nullability.h" #include "./centipede/execution_metadata.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_request.h" #include "./centipede/runner_result.h" #include "./centipede/shared_memory_blob_sequence.h" @@ -535,9 +536,13 @@ void FuzzTestDispatcherEmitMutant(const void* data, size_t size) { "mutant must be non-empty with a valid pointer"); auto* output = GetOutputsBlobSequence(); DispatcherCheck(output != nullptr, "outputs blob sequence must exist"); - DispatcherCheck(MutationResult::WriteMutant( - {static_cast(data), size}, *output), - "failed to write mutant"); + DispatcherCheck( + MutationResult::WriteMutant({{static_cast(data), size}, + // TODO(xinhaoyuan): change the dispatcher + // interface to include the origin. + fuzztest::internal::Mutant::kOriginNone}, + *output), + "failed to write mutant"); } void FuzzTestDispatcherEmitFeedbackAs32BitFeatures(const uint32_t* features, diff --git a/centipede/fuzztest_mutator.cc b/centipede/fuzztest_mutator.cc index 0690f2801..6767a5f01 100644 --- a/centipede/fuzztest_mutator.cc +++ b/centipede/fuzztest_mutator.cc @@ -29,7 +29,7 @@ #include "./centipede/byte_array_mutator.h" #include "./centipede/execution_metadata.h" #include "./centipede/knobs.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./common/defs.h" #include "./common/logging.h" #include "./fuzztest/domain_core.h" @@ -42,6 +42,16 @@ namespace { using MutatorDomainBase = decltype(fuzztest::VectorOf(fuzztest::Arbitrary())); +template +bool SampleInsert(const T& cmp_table, size_t& counter) { + static thread_local absl::BitGen bitgen; + counter++; + if (counter <= cmp_table.kTableSize) { + return true; + } + return absl::Uniform(bitgen, 0, counter) < cmp_table.kTableSize; +} + template void InsertCmpEntryIntoIntegerDictionary(const uint8_t* a, const uint8_t* b, TablesOfRecentCompares& cmp_tables) { @@ -57,27 +67,38 @@ void InsertCmpEntryIntoIntegerDictionary(const uint8_t* a, const uint8_t* b, void PopulateCmpEntries(const ExecutionMetadata& metadata, TablesOfRecentCompares& cmp_tables) { // Size limits on the cmp entries to be populated. - static constexpr uint8_t kMaxCmpEntrySize = 15; + static constexpr uint8_t kMaxCmpEntrySize = 128; static constexpr uint8_t kMinCmpEntrySize = 2; + size_t uint16_sample_counter = 0; + size_t uint32_sample_counter = 0; + size_t uint64_sample_counter = 0; + size_t mem_sample_counter = 0; - metadata.ForEachCmpEntry([&cmp_tables](fuzztest::internal::ByteSpan a, - fuzztest::internal::ByteSpan b) { + metadata.ForEachCmpEntry([&](fuzztest::internal::ByteSpan a, + fuzztest::internal::ByteSpan b) { FUZZTEST_CHECK(a.size() == b.size()) << "cmp operands must have the same size"; const size_t size = a.size(); if (size < kMinCmpEntrySize) return; if (size > kMaxCmpEntrySize) return; - if (size == 2) { + if (size == 2 && SampleInsert(cmp_tables.GetMutable(), + uint16_sample_counter)) { InsertCmpEntryIntoIntegerDictionary(a.data(), b.data(), cmp_tables); - } else if (size == 4) { + } else if (size == 4 && + SampleInsert(cmp_tables.GetMutable(), + uint32_sample_counter)) { InsertCmpEntryIntoIntegerDictionary(a.data(), b.data(), cmp_tables); - } else if (size == 8) { + } else if (size == 8 && + SampleInsert(cmp_tables.GetMutable(), + uint64_sample_counter)) { InsertCmpEntryIntoIntegerDictionary(a.data(), b.data(), cmp_tables); } - cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), size); + if (SampleInsert(cmp_tables.GetMutable<0>(), mem_sample_counter)) { + cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), size); + } }); } @@ -139,32 +160,35 @@ void FuzzTestMutator::CrossOver(ByteArray &data, const ByteArray &other) { } } -std::vector FuzzTestMutator::MutateMany( - const std::vector &inputs, size_t num_mutants) { +std::vector FuzzTestMutator::MutateMany( + const std::vector& inputs, size_t num_mutants) { if (inputs.empty()) abort(); auto& cmp_tables = mutation_metadata_->cmp_tables; cmp_tables.resize(inputs.size()); - std::vector mutants; + std::vector mutants; mutants.reserve(num_mutants); - for (int i = 0; i < num_mutants; ++i) { - auto index = absl::Uniform(prng_, 0, inputs.size()); - if (!cmp_tables[index].has_value() && inputs[index].metadata != nullptr) { - cmp_tables[index].emplace(/*compact=*/true); - PopulateCmpEntries(*inputs[index].metadata, *cmp_tables[index]); + for (size_t i = 0; i < num_mutants; ++i) { + Mutant mutant; + mutant.origin = absl::Uniform(prng_, 0, inputs.size()); + if (!cmp_tables[mutant.origin].has_value() && + inputs[mutant.origin].metadata != nullptr) { + cmp_tables[mutant.origin].emplace(/*compact=*/true); + PopulateCmpEntries(*inputs[mutant.origin].metadata, + *cmp_tables[mutant.origin]); } - auto mutant = inputs[index].data; - if (mutant.size() > max_len_) mutant.resize(max_len_); + mutant.data = inputs[mutant.origin].data; + if (mutant.data.size() > max_len_) mutant.data.resize(max_len_); if (knobs_.GenerateBool(knob_mutate_or_crossover, prng_())) { // Perform crossover with some other input. It may be the same input. const auto &other_input = inputs[absl::Uniform(prng_, 0, inputs.size())].data; - CrossOver(mutant, other_input); + CrossOver(mutant.data, other_input); } else { - domain_->Mutate( - mutant, prng_, - {/*cmp_tables=*/cmp_tables[index].has_value() ? &*cmp_tables[index] - : nullptr}, - /*only_shrink=*/false); + domain_->Mutate(mutant.data, prng_, + {/*cmp_tables=*/cmp_tables[mutant.origin].has_value() + ? &*cmp_tables[mutant.origin] + : nullptr}, + /*only_shrink=*/false); } mutants.push_back(std::move(mutant)); } diff --git a/centipede/fuzztest_mutator.h b/centipede/fuzztest_mutator.h index 8b6846096..ff204f2eb 100644 --- a/centipede/fuzztest_mutator.h +++ b/centipede/fuzztest_mutator.h @@ -22,7 +22,7 @@ #include "./centipede/execution_metadata.h" #include "./centipede/knobs.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./common/defs.h" #include "./fuzztest/internal/table_of_recent_compares.h" @@ -44,8 +44,8 @@ class FuzzTestMutator { ~FuzzTestMutator(); // Takes non-empty `inputs` and produces `num_mutants` mutants. - std::vector MutateMany(const std::vector &inputs, - size_t num_mutants); + std::vector MutateMany(const std::vector& inputs, + size_t num_mutants); // Adds `dict_entries` to the internal mutation dictionary. void AddToDictionary(const std::vector& dict_entries); diff --git a/centipede/fuzztest_mutator_test.cc b/centipede/fuzztest_mutator_test.cc index 327bc836b..a732bcad1 100644 --- a/centipede/fuzztest_mutator_test.cc +++ b/centipede/fuzztest_mutator_test.cc @@ -24,7 +24,7 @@ #include "absl/strings/str_join.h" #include "./centipede/execution_metadata.h" #include "./centipede/knobs.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./common/defs.h" namespace fuzztest::internal { @@ -33,6 +33,7 @@ namespace { using ::testing::AllOf; using ::testing::Each; +using ::testing::Field; using ::testing::IsSupersetOf; using ::testing::Le; using ::testing::SizeIs; @@ -49,10 +50,10 @@ TEST(FuzzTestMutator, DifferentRngSeedsLeadToDifferentMutantSequences) { std::vector mutation_inputs = {{data}}; constexpr size_t kMutantSequenceLength = 100; for (size_t iter = 0; iter < kMutantSequenceLength; iter++) { - const std::vector mutants = + const std::vector mutants = mutator[i].MutateMany(mutation_inputs, 1); ASSERT_EQ(mutants.size(), 1); - res[i].push_back(mutants[0]); + res[i].push_back(mutants[0].data); } } EXPECT_NE(res[0], res[1]); @@ -64,7 +65,7 @@ TEST(FuzzTestMutator, MutateManyWorksWithInputsLargerThanMaxLen) { FuzzTestMutator mutator(knobs, /*seed=*/1); EXPECT_TRUE(mutator.set_max_len(kMaxLen)); constexpr size_t kNumMutantsToGenerate = 10000; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( { {/*data=*/{0, 1, 2, 3, 4, 5, 6, 7}}, {/*data=*/{0}}, @@ -74,70 +75,70 @@ TEST(FuzzTestMutator, MutateManyWorksWithInputsLargerThanMaxLen) { }, kNumMutantsToGenerate); - EXPECT_THAT(mutants, - AllOf(SizeIs(kNumMutantsToGenerate), Each(SizeIs(Le(kMaxLen))))); + EXPECT_THAT(mutants, AllOf(SizeIs(kNumMutantsToGenerate), + Each(Field(&Mutant::data, SizeIs(Le(kMaxLen)))))); } TEST(FuzzTestMutator, CrossOverInsertsDataFromOtherInputs) { const Knobs knobs; FuzzTestMutator mutator(knobs, /*seed=*/1); constexpr size_t kNumMutantsToGenerate = 100000; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( { {/*data=*/{0, 1, 2, 3}}, {/*data=*/{4, 5, 6, 7}}, }, kNumMutantsToGenerate); - EXPECT_THAT(mutants, IsSupersetOf(std::vector{ - // The entire other input - {4, 5, 6, 7, 0, 1, 2, 3}, - {0, 1, 4, 5, 6, 7, 2, 3}, - {0, 1, 2, 3, 4, 5, 6, 7}, - // The prefix of other input - {4, 5, 6, 0, 1, 2, 3}, - {0, 1, 4, 5, 6, 2, 3}, - {0, 1, 2, 3, 4, 5, 6}, - // The suffix of other input - {5, 6, 7, 0, 1, 2, 3}, - {0, 1, 5, 6, 7, 2, 3}, - {0, 1, 2, 3, 5, 6, 7}, - // The middle of other input - {5, 6, 0, 1, 2, 3}, - {0, 1, 5, 6, 2, 3}, - {0, 1, 2, 3, 5, 6}, - })); + EXPECT_THAT(GetDataFromMutants(mutants), IsSupersetOf(std::vector{ + // The entire other input + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 4, 5, 6, 7, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7}, + // The prefix of other input + {4, 5, 6, 0, 1, 2, 3}, + {0, 1, 4, 5, 6, 2, 3}, + {0, 1, 2, 3, 4, 5, 6}, + // The suffix of other input + {5, 6, 7, 0, 1, 2, 3}, + {0, 1, 5, 6, 7, 2, 3}, + {0, 1, 2, 3, 5, 6, 7}, + // The middle of other input + {5, 6, 0, 1, 2, 3}, + {0, 1, 5, 6, 2, 3}, + {0, 1, 2, 3, 5, 6}, + })); } TEST(FuzzTestMutator, CrossOverOverwritesDataFromOtherInputs) { const Knobs knobs; FuzzTestMutator mutator(knobs, /*seed=*/1); constexpr size_t kNumMutantsToGenerate = 100000; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( { {/*data=*/{0, 1, 2, 3, 4, 5, 6, 7}}, {/*data=*/{100, 101, 102, 103}}, }, kNumMutantsToGenerate); - EXPECT_THAT(mutants, IsSupersetOf(std::vector{ - // The entire other input - {100, 101, 102, 103, 4, 5, 6, 7}, - {0, 1, 100, 101, 102, 103, 6, 7}, - {0, 1, 2, 3, 100, 101, 102, 103}, - // The prefix of other input - {100, 101, 102, 3, 4, 5, 6, 7}, - {0, 1, 2, 100, 101, 102, 6, 7}, - {0, 1, 2, 3, 4, 100, 101, 102}, - // The suffix of other input - {101, 102, 103, 3, 4, 5, 6, 7}, - {0, 1, 2, 101, 102, 103, 6, 7}, - {0, 1, 2, 3, 4, 101, 102, 103}, - // The middle of other input - {101, 102, 2, 3, 4, 5, 6, 7}, - {0, 1, 2, 101, 102, 5, 6, 7}, - {0, 1, 2, 3, 4, 5, 101, 102}, - })); + EXPECT_THAT(GetDataFromMutants(mutants), IsSupersetOf(std::vector{ + // The entire other input + {100, 101, 102, 103, 4, 5, 6, 7}, + {0, 1, 100, 101, 102, 103, 6, 7}, + {0, 1, 2, 3, 100, 101, 102, 103}, + // The prefix of other input + {100, 101, 102, 3, 4, 5, 6, 7}, + {0, 1, 2, 100, 101, 102, 6, 7}, + {0, 1, 2, 3, 4, 100, 101, 102}, + // The suffix of other input + {101, 102, 103, 3, 4, 5, 6, 7}, + {0, 1, 2, 101, 102, 103, 6, 7}, + {0, 1, 2, 3, 4, 101, 102, 103}, + // The middle of other input + {101, 102, 2, 3, 4, 5, 6, 7}, + {0, 1, 2, 101, 102, 5, 6, 7}, + {0, 1, 2, 3, 4, 5, 101, 102}, + })); } // Test parameter containing the mutation settings and the expectations of a @@ -181,12 +182,12 @@ TEST_P(MutationStepTest, GeneratesExpectedMutantsAndAvoidsUnexpectedMutants) { const std::vector inputs = { {/*data=*/GetParam().seed_input, /*metadata=*/&metadata}}; for (size_t i = 0; i < GetParam().max_num_iterations; i++) { - const std::vector mutants = mutator.MutateMany(inputs, 1); + const std::vector mutants = mutator.MutateMany(inputs, 1); ASSERT_EQ(mutants.size(), 1); const auto& mutant = mutants[0]; - EXPECT_FALSE(unexpected_mutants.contains(mutant)) - << "Unexpected mutant: {" << absl::StrJoin(mutant, ",") << "}"; - unmatched_expected_mutants.erase(mutant); + EXPECT_FALSE(unexpected_mutants.contains(mutant.data)) + << "Unexpected mutant: {" << absl::StrJoin(mutant.data, ",") << "}"; + unmatched_expected_mutants.erase(mutant.data); if (unmatched_expected_mutants.empty() && i >= GetParam().min_num_iterations) break; @@ -284,22 +285,34 @@ INSTANTIATE_TEST_SUITE_P(InsertFromCmpDictionary, MutationStepTest, Values([] { INSTANTIATE_TEST_SUITE_P(SkipsLongCmpEntry, MutationStepTest, Values([] { MutationStepTestParameter params; params.seed_input = {0}; + ByteArray short_entry; + for (size_t i = 0; i < 5; ++i) { + short_entry.push_back(i); + } params.expected_mutants = { - {0, 1, 2, 3, 4}, + short_entry, }; + ByteArray long_entry; + for (size_t i = 0; i < 129; ++i) { + long_entry.push_back(i); + } params.unexpected_mutants = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, + long_entry, }; - params.cmp_data = { - 20, // size - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, // lhs - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, // rhs - 4, // size - 1, 2, 3, 4, // lhs - 1, 2, 3, 4}; // rhs + params.cmp_data.push_back(short_entry.size()); + params.cmp_data.insert(params.cmp_data.end(), + short_entry.begin(), + short_entry.end()); // lhs + params.cmp_data.insert(params.cmp_data.end(), + short_entry.begin(), + short_entry.end()); // rhs + params.cmp_data.push_back(long_entry.size()); + params.cmp_data.insert(params.cmp_data.end(), + long_entry.begin(), + long_entry.end()); // lhs + params.cmp_data.insert(params.cmp_data.end(), + long_entry.begin(), + long_entry.end()); // rhs return params; }())); diff --git a/centipede/minimize_crash.cc b/centipede/minimize_crash.cc index d9fa681e8..245846cc4 100644 --- a/centipede/minimize_crash.cc +++ b/centipede/minimize_crash.cc @@ -26,7 +26,7 @@ #include "absl/synchronization/mutex.h" #include "./centipede/centipede_callbacks.h" #include "./centipede/environment.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./centipede/stop.h" #include "./centipede/thread_pool.h" @@ -114,21 +114,21 @@ static void MinimizeCrash(const Environment &env, // discarding all inputs that are too large. // TODO(kcc): modify the Mutate() interface such that max_len can be passed. // - const std::vector mutants = callbacks->Mutate( + const std::vector mutants = callbacks->Mutate( GetMutationInputRefsFromDataInputs(recent_crashers), env.batch_size); - std::vector smaller_mutants; + std::vector smaller_mutants; for (const auto &m : mutants) { - if (m.size() < min_known_size) smaller_mutants.push_back(m); + if (m.data.size() < min_known_size) smaller_mutants.push_back(m.data); } // Execute all mutants. If a new crasher is found, add it to `queue`. if (!callbacks->Execute(env.binary, smaller_mutants, batch_result)) { size_t crash_inputs_idx = batch_result.num_outputs_read(); FUZZTEST_CHECK_LT(crash_inputs_idx, smaller_mutants.size()); - const auto &new_crasher = smaller_mutants[crash_inputs_idx]; + const auto new_crasher = smaller_mutants[crash_inputs_idx]; FUZZTEST_LOG(INFO) << "Crasher: size: " << new_crasher.size() << ": " << AsPrintableString(new_crasher, /*max_len=*/40); - queue.AddCrasher(new_crasher); + queue.AddCrasher({new_crasher.begin(), new_crasher.end()}); } } } diff --git a/centipede/minimize_crash_test.cc b/centipede/minimize_crash_test.cc index 30a145a9e..539483100 100644 --- a/centipede/minimize_crash_test.cc +++ b/centipede/minimize_crash_test.cc @@ -40,10 +40,10 @@ class MinimizerMock : public CentipedeCallbacks { MinimizerMock(const Environment &env) : CentipedeCallbacks(env) {} // Runs FuzzMe() on every input, imitates failure if FuzzMe() returns true. - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { batch_result.ClearAndResize(inputs.size()); - for (auto &input : inputs) { + for (auto input : inputs) { if (FuzzMe(input)) { batch_result.exit_code() = EXIT_FAILURE; return false; diff --git a/centipede/mutation_input.h b/centipede/mutation_data.h similarity index 53% rename from centipede/mutation_input.h rename to centipede/mutation_data.h index 504c75327..d4afb54da 100644 --- a/centipede/mutation_input.h +++ b/centipede/mutation_data.h @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Data types used for mutation inputs. +// Data types used for mutation. // // This library is for both engine and runner. -#ifndef THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ -#define THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ +#ifndef THIRD_PARTY_CENTIPEDE_MUTATION_DATA_H_ +#define THIRD_PARTY_CENTIPEDE_MUTATION_DATA_H_ +#include #include #include "./centipede/execution_metadata.h" @@ -48,6 +49,41 @@ inline std::vector GetMutationInputRefsFromDataInputs( return results; } +// Represents a mutation result. +struct Mutant { + // The mutant `data`. + ByteArray data; + // The index of the input used to mutate into `data`. The base array may be + // different depending on the context: As mutation output it refers to the + // mutation input batch; As execution input it refers to the in-memory corpus. + size_t origin = kOriginNone; + // A special `origin` value to indicate that the mutant has no origin. + static constexpr size_t kOriginNone = static_cast(-1); + + // For testing. + bool operator==(const Mutant& other) const { + return data == other.data && origin == other.origin; + } +}; + +// A reference counterpart of `Mutant`. Needed because it can be constructed +// from std::string and/or by the C-only dispatcher without copying the +// underlying data. +struct MutantRef { + ByteSpan data; + size_t origin = Mutant::kOriginNone; +}; + +inline std::vector GetDataFromMutants( + const std::vector& mutants) { + std::vector results; + results.reserve(mutants.size()); + for (const auto& mutant : mutants) { + results.push_back(mutant.data); + } + return results; +} + } // namespace fuzztest::internal -#endif // THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ +#endif // THIRD_PARTY_CENTIPEDE_MUTATION_DATA_H_ diff --git a/centipede/mutation_input_test.cc b/centipede/mutation_data_test.cc similarity index 96% rename from centipede/mutation_input_test.cc rename to centipede/mutation_data_test.cc index f1bc7414c..4ccf7ca5b 100644 --- a/centipede/mutation_input_test.cc +++ b/centipede/mutation_data_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include diff --git a/centipede/runner.cc b/centipede/runner.cc index fbf5a2bc0..fa7dc90a2 100644 --- a/centipede/runner.cc +++ b/centipede/runner.cc @@ -53,7 +53,7 @@ #include "./centipede/dispatcher_flag_helper.h" #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_interface.h" #include "./centipede/runner_request.h" #include "./centipede/runner_result.h" @@ -327,8 +327,8 @@ void RunnerCallbacks::GetSeeds(std::function seed_callback) { std::string RunnerCallbacks::GetSerializedTargetConfig() { return ""; } bool RunnerCallbacks::Mutate( - const std::vector & /*inputs*/, size_t /*num_mutants*/, - std::function /*new_mutant_callback*/) { + const std::vector& /*inputs*/, size_t /*num_mutants*/, + std::function /*new_mutant_callback*/) { RunnerCheck(!HasCustomMutator(), "Class deriving from RunnerCallbacks must implement Mutate() if " "HasCustomMutator() returns true."); @@ -358,8 +358,8 @@ class LegacyRunnerCallbacks : public RunnerCallbacks { return custom_mutator_cb_ != nullptr; } - bool Mutate(const std::vector &inputs, size_t num_mutants, - std::function new_mutant_callback) override; + bool Mutate(const std::vector& inputs, size_t num_mutants, + std::function new_mutant_callback) override; private: FuzzerTestOneInputCallback test_one_input_cb_; @@ -623,7 +623,7 @@ static int MutateInputsFromShmem(BlobSequence &inputs_blobseq, } if (!callbacks.HasCustomMutator()) return EXIT_SUCCESS; - if (!callbacks.Mutate(input_refs, num_mutants, [&](ByteSpan mutant) { + if (!callbacks.Mutate(input_refs, num_mutants, [&](MutantRef mutant) { MutationResult::WriteMutant(mutant, outputs_blobseq); })) { return EXIT_FAILURE; @@ -632,8 +632,8 @@ static int MutateInputsFromShmem(BlobSequence &inputs_blobseq, } bool LegacyRunnerCallbacks::Mutate( - const std::vector &inputs, size_t num_mutants, - std::function new_mutant_callback) { + const std::vector& inputs, size_t num_mutants, + std::function new_mutant_callback) { if (custom_mutator_cb_ == nullptr) return false; unsigned int seed = GetRandomSeed(); const size_t num_inputs = inputs.size(); @@ -644,7 +644,8 @@ bool LegacyRunnerCallbacks::Mutate( attempt < num_mutants * kAverageMutationAttempts && num_outputs < num_mutants; ++attempt) { - const auto &input_data = inputs[rand_r(&seed) % num_inputs].data; + size_t origin = rand_r(&seed) % num_inputs; + const auto& input_data = inputs[origin].data; size_t size = std::min(input_data.size(), max_mutant_size); std::copy(input_data.cbegin(), input_data.cbegin() + size, mutant.begin()); @@ -661,7 +662,8 @@ bool LegacyRunnerCallbacks::Mutate( rand_r(&seed)); } if (new_size == 0) continue; - new_mutant_callback({mutant.data(), new_size}); + if (new_size > max_mutant_size) new_size = max_mutant_size; + new_mutant_callback({{mutant.data(), new_size}, origin}); ++num_outputs; } return true; diff --git a/centipede/runner_cmp_trace.h b/centipede/runner_cmp_trace.h index 66878758b..32f081a55 100644 --- a/centipede/runner_cmp_trace.h +++ b/centipede/runner_cmp_trace.h @@ -18,10 +18,14 @@ // Capturing arguments of CMP instructions, memcmp, and similar. // WARNING: this code needs to have minimal dependencies. +#include + #include #include #include +#include "absl/base/optimization.h" + namespace fuzztest::internal { // Captures up to `kNumItems` different CMP argument pairs. @@ -33,28 +37,47 @@ namespace fuzztest::internal { // // Every new captured pair may overwrite a pair stored previously. // -// Outside of tests, objects of this class will be created in TLS, thus no CTOR. +// Outside of tests, objects of this class will be zero-initialized in TLS, +// thus no CTOR. In tests the objects should be default-initialized. template class CmpTrace { public: // kMaxNumBytesPerValue does not depend on kFixedSize. - static constexpr size_t kMaxNumBytesPerValue = 16; + static constexpr size_t kMaxNumBytesPerValue = 128; static constexpr size_t kNumBytesPerValue = kFixedSize ? kFixedSize : kMaxNumBytesPerValue; // No CTOR - objects will be created in TLS. // Clears `this`. - void Clear() { memset(this, 0, sizeof(*this)); } + void Clear() { to_clear = true; } // Captures one CMP argument pair, as two byte arrays, `size` bytes each. void Capture(uint8_t size, const uint8_t *value0, const uint8_t *value1) { + if (ABSL_PREDICT_FALSE(to_clear)) { + for (size_t i = 0; i < kNumItems; ++i) { + if (sizes_[i] == 0) break; + sizes_[i] = 0; + } + capture_count_ = 0; + to_clear = false; + } if (size > kNumBytesPerValue) size = kNumBytesPerValue; - // We choose a pseudo-random slot each time. - // This way after capturing many pairs we end up with up to `kNumItems` - // pairs which are typically, but not always, the most recent. - rand_seed_ = rand_seed_ * 1103515245 + 12345; - const size_t index = rand_seed_ % kNumItems; + // Fill the initial `kNumItems` pairs sequentially, then randomly overwrite + // previous entries with diminishing probability. + size_t index = capture_count_++; + if (index >= kNumItems) { + if (rand_seed_ == 0) { + // Initialize the random seed (likely) once. + struct timeval tv = {}; + constexpr size_t kUsecInSec = 1000000; + gettimeofday(&tv, nullptr); + rand_seed_ = tv.tv_sec * kUsecInSec + tv.tv_usec; + } + rand_seed_ = rand_seed_ * 1103515245 + 12345; + index = rand_seed_ % capture_count_; + if (index >= kNumItems) return; + } Item& item = items_[index]; sizes_[index] = size; __builtin_memcpy(item.value0, value0, size); @@ -74,12 +97,14 @@ class CmpTrace { // Iterates non-zero CMP pairs. template void ForEachNonZero(Callback callback) { + if (ABSL_PREDICT_FALSE(to_clear)) return; for (size_t i = 0; i < kNumItems; ++i) { const auto size = sizes_[i]; - if (size == 0 || size > kNumBytesPerValue) continue; - sizes_[i] = 0; + if (size == 0) break; + if (size > kNumBytesPerValue) continue; callback(size, items_[i].value0, items_[i].value1); } + to_clear = true; } private: @@ -89,17 +114,22 @@ class CmpTrace { uint8_t value1[kNumBytesPerValue]; }; - // Value sizes of argument pairs. zero-size indicates that the corresponding - // entry is empty. + volatile bool to_clear; + + // Value sizes of argument pairs. Zero-size indicates end of valid entries. // // Marked volatile because of the potential racing between the owning thread - // and the main thread, which is tolerated gracefully. + // and the main thread, which is tolerated gracefully. It is written by only + // the owning thread. volatile uint8_t sizes_[kNumItems]; + + size_t capture_count_; // Values of argument pairs. Item items_[kNumItems]; - // Pseudo-random seed. - size_t rand_seed_; + // Pseudo-random seed from glibc + // (https://en.wikipedia.org/wiki/Linear_congruential_generator). + uint32_t rand_seed_; }; } // namespace fuzztest::internal diff --git a/centipede/runner_cmp_trace_test.cc b/centipede/runner_cmp_trace_test.cc index 492c18f4d..ade09d485 100644 --- a/centipede/runner_cmp_trace_test.cc +++ b/centipede/runner_cmp_trace_test.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include "gmock/gmock.h" @@ -56,10 +57,10 @@ TEST(CmpTrace, T1) { observed_pairs.push_back(cmp_pair); }; - CmpTrace<2, 10> trace2; - CmpTrace<4, 11> trace4; - CmpTrace<8, 12> trace8; - CmpTrace<0, 13> traceN; + CmpTrace<2, 10> trace2 = {}; + CmpTrace<4, 11> trace4 = {}; + CmpTrace<8, 12> trace8 = {}; + CmpTrace<0, 13> traceN = {}; trace2.Clear(); trace4.Clear(); trace8.Clear(); @@ -102,12 +103,12 @@ TEST(CmpTrace, T1) { constexpr uint8_t value0[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}; constexpr uint8_t value1[10] = {0, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - constexpr uint8_t long_array[20] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + uint8_t long_array[129]; + std::iota(long_array, long_array + 129, 0); traceN.Capture(7, value0, value1); traceN.Capture(3, value0, value1); traceN.Capture(10, value0, value1); - traceN.Capture(20, long_array, long_array); // will be trimmed to 16. + traceN.Capture(129, long_array, long_array); // will be trimmed to 128. observed_pairs.clear(); traceN.ForEachNonZero(callback); EXPECT_THAT(observed_pairs, @@ -115,7 +116,7 @@ TEST(CmpTrace, T1) { TwoArraysToByteVector(value0, value1, 10), TwoArraysToByteVector(value0, value1, 7), TwoArraysToByteVector(value0, value1, 3), - TwoArraysToByteVector(long_array, long_array, 16))); + TwoArraysToByteVector(long_array, long_array, 128))); } } // namespace diff --git a/centipede/runner_interface.h b/centipede/runner_interface.h index 09e2bbd0a..cfbf8499b 100644 --- a/centipede/runner_interface.h +++ b/centipede/runner_interface.h @@ -25,7 +25,7 @@ #include #include "absl/base/nullability.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./common/defs.h" // Typedefs for the libFuzzer API, https://llvm.org/docs/LibFuzzer.html @@ -153,9 +153,9 @@ class RunnerCallbacks { // // TODO(xinhaoyuan): Consider supporting only_shrink to speed up // input shrinking. - virtual bool Mutate(const std::vector &inputs, + virtual bool Mutate(const std::vector& inputs, size_t num_mutants, - std::function new_mutant_callback); + std::function new_mutant_callback); virtual ~RunnerCallbacks() = default; }; diff --git a/centipede/runner_request.cc b/centipede/runner_request.cc index 7d286d30d..66027dfe3 100644 --- a/centipede/runner_request.cc +++ b/centipede/runner_request.cc @@ -18,7 +18,7 @@ #include #include "./centipede/execution_metadata.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" @@ -37,8 +37,8 @@ enum Tags : Blob::SizeAndTagT { }; // Writes `inputs` to `blobseq`, returns the number of inputs written. -static size_t WriteInputs(const std::vector &inputs, - BlobSequence &blobseq) { +static size_t WriteInputs(const std::vector& inputs, + BlobSequence& blobseq) { size_t num_inputs = inputs.size(); if (!blobseq.Write(kTagNumInputs, num_inputs)) return 0; size_t result = 0; @@ -75,8 +75,8 @@ static size_t WriteInputs(const std::vector &inputs, } // namespace -size_t RequestExecution(const std::vector &inputs, - BlobSequence &blobseq) { +size_t RequestExecution(const std::vector& inputs, + BlobSequence& blobseq) { if (!blobseq.Write({kTagExecution, 0, nullptr})) return 0; return WriteInputs(inputs, blobseq); } diff --git a/centipede/runner_request.h b/centipede/runner_request.h index 63b90a257..1d5e068f4 100644 --- a/centipede/runner_request.h +++ b/centipede/runner_request.h @@ -21,7 +21,7 @@ #include #include "./centipede/execution_metadata.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" @@ -35,8 +35,8 @@ enum class PersistentModeRequest : char { // Sends a request (via `blobseq`) to execute `inputs`. // Returns the number of sent inputs, which would normally be inputs.size(). -size_t RequestExecution(const std::vector &inputs, - BlobSequence &blobseq); +size_t RequestExecution(const std::vector& inputs, + BlobSequence& blobseq); // Sends a request (via `blobseq`) to compute `num_mutants` mutants of `inputs`. // Returns the number of sent inputs, which would normally be inputs.size(). diff --git a/centipede/runner_result.cc b/centipede/runner_result.cc index 3b726e758..07b781024 100644 --- a/centipede/runner_result.cc +++ b/centipede/runner_result.cc @@ -22,6 +22,7 @@ #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" +#include "./centipede/mutation_data.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" @@ -44,6 +45,7 @@ enum Tags : Blob::SizeAndTagT { // Mutation result tags. kTagHasCustomMutator, + kTagMutantOrigin, kTagMutant, }; @@ -185,8 +187,12 @@ bool MutationResult::WriteHasCustomMutator(bool has_custom_mutator, reinterpret_cast(&has_custom_mutator)}); } -bool MutationResult::WriteMutant(ByteSpan mutant, BlobSequence &blobseq) { - return blobseq.Write({kTagMutant, mutant.size(), mutant.data()}); +bool MutationResult::WriteMutant(MutantRef mutant, BlobSequence& blobseq) { + if (!blobseq.Write({kTagMutantOrigin, sizeof(mutant.origin), + reinterpret_cast(&mutant.origin)})) { + return false; + } + return blobseq.Write({kTagMutant, mutant.data.size(), mutant.data.data()}); } bool MutationResult::Read(size_t num_mutants, BlobSequence &blobseq) { @@ -199,10 +205,17 @@ bool MutationResult::Read(size_t num_mutants, BlobSequence &blobseq) { mutants_.clear(); mutants_.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { + size_t origin = Mutant::kOriginNone; + { + const Blob blob = blobseq.Read(); + if (blob.tag != kTagMutantOrigin) return false; + if (blob.size != sizeof(origin)) return false; + std::memcpy(&origin, blob.data, sizeof(origin)); + } const Blob blob = blobseq.Read(); if (blob.tag != kTagMutant) return false; if (blob.size == 0) break; - mutants_.emplace_back(blob.data, blob.data + blob.size); + mutants_.push_back({ByteArray{blob.data, blob.data + blob.size}, origin}); } return true; } diff --git a/centipede/runner_result.h b/centipede/runner_result.h index 1b94f3187..8266103a6 100644 --- a/centipede/runner_result.h +++ b/centipede/runner_result.h @@ -25,6 +25,7 @@ #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" +#include "./centipede/mutation_data.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" @@ -217,7 +218,7 @@ class MutationResult { BlobSequence& blobseq); // Writes one mutant to `blobseq`. Returns true iff successful. - static bool WriteMutant(ByteSpan mutant, BlobSequence& blobseq); + static bool WriteMutant(MutantRef mutant, BlobSequence& blobseq); // Reads whether the target has a custom mutator, and if so, reads at most // `num_mutants` mutants from `blobseq`. Returns true iff successful. @@ -227,13 +228,13 @@ class MutationResult { int exit_code() const { return exit_code_; } int& exit_code() { return exit_code_; } bool has_custom_mutator() const { return has_custom_mutator_; } - const std::vector& mutants() const& { return mutants_; } - std::vector&& mutants() && { return std::move(mutants_); } + const std::vector& mutants() const& { return mutants_; } + std::vector&& mutants() && { return std::move(mutants_); } private: int exit_code_ = EXIT_SUCCESS; bool has_custom_mutator_ = false; - std::vector mutants_; + std::vector mutants_; }; } // namespace fuzztest::internal diff --git a/centipede/runner_result_test.cc b/centipede/runner_result_test.cc index 864401a62..dd65ccd8a 100644 --- a/centipede/runner_result_test.cc +++ b/centipede/runner_result_test.cc @@ -29,6 +29,7 @@ #include "gtest/gtest.h" #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" +#include "./centipede/mutation_data.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" #include "./common/test_util.h" @@ -213,18 +214,18 @@ TEST(MutationResult, WriteThenRead) { // Write a mutation result. ASSERT_TRUE(MutationResult::WriteHasCustomMutator(true, blobseq)); - ASSERT_TRUE(MutationResult::WriteMutant({1, 2, 3}, blobseq)); - ASSERT_TRUE(MutationResult::WriteMutant({4, 5, 6}, blobseq)); - ASSERT_TRUE(MutationResult::WriteMutant({7, 8, 9}, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({{1, 2, 3}, 3}, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({{4, 5, 6}, 2}, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({{7, 8, 9}, 1}, blobseq)); blobseq.Reset(); MutationResult mutation_result; ASSERT_TRUE(mutation_result.Read(3, blobseq)); EXPECT_TRUE(mutation_result.has_custom_mutator()); - EXPECT_THAT( - mutation_result.mutants(), - ElementsAre(ByteArray{1, 2, 3}, ByteArray{4, 5, 6}, ByteArray{7, 8, 9})); + EXPECT_THAT(mutation_result.mutants(), + ElementsAre(Mutant{{1, 2, 3}, 3}, Mutant{{4, 5, 6}, 2}, + Mutant{{7, 8, 9}, 1})); } TEST(ExecutionResult, ReadResultSucceedsOnlyWithInputBegin) { diff --git a/centipede/test_coverage_util.cc b/centipede/test_coverage_util.cc index 2602a58c9..8b6dfa3bc 100644 --- a/centipede/test_coverage_util.cc +++ b/centipede/test_coverage_util.cc @@ -41,7 +41,8 @@ std::vector RunInputsAndCollectCorpusRecords( } BatchResult batch_result; // Run. - CBs.Execute(env.binary, byte_array_inputs, batch_result); + CBs.Execute(env.binary, {byte_array_inputs.begin(), byte_array_inputs.end()}, + batch_result); // Repackage execution results into a vector of CorpusRecords. std::vector corpus_records; diff --git a/centipede/test_coverage_util.h b/centipede/test_coverage_util.h index c06a9bf7a..84cef8d92 100644 --- a/centipede/test_coverage_util.h +++ b/centipede/test_coverage_util.h @@ -25,7 +25,7 @@ #include "./centipede/corpus.h" #include "./centipede/environment.h" #include "./centipede/feature.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_result.h" #include "./common/defs.h" namespace fuzztest::internal { @@ -43,15 +43,15 @@ std::vector RunInputsAndCollectCoverage( class TestCallbacks : public CentipedeCallbacks { public: explicit TestCallbacks(const Environment &env) : CentipedeCallbacks(env) {} - bool Execute(std::string_view binary, const std::vector &inputs, - BatchResult &batch_result) override { + bool Execute(std::string_view binary, const std::vector& inputs, + BatchResult& batch_result) override { int result = ExecuteCentipedeSancovBinaryWithShmem(binary, inputs, batch_result); FUZZTEST_CHECK_EQ(EXIT_SUCCESS, result); return true; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { return {}; } }; diff --git a/centipede/testing/BUILD b/centipede/testing/BUILD index ffffb600e..438e7e89d 100644 --- a/centipede/testing/BUILD +++ b/centipede/testing/BUILD @@ -83,7 +83,7 @@ cc_binary( "@abseil-cpp//absl/flags:flag", "@abseil-cpp//absl/flags:parse", "@com_google_fuzztest//centipede:centipede_runner_no_main", - "@com_google_fuzztest//centipede:mutation_input", + "@com_google_fuzztest//centipede:mutation_data", "@com_google_fuzztest//common:defs", ], ) diff --git a/centipede/testing/async_failing_target.cc b/centipede/testing/async_failing_target.cc index e66ac8bf4..3d81ebcd3 100644 --- a/centipede/testing/async_failing_target.cc +++ b/centipede/testing/async_failing_target.cc @@ -33,7 +33,7 @@ class AsyncFailingTargetRunnerCallbacks bool Mutate(const std::vector& inputs, size_t num_mutants, - std::function + std::function new_mutant_callback) override { if (to_fail_in_mutation) { fprintf(stderr, "Fail in mutation\n"); diff --git a/centipede/testing/fuzz_target_with_custom_mutator.cc b/centipede/testing/fuzz_target_with_custom_mutator.cc index a95dbf84d..5f47016ae 100644 --- a/centipede/testing/fuzz_target_with_custom_mutator.cc +++ b/centipede/testing/fuzz_target_with_custom_mutator.cc @@ -19,7 +19,7 @@ #include "absl/base/nullability.h" #include "absl/flags/flag.h" #include "absl/flags/parse.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_interface.h" #include "./common/defs.h" @@ -38,12 +38,11 @@ class CustomMutatorRunnerCallbacks bool Mutate(const std::vector& inputs, size_t num_mutants, - std::function new_mutant_callback) override { - size_t i = 0; - for (fuzztest::internal::MutationInputRef input : inputs) { - if (i++ >= num_mutants) break; + std::function + new_mutant_callback) override { + for (size_t i = 0; i < inputs.size() && i < num_mutants; ++i) { // Just return the original input as a mutant. - new_mutant_callback(input.data); + new_mutant_callback({inputs[i].data, i}); } return true; } diff --git a/centipede/util.cc b/centipede/util.cc index 376d7b4c7..83da9e7f6 100644 --- a/centipede/util.cc +++ b/centipede/util.cc @@ -226,13 +226,11 @@ std::string ExtractHashFromArray(ByteArray &ba) { return res; } -ByteArray PackFeaturesAndHash(const ByteArray &data, - const FeatureVec &features) { +ByteArray PackFeaturesAndHash(ByteSpan data, const FeatureVec& features) { return PackFeaturesAndHashAsRawBytes(data, AsByteSpan(features)); } -ByteArray PackFeaturesAndHashAsRawBytes(const ByteArray &data, - ByteSpan features) { +ByteArray PackFeaturesAndHashAsRawBytes(ByteSpan data, ByteSpan features) { ByteArray feature_bytes_with_hash(features.size() + kHashLen); auto hash = Hash(data); FUZZTEST_CHECK_EQ(hash.size(), kHashLen); diff --git a/centipede/util.h b/centipede/util.h index a34feda03..ac5e96392 100644 --- a/centipede/util.h +++ b/centipede/util.h @@ -140,12 +140,10 @@ void AppendHashToArray(ByteArray &ba, std::string_view hash); std::string ExtractHashFromArray(ByteArray &ba); // Pack {features, Hash(data)} into a byte array. -ByteArray PackFeaturesAndHash(const ByteArray &data, - const FeatureVec &features); +ByteArray PackFeaturesAndHash(ByteSpan data, const FeatureVec& features); // Pack `features` and the hash of `data` directly from their raw data format. -ByteArray PackFeaturesAndHashAsRawBytes(const ByteArray &data, - ByteSpan features); +ByteArray PackFeaturesAndHashAsRawBytes(ByteSpan data, ByteSpan features); // Given a `blob` created by `PackFeaturesAndHash`, unpack the features into // `features` and return the hash. diff --git a/common/remote_file.cc b/common/remote_file.cc index b628ad610..32ed34bbb 100644 --- a/common/remote_file.cc +++ b/common/remote_file.cc @@ -17,6 +17,7 @@ #include "./common/remote_file.h" +#include #include #include @@ -28,10 +29,16 @@ namespace fuzztest::internal { +absl::Status RemoteFileAppend(RemoteFile* absl_nonnull f, + const ByteArray& contents) { + return RemoteFileAppend(f, ByteSpan{contents.data(), contents.size()}); +} + absl::Status RemoteFileAppend(RemoteFile *absl_nonnull f, const std::string &contents) { - ByteArray contents_ba{contents.cbegin(), contents.cend()}; - return RemoteFileAppend(f, contents_ba); + return RemoteFileAppend( + f, ByteSpan{reinterpret_cast(contents.data()), + contents.size()}); } absl::Status RemoteFileRead(RemoteFile *absl_nonnull f, std::string &contents) { @@ -43,17 +50,18 @@ absl::Status RemoteFileRead(RemoteFile *absl_nonnull f, std::string &contents) { absl::Status RemoteFileSetContents(std::string_view path, const ByteArray &contents) { - ASSIGN_OR_RETURN_IF_NOT_OK(RemoteFile * file, RemoteFileOpen(path, "w")); - if (file == nullptr) { - return absl::UnknownError( - "RemoteFileOpen returned an OK status but a nullptr RemoteFile*"); - } - RETURN_IF_NOT_OK(RemoteFileAppend(file, contents)); - return RemoteFileClose(file); + return RemoteFileSetContents(path, + ByteSpan{contents.data(), contents.size()}); } absl::Status RemoteFileSetContents(std::string_view path, const std::string &contents) { + return RemoteFileSetContents( + path, ByteSpan{reinterpret_cast(contents.data()), + contents.size()}); +} + +absl::Status RemoteFileSetContents(std::string_view path, ByteSpan contents) { ASSIGN_OR_RETURN_IF_NOT_OK(RemoteFile * file, RemoteFileOpen(path, "w")); if (file == nullptr) { return absl::UnknownError( diff --git a/common/remote_file.h b/common/remote_file.h index e59b48432..63fe07b94 100644 --- a/common/remote_file.h +++ b/common/remote_file.h @@ -74,13 +74,17 @@ absl::Status RemoteFileClose(RemoteFile *absl_nonnull f); absl::Status RemoteFileSetWriteBufferSize(RemoteFile *absl_nonnull f, size_t size); -// Appends bytes from 'ba' to 'f'. -absl::Status RemoteFileAppend(RemoteFile *absl_nonnull f, const ByteArray &ba); +// Appends bytes from 'contents' to 'f'. +absl::Status RemoteFileAppend(RemoteFile* absl_nonnull f, + const ByteArray& contents); // Appends characters from 'contents' to 'f'. absl::Status RemoteFileAppend(RemoteFile *absl_nonnull f, const std::string &contents); +// Appends bytes from 'contents' to 'f'. +absl::Status RemoteFileAppend(RemoteFile* absl_nonnull f, ByteSpan contents); + // Flushes the file's internal buffer. Some dynamic results of a running // pipeline are consumed by itself (e.g. shard cross-pollination) and can be // consumed by external processes (e.g. monitoring): for such files, call this @@ -105,6 +109,9 @@ absl::Status RemoteFileSetContents(std::string_view path, absl::Status RemoteFileSetContents(std::string_view path, const std::string &contents); +// Sets the contents of the file at 'path' to 'contents'. +absl::Status RemoteFileSetContents(std::string_view path, ByteSpan contents); + // Reads the contents of the file at 'path' into 'contents'. absl::Status RemoteFileGetContents(std::string_view path, ByteArray &contents); diff --git a/common/remote_file_oss.cc b/common/remote_file_oss.cc index fb6f23ad8..10299f6b6 100644 --- a/common/remote_file_oss.cc +++ b/common/remote_file_oss.cc @@ -90,11 +90,11 @@ class LocalRemoteFile : public RemoteFile { return absl::OkStatus(); } - absl::Status Write(const ByteArray &ba) { - static constexpr auto elt_size = sizeof(ba[0]); - const auto elts_to_write = ba.size(); + absl::Status Write(ByteSpan contents) { + static constexpr auto elt_size = sizeof(contents[0]); + const auto elts_to_write = contents.size(); const auto elts_written = - std::fwrite(ba.data(), elt_size, elts_to_write, file_); + std::fwrite(contents.data(), elt_size, elts_to_write, file_); if (elts_written != elts_to_write) { return absl::UnknownError(absl::StrCat( "fwrite() wrote less elements that expected, wrote: ", elts_written, @@ -327,8 +327,8 @@ absl::Status RemoteFileSetWriteBufferSize(RemoteFile *absl_nonnull f, return static_cast(f)->SetWriteBufSize(size); } -absl::Status RemoteFileAppend(RemoteFile *absl_nonnull f, const ByteArray &ba) { - return static_cast(f)->Write(ba); +absl::Status RemoteFileAppend(RemoteFile* absl_nonnull f, ByteSpan contents) { + return static_cast(f)->Write(contents); } absl::Status RemoteFileFlush(RemoteFile *absl_nonnull f) { diff --git a/fuzztest/internal/BUILD b/fuzztest/internal/BUILD index abb0a3fa0..da4e80609 100644 --- a/fuzztest/internal/BUILD +++ b/fuzztest/internal/BUILD @@ -78,7 +78,7 @@ cc_library( "@com_google_fuzztest//centipede:environment", "@com_google_fuzztest//centipede:execution_metadata", "@com_google_fuzztest//centipede:fuzztest_mutator", - "@com_google_fuzztest//centipede:mutation_input", + "@com_google_fuzztest//centipede:mutation_data", "@com_google_fuzztest//centipede:runner_result", "@com_google_fuzztest//centipede:stop", "@com_google_fuzztest//centipede:workdir", diff --git a/fuzztest/internal/centipede_adaptor.cc b/fuzztest/internal/centipede_adaptor.cc index 9bb0256b2..92a0eaccf 100644 --- a/fuzztest/internal/centipede_adaptor.cc +++ b/fuzztest/internal/centipede_adaptor.cc @@ -70,7 +70,7 @@ #include "./centipede/environment.h" #include "./centipede/execution_metadata.h" #include "./centipede/fuzztest_mutator.h" -#include "./centipede/mutation_input.h" +#include "./centipede/mutation_data.h" #include "./centipede/runner_interface.h" #include "./centipede/runner_result.h" #include "./centipede/stop.h" @@ -516,15 +516,14 @@ class CentipedeAdaptorRunnerCallbacks bool HasCustomMutator() const override { return true; } - bool Mutate(const std::vector& inputs, - size_t num_mutants, - std::function - new_mutant_callback) override { + bool Mutate(const std::vector& inputs, size_t num_mutants, + std::function new_mutant_callback) override { if (inputs.empty()) return false; cmp_tables.resize(inputs.size()); absl::Cleanup cmp_tables_cleaner = [this]() { cmp_tables.clear(); }; for (size_t i = 0; i < num_mutants; ++i) { const auto choice = absl::Uniform(prng_, 0, 1); + size_t origin_index = Mutant::kOriginNone; std::string mutant_data; constexpr double kDomainInitRatio = 0.0001; if (choice < kDomainInitRatio) { @@ -532,8 +531,7 @@ class CentipedeAdaptorRunnerCallbacks SerializeIRObject(fuzzer_impl_.params_domain_.SerializeCorpus( fuzzer_impl_.params_domain_.Init(prng_))); } else { - const auto origin_index = - absl::Uniform(prng_, 0, inputs.size()); + origin_index = absl::Uniform(prng_, 0, inputs.size()); const auto& origin = inputs[origin_index].data; auto parsed_origin = fuzzer_impl_.TryParse({(const char*)origin.data(), origin.size()}); @@ -556,7 +554,8 @@ class CentipedeAdaptorRunnerCallbacks fuzzer_impl_.params_domain_.SerializeCorpus(mutant.args)); } new_mutant_callback( - {(unsigned char*)mutant_data.data(), mutant_data.size()}); + {{(unsigned char*)mutant_data.data(), mutant_data.size()}, + origin_index}); } return true; } @@ -1018,7 +1017,7 @@ class CentipedeCallbacksForRunnerFlagsExtraction using fuzztest::internal::CentipedeCallbacks::CentipedeCallbacks; bool Execute(std::string_view binary, - const std::vector& inputs, + const std::vector& inputs, fuzztest::internal::BatchResult& batch_result) override { return false; } diff --git a/fuzztest/internal/table_of_recent_compares.h b/fuzztest/internal/table_of_recent_compares.h index ed59b71ef..742bf55e5 100644 --- a/fuzztest/internal/table_of_recent_compares.h +++ b/fuzztest/internal/table_of_recent_compares.h @@ -1,4 +1,3 @@ - // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License");