Skip to content

Commit

Permalink
Write crashes and crash metadata to sharded directories.
Browse files Browse the repository at this point in the history
Previously, when running with multiple threads, the threads would sometimes
concurrently find the same crashing input, and would then concurrently try
to write it to the same file.

PiperOrigin-RevId: 690846033
  • Loading branch information
fniksic authored and copybara-github committed Oct 29, 2024
1 parent a54132f commit d152d45
Show file tree
Hide file tree
Showing 15 changed files with 159 additions and 151 deletions.
25 changes: 12 additions & 13 deletions centipede/centipede.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void Centipede::CorpusFromFiles(const Environment &env, std::string_view dir) {
size_t inputs_ignored = 0;
const auto corpus_files = WorkDir{env}.CorpusFiles();
for (size_t shard = 0; shard < env.total_shards; shard++) {
const std::string corpus_path = corpus_files.ShardPath(shard);
const std::string corpus_path = corpus_files.Shard(shard);
size_t num_shard_bytes = 0;
// Read the shard (if it exists), collect input hashes from it.
absl::flat_hash_set<std::string> existing_hashes;
Expand Down Expand Up @@ -455,8 +455,8 @@ void Centipede::LoadShard(const Environment &load_env, size_t shard_index,
// See serialize_shard_loads on why we may want to serialize shard loads.
// TODO(kcc): remove serialize_shard_loads when LoadShards() uses less RAM.
const WorkDir wd{load_env};
const std::string corpus_path = wd.CorpusFiles().ShardPath(shard_index);
const std::string features_path = wd.FeaturesFiles().ShardPath(shard_index);
const std::string corpus_path = wd.CorpusFiles().Shard(shard_index);
const std::string features_path = wd.FeaturesFiles().Shard(shard_index);
if (env_.serialize_shard_loads) {
ABSL_CONST_INIT static absl::Mutex load_shard_mu{absl::kConstInit};
absl::MutexLock lock(&load_shard_mu);
Expand Down Expand Up @@ -490,7 +490,7 @@ void Centipede::LoadAllShardsInRandomOrder(const Environment &load_env,

void Centipede::Rerun(std::vector<ByteArray> &to_rerun) {
if (to_rerun.empty()) return;
auto features_file_path = wd_.FeaturesFiles().ShardPath(env_.my_shard_index);
auto features_file_path = wd_.FeaturesFiles().Shard(env_.my_shard_index);
auto features_file = DefaultBlobFileWriterFactory(env_.riegeli);
CHECK_OK(features_file->Open(features_file_path, "a"));

Expand Down Expand Up @@ -635,8 +635,7 @@ void Centipede::MergeFromOtherCorpus(std::string_view merge_from_dir,
CHECK_GE(new_corpus_size, initial_corpus_size); // Corpus can't shrink here.
if (new_corpus_size > initial_corpus_size) {
auto appender = DefaultBlobFileWriterFactory(env_.riegeli);
CHECK_OK(
appender->Open(wd_.CorpusFiles().ShardPath(env_.my_shard_index), "a"));
CHECK_OK(appender->Open(wd_.CorpusFiles().Shard(env_.my_shard_index), "a"));
for (size_t idx = initial_corpus_size; idx < new_corpus_size; ++idx) {
CHECK_OK(appender->Write(corpus_.Get(idx)));
}
Expand All @@ -655,7 +654,7 @@ void Centipede::ReloadAllShardsAndWriteDistilledCorpus() {

// Save the distilled corpus to a file in workdir and possibly to a hashed
// file in the first corpus dir passed in `--corpus_dir`.
const auto distill_to_path = wd_.DistilledCorpusFiles().MyShardPath();
const auto distill_to_path = wd_.DistilledCorpusFiles().MyShard();
LOG(INFO) << "Distilling: shard: " << env_.my_shard_index
<< " output: " << distill_to_path << " "
<< " distilled size: " << corpus_.NumActive();
Expand Down Expand Up @@ -718,10 +717,10 @@ void Centipede::FuzzingLoop() {

if (env_.load_shards_only) return;

auto corpus_path = wd_.CorpusFiles().ShardPath(env_.my_shard_index);
auto corpus_path = wd_.CorpusFiles().Shard(env_.my_shard_index);
auto corpus_file = DefaultBlobFileWriterFactory(env_.riegeli);
CHECK_OK(corpus_file->Open(corpus_path, "a"));
auto features_path = wd_.FeaturesFiles().ShardPath(env_.my_shard_index);
auto features_path = wd_.FeaturesFiles().Shard(env_.my_shard_index);
auto features_file = DefaultBlobFileWriterFactory(env_.riegeli);
CHECK_OK(features_file->Open(features_path, "a"));

Expand Down Expand Up @@ -872,10 +871,10 @@ void Centipede::ReportCrash(std::string_view binary,
BatchResult one_input_batch_result;
if (!user_callbacks_.Execute(binary, {one_input}, one_input_batch_result)) {
auto hash = Hash(one_input);
auto crash_dir = wd_.CrashReproducerDirPath();
auto crash_dir = wd_.CrashReproducerDirs().MyShard();
CHECK_OK(RemoteMkdir(crash_dir));
std::string input_file_path = std::filesystem::path(crash_dir) / hash;
auto crash_metadata_dir = wd_.CrashMetadataDirPath();
auto crash_metadata_dir = wd_.CrashMetadataDirs().MyShard();
CHECK_OK(RemoteMkdir(crash_metadata_dir));
std::string crash_metadata_file_path =
std::filesystem::path(crash_metadata_dir) / hash;
Expand Down Expand Up @@ -909,7 +908,7 @@ void Centipede::ReportCrash(std::string_view binary,
// shard-like corpus file instead.
const auto &suspect_input = input_vec[suspect_input_idx];
auto suspect_hash = Hash(suspect_input);
auto crash_dir = wd_.CrashReproducerDirPath();
auto crash_dir = wd_.CrashReproducerDirs().MyShard();
CHECK_OK(RemoteMkdir(crash_dir));
std::string crashing_batch_name =
absl::StrCat("crashing_batch-", suspect_hash);
Expand All @@ -923,7 +922,7 @@ void Centipede::ReportCrash(std::string_view binary,
absl::StrFormat("input-%010d-%s", i, hash));
CHECK_OK(RemoteFileSetContents(file_path, one_input));
}
auto crash_metadata_dir = wd_.CrashMetadataDirPath();
auto crash_metadata_dir = wd_.CrashMetadataDirs().MyShard();
CHECK_OK(RemoteMkdir(crash_metadata_dir));
std::string crash_metadata_file_path =
std::filesystem::path(crash_metadata_dir) / crashing_batch_name;
Expand Down
52 changes: 28 additions & 24 deletions centipede/centipede_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -415,29 +415,33 @@ absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata(

void DeduplicateAndStoreNewCrashes(
const std::filesystem::path &crashing_dir, const WorkDir &workdir,
absl::flat_hash_set<std::string> crash_metadata) {
const std::vector<std::string> new_crashing_input_files =
// The crash reproducer directory may contain subdirectories with
// input files that don't individually cause a crash. We ignore those
// for now and don't list the files recursively.
ValueOrDie(RemoteListFiles(workdir.CrashReproducerDirPath(),
/*recursively=*/false));
const std::filesystem::path crash_metadata_dir =
workdir.CrashMetadataDirPath();

CHECK_OK(RemoteMkdir(crashing_dir.c_str()));
for (const std::string &crashing_input_file : new_crashing_input_files) {
const std::string crashing_input_file_name =
std::filesystem::path(crashing_input_file).filename();
const std::string crash_metadata_file =
crash_metadata_dir / crashing_input_file_name;
std::string new_crash_metadata;
CHECK_OK(RemoteFileGetContents(crash_metadata_file, new_crash_metadata));
const bool is_duplicate = !crash_metadata.insert(new_crash_metadata).second;
if (is_duplicate) continue;
CHECK_OK(
RemotePathRename(crashing_input_file,
(crashing_dir / crashing_input_file_name).c_str()));
size_t total_shards, absl::flat_hash_set<std::string> crash_metadata) {
for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) {
const std::vector<std::string> new_crashing_input_files =
// The crash reproducer directory may contain subdirectories with
// input files that don't individually cause a crash. We ignore those
// for now and don't list the files recursively.
ValueOrDie(
RemoteListFiles(workdir.CrashReproducerDirs().Shard(shard_idx),
/*recursively=*/false));
const std::filesystem::path crash_metadata_dir =
workdir.CrashMetadataDirs().Shard(shard_idx);

CHECK_OK(RemoteMkdir(crashing_dir.c_str()));
for (const std::string &crashing_input_file : new_crashing_input_files) {
const std::string crashing_input_file_name =
std::filesystem::path(crashing_input_file).filename();
const std::string crash_metadata_file =
crash_metadata_dir / crashing_input_file_name;
std::string new_crash_metadata;
CHECK_OK(RemoteFileGetContents(crash_metadata_file, new_crash_metadata));
const bool is_duplicate =
!crash_metadata.insert(new_crash_metadata).second;
if (is_duplicate) continue;
CHECK_OK(
RemotePathRename(crashing_input_file,
(crashing_dir / crashing_input_file_name).c_str()));
}
}
}

Expand Down Expand Up @@ -607,7 +611,7 @@ int UpdateCorpusDatabaseForFuzzTests(
absl::flat_hash_set<std::string> crash_metadata =
PruneOldCrashesAndGetRemainingCrashMetadata(crashing_dir, env,
callbacks_factory);
DeduplicateAndStoreNewCrashes(crashing_dir, workdir,
DeduplicateAndStoreNewCrashes(crashing_dir, workdir, env.total_shards,
std::move(crash_metadata));
}
CHECK_OK(RemotePathDelete(base_workdir_path.c_str(), /*recursively=*/true));
Expand Down
11 changes: 5 additions & 6 deletions centipede/centipede_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,7 @@ TEST(Centipede, ExtraBinaries) {

// Verify that we see the expected crashes.
// The "crashes" dir must contain 3 crashy inputs, one for each binary.
auto crashes_dir_path = WorkDir{env}.CrashReproducerDirPath();
auto crashes_dir_path = WorkDir{env}.CrashReproducerDirs().MyShard();
ASSERT_TRUE(std::filesystem::exists(crashes_dir_path))
<< VV(crashes_dir_path);
EXPECT_THAT(crashes_dir_path,
Expand All @@ -742,7 +742,7 @@ TEST(Centipede, ExtraBinaries) {
// Verify that we see the expected crash metadata.
// The "crash-metadata" dir must contain 3 crash metadata files, one for each
// crashy input.
auto crash_metadata_dir_path = WorkDir{env}.CrashMetadataDirPath();
auto crash_metadata_dir_path = WorkDir{env}.CrashMetadataDirs().MyShard();
ASSERT_TRUE(std::filesystem::exists(crash_metadata_dir_path))
<< VV(crash_metadata_dir_path);
EXPECT_THAT(crash_metadata_dir_path,
Expand Down Expand Up @@ -855,10 +855,9 @@ TEST(Centipede, UndetectedCrashingInput) {
// the batch that were executing during the session. We simply verify the
// number of saved inputs matches the number of executed inputs.
const auto crashing_input_hash = Hash(mock.crashing_input());
const auto crashes_dir_path = std::filesystem::path(temp_dir.path())
.append("crashes")
.append("crashing_batch-")
.concat(crashing_input_hash);
const auto crashes_dir_path =
std::filesystem::path{WorkDir{env}.CrashReproducerDirs().MyShard()} /
absl::StrCat("crashing_batch-", crashing_input_hash);
EXPECT_TRUE(std::filesystem::exists(crashes_dir_path)) << crashes_dir_path;
std::vector<std::string> found_crash_file_names;
for (auto const &dir_ent :
Expand Down
7 changes: 3 additions & 4 deletions centipede/corpus_io_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,10 @@ TEST(ExportCorpusTest, ExportsCorpusToIndividualFiles) {
const WorkDir workdir{temp_dir.c_str(), "fake_binary_name",
"fake_binary_hash", /*my_shard_index=*/0};
const auto corpus_files = workdir.CorpusFiles();
WriteBlobsToFile(corpus_files.ShardPath(0), {ByteArray{1, 2}, ByteArray{3}});
WriteBlobsToFile(corpus_files.ShardPath(1), {ByteArray{4}, ByteArray{5, 6}});
WriteBlobsToFile(corpus_files.Shard(0), {ByteArray{1, 2}, ByteArray{3}});
WriteBlobsToFile(corpus_files.Shard(1), {ByteArray{4}, ByteArray{5, 6}});

ExportCorpus({corpus_files.ShardPath(0), corpus_files.ShardPath(1)},
out_dir.c_str());
ExportCorpus({corpus_files.Shard(0), corpus_files.Shard(1)}, out_dir.c_str());

EXPECT_THAT(ReadInputsFromFiles(out_dir.c_str()),
UnorderedElementsAre(ByteArray{1, 2}, ByteArray{3}, ByteArray{4},
Expand Down
12 changes: 6 additions & 6 deletions centipede/distill.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ class InputCorpusShardReader {
: workdir_{env}, log_prefix_{LogPrefix(env)} {}

perf::MemSize EstimateRamFootprint(size_t shard_idx) const {
const auto corpus_path = workdir_.CorpusFiles().ShardPath(shard_idx);
const auto features_path = workdir_.FeaturesFiles().ShardPath(shard_idx);
const auto corpus_path = workdir_.CorpusFiles().Shard(shard_idx);
const auto features_path = workdir_.FeaturesFiles().Shard(shard_idx);
const perf::MemSize corpus_file_size =
ValueOrDie(RemoteFileGetSize(corpus_path));
const perf::MemSize features_file_size =
Expand All @@ -134,8 +134,8 @@ class InputCorpusShardReader {

// Reads and returns a single shard's elements. Thread-safe.
CorpusEltVec ReadShard(size_t shard_idx) {
const auto corpus_path = workdir_.CorpusFiles().ShardPath(shard_idx);
const auto features_path = workdir_.FeaturesFiles().ShardPath(shard_idx);
const auto corpus_path = workdir_.CorpusFiles().Shard(shard_idx);
const auto features_path = workdir_.FeaturesFiles().Shard(shard_idx);
VLOG(1) << log_prefix_ << "reading input shard " << shard_idx << ":\n"
<< VV(corpus_path) << "\n"
<< VV(features_path);
Expand Down Expand Up @@ -167,8 +167,8 @@ class CorpusShardWriter {
CorpusShardWriter(const Environment &env, bool append)
: workdir_{env},
log_prefix_{LogPrefix(env)},
corpus_path_{workdir_.DistilledCorpusFiles().MyShardPath()},
features_path_{workdir_.DistilledFeaturesFiles().MyShardPath()},
corpus_path_{workdir_.DistilledCorpusFiles().MyShard()},
features_path_{workdir_.DistilledFeaturesFiles().MyShard()},
corpus_writer_{DefaultBlobFileWriterFactory()},
feature_writer_{DefaultBlobFileWriterFactory()} {
CHECK_OK(corpus_writer_->Open(corpus_path_, append ? "a" : "w"));
Expand Down
9 changes: 4 additions & 5 deletions centipede/distill_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ using InputVec = std::vector<ByteArray>;
void WriteToShard(const Environment &env, const TestCorpusRecord &record,
size_t shard_index) {
const WorkDir wd{env};
const auto corpus_path = wd.CorpusFiles().ShardPath(shard_index);
const auto features_path = wd.FeaturesFiles().ShardPath(shard_index);
const auto corpus_path = wd.CorpusFiles().Shard(shard_index);
const auto features_path = wd.FeaturesFiles().Shard(shard_index);
const auto corpus_appender = DefaultBlobFileWriterFactory(env.riegeli);
const auto features_appender = DefaultBlobFileWriterFactory(env.riegeli);
CHECK_OK(corpus_appender->Open(corpus_path, "a"));
Expand All @@ -80,9 +80,8 @@ void WriteToShard(const Environment &env, const TestCorpusRecord &record,
// Reads and returns the distilled corpus record from
// `wd.DistilledCorpusPath()` and `wd.DistilledFeaturesPath()`.
std::vector<TestCorpusRecord> ReadFromDistilled(const WorkDir &wd) {
const auto distilled_corpus_path = wd.DistilledCorpusFiles().MyShardPath();
const auto distilled_features_path =
wd.DistilledFeaturesFiles().MyShardPath();
const auto distilled_corpus_path = wd.DistilledCorpusFiles().MyShard();
const auto distilled_features_path = wd.DistilledFeaturesFiles().MyShard();

std::vector<TestCorpusRecord> result;
auto shard_reader_callback = [&result](ByteArray input, FeatureVec features) {
Expand Down
2 changes: 1 addition & 1 deletion centipede/minimize_crash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ int MinimizeCrash(ByteSpan crashy_input, const Environment &env,
LOG(INFO) << "Starting the crash minimization loop in " << env.num_threads
<< "threads";

MinimizerWorkQueue queue(WorkDir{env}.CrashReproducerDirPath(),
MinimizerWorkQueue queue(WorkDir{env}.CrashReproducerDirs().MyShard(),
original_crashy_input);

{
Expand Down
4 changes: 2 additions & 2 deletions centipede/minimize_crash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ TEST(MinimizeTest, MinimizeTest) {
EXPECT_EQ(MinimizeCrash(original_crasher, env, factory), EXIT_SUCCESS);
// Collect the new crashers from the crasher dir.
std::vector<ByteArray> crashers;
for (auto const &dir_entry :
std::filesystem::directory_iterator{wd.CrashReproducerDirPath()}) {
for (auto const &dir_entry : std::filesystem::directory_iterator{
wd.CrashReproducerDirs().MyShard()}) {
ByteArray crasher;
const std::string &path = dir_entry.path();
ReadFromLocalFile(path, crasher);
Expand Down
22 changes: 11 additions & 11 deletions centipede/seed_corpus_maker_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,10 @@ absl::Status SampleSeedCorpusElementsFromSource( //
const auto work_dir = WorkDir::FromCorpusShardPath( //
corpus_fname, coverage_binary_name, coverage_binary_hash);
const std::string features_fname =
work_dir.CorpusFiles().IsShardPath(corpus_fname)
? work_dir.FeaturesFiles().MyShardPath()
: work_dir.DistilledCorpusFiles().IsShardPath(corpus_fname)
? work_dir.DistilledFeaturesFiles().MyShardPath()
work_dir.CorpusFiles().IsShard(corpus_fname)
? work_dir.FeaturesFiles().MyShard()
: work_dir.DistilledCorpusFiles().IsShard(corpus_fname)
? work_dir.DistilledFeaturesFiles().MyShard()
: "";

VLOG(2) << "Reading elements from source shard " << shard
Expand Down Expand Up @@ -356,21 +356,21 @@ absl::Status WriteSeedCorpusElementsToDestination( //
const auto work_dir = WorkDir::FromCorpusShardPath( //
corpus_fname, coverage_binary_name, coverage_binary_hash);

if (corpus_fname != work_dir.CorpusFiles().MyShardPath() &&
corpus_fname != work_dir.DistilledCorpusFiles().MyShardPath()) {
if (corpus_fname != work_dir.CorpusFiles().MyShard() &&
corpus_fname != work_dir.DistilledCorpusFiles().MyShard()) {
return absl::InvalidArgumentError(absl::StrCat(
"Bad config: generated destination corpus filename '",
corpus_fname, "' doesn't match one of two expected forms '",
work_dir.CorpusFiles().MyShardPath(), "' or '",
work_dir.DistilledCorpusFiles().MyShardPath(),
work_dir.CorpusFiles().MyShard(), "' or '",
work_dir.DistilledCorpusFiles().MyShard(),
"'; make sure binary name in config matches explicitly passed '",
coverage_binary_name, "'"));
}

const std::string features_fname =
work_dir.CorpusFiles().IsShardPath(corpus_fname)
? work_dir.FeaturesFiles().MyShardPath()
: work_dir.DistilledFeaturesFiles().MyShardPath();
work_dir.CorpusFiles().IsShard(corpus_fname)
? work_dir.FeaturesFiles().MyShard()
: work_dir.DistilledFeaturesFiles().MyShard();
CHECK(!features_fname.empty());

VLOG(2) << "Writing " << std::distance(elt_range_begin, elt_range_end)
Expand Down
20 changes: 10 additions & 10 deletions centipede/seed_corpus_maker_lib_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,21 @@ void VerifyShardsExist( //
std::string{binary_hash},
/*my_shard_index=*/0,
};
const WorkDir::ShardedFileInfo corpus_files =
const WorkDir::ShardedPath corpus_files =
shard_type == kNormal ? wd.CorpusFiles() : wd.DistilledCorpusFiles();
const WorkDir::ShardedFileInfo features_files =
const WorkDir::ShardedPath features_files =
shard_type == kNormal ? wd.FeaturesFiles() : wd.DistilledFeaturesFiles();
for (int shard = 0; shard < num_shards + 2; ++shard) {
if (shard < num_shards) {
ASSERT_TRUE(fs::exists(corpus_files.ShardPath(shard)))
<< VV(shard) << VV(corpus_files.ShardPath(shard));
ASSERT_TRUE(fs::exists(features_files.ShardPath(shard)))
<< VV(shard) << VV(features_files.ShardPath(shard));
ASSERT_TRUE(fs::exists(corpus_files.Shard(shard)))
<< VV(shard) << VV(corpus_files.Shard(shard));
ASSERT_TRUE(fs::exists(features_files.Shard(shard)))
<< VV(shard) << VV(features_files.Shard(shard));
} else {
ASSERT_FALSE(fs::exists(corpus_files.ShardPath(shard)))
<< VV(shard) << VV(corpus_files.ShardPath(shard));
ASSERT_FALSE(fs::exists(features_files.ShardPath(shard)))
<< VV(shard) << VV(features_files.ShardPath(shard));
ASSERT_FALSE(fs::exists(corpus_files.Shard(shard)))
<< VV(shard) << VV(corpus_files.Shard(shard));
ASSERT_FALSE(fs::exists(features_files.Shard(shard)))
<< VV(shard) << VV(features_files.Shard(shard));
}
}
}
Expand Down
Loading

0 comments on commit d152d45

Please sign in to comment.