Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/experiment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ def get_oss_fuzz_corpora_filestore_path():
return posixpath.join(get_experiment_filestore_path(), 'oss_fuzz_corpora')


def get_random_seed_corpora_filestore_path():
    """Returns the path, inside the experiment filestore, of the directory
    holding the seed corpora provided by the user."""
    corpora_dir_name = 'random_seed_corpora'
    return posixpath.join(get_experiment_filestore_path(), corpora_dir_name)


def get_dispatcher_instance_name(experiment: str) -> str:
    """Returns the dispatcher instance name for |experiment|, which is the
    experiment name prefixed with "d-"."""
    return f'd-{experiment}'
Expand Down
1 change: 1 addition & 0 deletions experiment/resources/runner-startup-script-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ docker run \
-e NO_SEEDS={{no_seeds}} \
-e NO_DICTIONARIES={{no_dictionaries}} \
-e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \
-e RANDOM_SEED_CORPUS={{random_seed_corpus}} \
-e DOCKER_REGISTRY={{docker_registry}} {% if not local_experiment %}-e CLOUD_PROJECT={{cloud_project}} -e CLOUD_COMPUTE_ZONE={{cloud_compute_zone}} {% endif %}\
-e EXPERIMENT_FILESTORE={{experiment_filestore}} {% if local_experiment %}-v {{experiment_filestore}}:{{experiment_filestore}} {% endif %}\
-e REPORT_FILESTORE={{report_filestore}} {% if local_experiment %}-v {{report_filestore}}:{{report_filestore}} {% endif %}\
Expand Down
86 changes: 84 additions & 2 deletions experiment/run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import sys
import tarfile
import tempfile
import zipfile
from typing import Dict, List

import jinja2
Expand Down Expand Up @@ -63,6 +64,10 @@
'gs://{project}-backup.clusterfuzz-external.appspot.com/corpus/'
'libFuzzer/{fuzz_target}/public.zip')

# Maximum size, in bytes (1 MiB), allowed per seed corpus element for AFL.
CORPUS_ELEMENT_BYTES_LIMIT = 1024 * 1024
# Name of the directory that holds the zipped per-benchmark seed corpora.
RANDOM_CORPORA_ZIP_DIR_NAME = 'random_seed_corpora_zip'


def read_and_validate_experiment_config(config_filename: str) -> Dict:
"""Reads |config_filename|, validates it, finds as many errors as possible,
Expand Down Expand Up @@ -148,6 +153,54 @@ def get_directories(parent_dir):
]


# pylint: disable=too-many-locals
def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks):
    """Validates the seed corpus provided by the user and archives it.

    |random_seed_corpus| must be a directory containing one non-empty
    sub-directory per benchmark in |benchmarks|. For every benchmark, each
    file found under its sub-directory that is neither empty nor larger than
    CORPUS_ELEMENT_BYTES_LIMIT is written to an archive named
    "<benchmark>.zip". The directory of archives is then handed to
    filesystem.replace_dir with the RANDOM_CORPORA_ZIP_DIR_NAME directory
    inside |random_seed_corpus| as destination.

    Raises:
      ValidationError: If |random_seed_corpus| is not a directory, or if the
        corpus of a benchmark is missing, is not a directory, is empty, or
        contains no file of an acceptable size.
    """
    if not os.path.isdir(random_seed_corpus):
        raise ValidationError('Corpus location "%s" is invalid.' %
                              random_seed_corpus)

    with tempfile.TemporaryDirectory() as zip_dir:
        for benchmark in benchmarks:
            benchmark_corpus_dir = os.path.join(random_seed_corpus, benchmark)
            if not os.path.exists(benchmark_corpus_dir):
                raise ValidationError('Random seed corpus directory for '
                                      'benchmark "%s" does not exist.' %
                                      benchmark)
            if not os.path.isdir(benchmark_corpus_dir):
                raise ValidationError('Seed corpus of benchmark "%s" must be '
                                      'a directory.' % benchmark)
            if not os.listdir(benchmark_corpus_dir):
                raise ValidationError(
                    'Seed corpus of benchmark "%s" is empty.' % benchmark)

            # Empty and oversized files are skipped rather than reported.
            valid_corpus_files = set()
            for root, _, files in os.walk(benchmark_corpus_dir):
                for filename in files:
                    file_path = os.path.join(root, filename)
                    file_size = os.path.getsize(file_path)

                    if file_size == 0 or file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                        continue
                    valid_corpus_files.add(file_path)

            if not valid_corpus_files:
                raise ValidationError('No valid corpus files for "%s"' %
                                      benchmark)

            benchmark_corpus_archive_path = os.path.join(
                zip_dir, f'{benchmark}.zip')
            with zipfile.ZipFile(benchmark_corpus_archive_path, 'w') as archive:
                # Sorted so that the archive layout is reproducible.
                for file_path in sorted(valid_corpus_files):
                    # Name members relative to the corpus root, i.e.
                    # "<benchmark>/...". Naming them after their immediate
                    # parent directory only would give files in same-named
                    # nested directories identical member names.
                    archive.write(
                        file_path,
                        os.path.relpath(file_path, random_seed_corpus))

        random_seed_corpora_zip_dir = os.path.join(random_seed_corpus,
                                                   RANDOM_CORPORA_ZIP_DIR_NAME)
        # NOTE(review): if replace_dir moves |zip_dir| away, the temporary
        # directory no longer exists when the context manager cleans up --
        # confirm that this is handled on all supported Python versions.
        filesystem.replace_dir(zip_dir, random_seed_corpora_zip_dir)


def validate_benchmarks(benchmarks: List[str]):
"""Parses and validates list of benchmarks."""
benchmark_types = set()
Expand Down Expand Up @@ -220,7 +273,8 @@ def start_experiment( # pylint: disable=too-many-arguments
concurrent_builds=None,
measurers_cpus=None,
runners_cpus=None,
use_branch_coverage=False):
use_branch_coverage=False,
random_seed_corpus=None):
"""Start a fuzzer benchmarking experiment."""
if not allow_uncommitted_changes:
check_no_uncommitted_changes()
Expand Down Expand Up @@ -250,6 +304,12 @@ def start_experiment( # pylint: disable=too-many-arguments
# 12GB is just the amount that KLEE needs, use this default to make KLEE
# experiments easier to run.
config['runner_memory'] = config.get('runner_memory', '12GB')

config['random_seed_corpus'] = random_seed_corpus
if config['random_seed_corpus']:
validate_and_pack_random_seed_corpus(config['random_seed_corpus'],
benchmarks)

return start_experiment_from_full_config(config)


Expand Down Expand Up @@ -332,6 +392,15 @@ def filter_file(tar_info):
for benchmark in config['benchmarks']:
add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir)

if config['random_seed_corpus']:
random_seed_corpus_zip = os.path.join(config['random_seed_corpus'],
RANDOM_CORPORA_ZIP_DIR_NAME)
filestore_utils.cp(
random_seed_corpus_zip,
experiment_utils.get_random_seed_corpora_filestore_path(),
recursive=True,
parallel=True)


class BaseDispatcher:
"""Class representing the dispatcher."""
Expand Down Expand Up @@ -524,6 +593,10 @@ def main():
'--runners-cpus',
help='Cpus available to the runners.',
required=False)
parser.add_argument('-rs',
'--random-seed-corpus',
help='Path to the random seed corpus',
Comment thread
jiradeto marked this conversation as resolved.
required=False)

all_fuzzers = fuzzer_utils.get_fuzzer_names()
parser.add_argument('-f',
Expand Down Expand Up @@ -593,6 +666,14 @@ def main():
parser.error('The sum of runners and measurers cpus is greater than the'
' available cpu cores (%d)' % os.cpu_count())

if args.random_seed_corpus:
if args.no_seeds:
parser.error(
'You cannot start an experiment with no_seeds option if'
Comment thread
jiradeto marked this conversation as resolved.
Outdated
' seeds location is provided you')
if args.oss_fuzz_corpus:
parser.error('Cannot use seeds from multiple sources')
Comment thread
jiradeto marked this conversation as resolved.
Outdated

start_experiment(args.experiment_name,
args.experiment_config,
args.benchmarks,
Expand All @@ -605,7 +686,8 @@ def main():
concurrent_builds=concurrent_builds,
measurers_cpus=measurers_cpus,
runners_cpus=runners_cpus,
use_branch_coverage=args.use_branch_coverage)
use_branch_coverage=args.use_branch_coverage,
random_seed_corpus=args.random_seed_corpus)
return 0


Expand Down
20 changes: 19 additions & 1 deletion experiment/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import threading
import time
import zipfile
import random

from common import benchmark_config
from common import environment
Expand Down Expand Up @@ -115,6 +116,20 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
return seed_corpus_path if os.path.exists(seed_corpus_path) else None


def _unpack_random_seed_corpus(corpus_directory):
    """Replaces the contents of |corpus_directory| with one randomly picked
    input from the seed corpus archive provided by the user for the benchmark
    named by the BENCHMARK environment variable."""
    # Remove the initial seed corpus.
    shutil.rmtree(corpus_directory)
    os.mkdir(corpus_directory)
    benchmark = environment.get('BENCHMARK')
    corpus_archive_filename = posixpath.join(
        experiment_utils.get_random_seed_corpora_filestore_path(),
        f'{benchmark}.zip')
    # NOTE(review): zipfile can only open local paths; presumably the
    # experiment filestore is reachable as a local path here -- confirm that
    # this also holds for non-local experiments.
    with zipfile.ZipFile(corpus_archive_filename) as zip_file:
        # Directory entries are not inputs: extracting one would leave the
        # corpus directory without any seed file.
        seed_files = [
            info for info in zip_file.infolist() if not info.is_dir()
        ]
        # random.choice raises IndexError if the archive has no file entries.
        selected_file = random.choice(seed_files)
        zip_file.extract(selected_file, corpus_directory)


def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
"""If a clusterfuzz seed corpus archive is available, unpack it into the
corpus directory if it exists. Copied from unpack_seed_corpus in
Expand Down Expand Up @@ -172,7 +187,10 @@ def run_fuzzer(max_total_time, log_filename):
logs.error('Fuzz target binary not found.')
return

_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
if environment.get('RANDOM_SEED_CORPUS'):
_unpack_random_seed_corpus(input_corpus)
else:
_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
_clean_seed_corpus(input_corpus)

if max_total_time is None:
Expand Down
1 change: 1 addition & 0 deletions experiment/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'],
'num_cpu_cores': experiment_config['runner_num_cpu_cores'],
'cpuset': CPUSET,
'random_seed_corpus': experiment_config['random_seed_corpus'],
}

if not local_experiment:
Expand Down
1 change: 1 addition & 0 deletions experiment/test_data/experiment-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ git_hash: "git-hash"
no_seeds: false
no_dictionaries: false
oss_fuzz_corpus: false
random_seed_corpus: false
description: "Test experiment"
concurrent_builds: null
runners_cpus: null
Expand Down
1 change: 1 addition & 0 deletions experiment/test_run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def test_copy_resources_to_bucket(tmp_path):
'experiment': 'experiment',
'benchmarks': ['libxslt_xpath'],
'oss_fuzz_corpus': True,
'random_seed_corpus': False,
}
try:
with mock.patch('common.filestore_utils.cp') as mocked_filestore_cp:
Expand Down
1 change: 1 addition & 0 deletions experiment/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def test_create_trial_instance(benchmark, expected_image, expected_target,
-e NO_SEEDS=False \\
-e NO_DICTIONARIES=False \\
-e OSS_FUZZ_CORPUS=False \\
-e RANDOM_SEED_CORPUS=False \\
-e DOCKER_REGISTRY=gcr.io/fuzzbench -e CLOUD_PROJECT=fuzzbench -e CLOUD_COMPUTE_ZONE=us-central1-a \\
-e EXPERIMENT_FILESTORE=gs://experiment-data \\
-e REPORT_FILESTORE=gs://web-reports \\
Expand Down