From bd4ad7261211807f49bb596840a02f965dc8a153 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Wed, 14 Jan 2026 21:07:51 +0000 Subject: [PATCH 01/13] Add handling for reformatted archives conditional on archive format version. --- .../build_management/build_archive.py | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 133fc1c2c1..6f824da75c 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -14,16 +14,14 @@ """Build Archive manager.""" import abc +import json import os -from typing import BinaryIO -from typing import Callable -from typing import List -from typing import Optional -from typing import Union +from typing import BinaryIO, Callable, List, Optional, Union from clusterfuzz._internal.metrics import logs from clusterfuzz._internal.system import archive + # Extensions to exclude when unarchiving a fuzz target. Note that fuzz target # own files like seed corpus, options, etc are covered by its own regex. FUZZ_TARGET_EXCLUDED_EXTENSIONS = [ @@ -224,12 +222,24 @@ class ChromeBuildArchive(DefaultBuildArchive): In case something goes wrong, this defaults to using the default unpacker. """ + def __init__(self, reader: archive.ArchiveReader): + super().__init__(reader) + manifest_path = os.path.join(self.root_dir(), 'clusterfuzz_manifest.json') + """The manifest may not exist for earlier versions of archives. In this + case, default to schema version 0. 
+ """ + if self.file_exists(manifest_path): + archive_schema_json = json.loads(self.open(manifest_path).read().decode()) + self._archive_schema_version = archive_schema_json.get('version', 0) + else: + self._archive_schema_version = 0 + def root_dir(self) -> str: if not hasattr(self, '_root_dir'): self._root_dir = super().root_dir() # pylint: disable=attribute-defined-outside-init return self._root_dir - def to_archive_path(self, path: str) -> str: + def to_archive_path(self, path: str, archive_schema_version: int = 0) -> str: """Deps are relative to the Chrome root directory. However, there might be a common root directory in the archive, which means we need to make sure the file path is correct. @@ -242,8 +252,18 @@ def to_archive_path(self, path: str) -> str: """ path = os.path.normpath(path) - if path.startswith('../../'): + if archive_schema_version == 0 and path.startswith('../../'): path = path.replace('../../', 'src_root/') + elif archive_schema_version > 0: + if path.startswith('../../'): + """For newer archive versions, runtime_deps that were formerly stored + under {self.root_dir()}/src_root/ are now stored in the root directory, + while the build artifacts formerly stored in the root directory are now + stored under {self.root_dir()}/out/msan/. 
+ """ + path = path.replace('../../', '') + elif path.startswith('./'): + path = path.replace('./', 'out/msan/') return os.path.join(self.root_dir(), path) @@ -253,8 +273,9 @@ def _get_prefix_matcher(self, prefix: str) -> Callable[[str], bool]: def _get_filename_matcher(self, file: str) -> Callable[[str], bool]: return lambda f: os.path.basename(f) == file - def _match_files(self, matchers: List[Callable[[str], bool]] - ) -> List[archive.ArchiveMemberInfo]: + def _match_files( + self, matchers: List[Callable[[str], + bool]]) -> List[archive.ArchiveMemberInfo]: res = [] for member in self.list_members(): if any(matcher(member.name) for matcher in matchers): @@ -271,7 +292,8 @@ def _get_common_files(self) -> List[str]: def get_target_dependencies( self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]: - target_path = self.to_archive_path(fuzz_target) + target_path = self.to_archive_path(fuzz_target, + self._archive_schema_version) deps_file = f'{target_path}.runtime_deps' if not self.file_exists(deps_file): logs.warning(f'runtime_deps file not found for {target_path}') @@ -280,7 +302,10 @@ def get_target_dependencies( res = [] matchers = [] with self.open(deps_file) as f: - deps = [self.to_archive_path(l.decode()) for l in f.read().splitlines()] + deps = [ + self.to_archive_path(l.decode(), self._archive_schema_version) + for l in f.read().splitlines() + ] for dep in deps: # We need to match the file prefixes here, because some of the deps are # globering the whole directory. Same for files, on mac platform, we From 74b43bac3887341d6e229db3fc3dc12aa4a365f4 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Wed, 28 Jan 2026 20:19:09 +0000 Subject: [PATCH 02/13] Update archive unpacking logic. 
--- .../build_management/build_archive.py | 92 ++++++++++++------- .../build_management/build_archive_test.py | 29 +++--- 2 files changed, 69 insertions(+), 52 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 6f824da75c..28de13df22 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -217,53 +217,78 @@ def unpack(self, class ChromeBuildArchive(DefaultBuildArchive): """Handles chrome build archives. This special cases the default behaviour by - looking at the content of the `.runtime_deps` file, in order to unpack all the - fuzzer dependencies correctly. - In case something goes wrong, this defaults to using the default unpacker. + looking at the content of the `.runtime_deps` file for each fuzzer target in + order to unpack all of its dependencies correctly. + + Expects a manifest file named `clusterfuzz_manifest.json` in the root of the + archive to decide which schema version to use when interpreting its contents. + The legacy schema is applied to archives with no manifest. + + Defaults to using the default unpacker in case something goes wrong. """ - def __init__(self, reader: archive.ArchiveReader): - super().__init__(reader) - manifest_path = os.path.join(self.root_dir(), 'clusterfuzz_manifest.json') - """The manifest may not exist for earlier versions of archives. In this - case, default to schema version 0. - """ - if self.file_exists(manifest_path): - archive_schema_json = json.loads(self.open(manifest_path).read().decode()) - self._archive_schema_version = archive_schema_json.get('version', 0) - else: - self._archive_schema_version = 0 + def archive_schema_version(self) -> int: + if not hasattr(self, '_archive_schema_version'): + manifest_path = 'clusterfuzz_manifest.json' + # The manifest may not exist for earlier versions of archives. 
In this + # case, default to schema version 0. + if self.file_exists(manifest_path): + manifest = json.loads(self.open(manifest_path).read().decode()) + self._archive_schema_version = manifest.get('archive_schema_version', 0) + if self._archive_schema_version == 0: + logs.warning( + 'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field' + ) + else: + self._archive_schema_version = 0 + return self._archive_schema_version def root_dir(self) -> str: if not hasattr(self, '_root_dir'): self._root_dir = super().root_dir() # pylint: disable=attribute-defined-outside-init return self._root_dir - def to_archive_path(self, path: str, archive_schema_version: int = 0) -> str: - """Deps are relative to the Chrome root directory. However, there might be - a common root directory in the archive, which means we need to make sure - the file path is correct. + def get_dependency_path(self, path: str, deps_file_path: str) -> str: + """Deps are given as paths relative to the deps file where they are listed, + so we need to translate them to the corresponding paths relative to the + archive root. Args: - path: the dependency path relative to Chrome's root directory. + path: the dependency path relative to the deps file. + deps_file_path: the path to the deps file, relative to the archive root. Returns: - the path relative to the archive. + the dependency path relative to the archive root. """ path = os.path.normpath(path) - if archive_schema_version == 0 and path.startswith('../../'): + # Archive schema version 0 represents legacy behavior. For newer archive + # versions, runtime_deps that were formerly stored under + # {self.root_dir()}/src_root/ are now stored in the root directory, while + # the build artifacts formerly stored in the root directory are now stored + # in the build directory. 
+ if self.archive_schema_version() == 0 and path.startswith('../../'): path = path.replace('../../', 'src_root/') - elif archive_schema_version > 0: - if path.startswith('../../'): - """For newer archive versions, runtime_deps that were formerly stored - under {self.root_dir()}/src_root/ are now stored in the root directory, - while the build artifacts formerly stored in the root directory are now - stored under {self.root_dir()}/out/msan/. - """ - path = path.replace('../../', '') - elif path.startswith('./'): - path = path.replace('./', 'out/msan/') + elif self.archive_schema_version() > 0: + # Assumes the dependency path is relative to the deps file and + # transforms it into into a full path relative to the archive root. For + # example: + # + # deps_file_path: "/root/A/B/fuzz_target.runtime_deps" + # os.path.dirname(deps_file_path) => "/A/B/" (call this DEPS_DIR) + # path1: "./my_dep" + # path2: "../../C/my_dep2" + # path3: "D/my_dep3" + # + # os.path.join(DEPS_DIR, path1) => "/A/B/./my_dep" + # os.path.join(DEPS_DIR, path2) => "/A/B/../../C/my_dep2" + # os.path.join(DEPS_DIR, path3) => "/A/B/D/my_dep3" + # + # os.path.normpath(os.path.join(DEPS_DIR, path1)) => "/A/B/my_dep" + # os.path.normpath(os.path.join(DEPS_DIR, path2)) => "/C/my_dep2" + # os.path.normpath(os.path.join(DEPS_DIR, path3)) => "/A/B/D/my_dep3" + return os.path.normpath( + os.path.join(os.path.dirname(deps_file_path), path)) return os.path.join(self.root_dir(), path) @@ -292,8 +317,7 @@ def _get_common_files(self) -> List[str]: def get_target_dependencies( self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]: - target_path = self.to_archive_path(fuzz_target, - self._archive_schema_version) + target_path = self.get_path_for_target(fuzz_target) deps_file = f'{target_path}.runtime_deps' if not self.file_exists(deps_file): logs.warning(f'runtime_deps file not found for {target_path}') @@ -303,7 +327,7 @@ def get_target_dependencies( matchers = [] with self.open(deps_file) as f: deps = [ - 
self.to_archive_path(l.decode(), self._archive_schema_version) + self.get_dependency_path(l.decode(), deps_file) for l in f.read().splitlines() ] for dep in deps: diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index c56180ea1f..263f248a85 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -13,6 +13,7 @@ # limitations under the License. """Build archive tests.""" import io +import json import os import tempfile import unittest @@ -24,6 +25,7 @@ from clusterfuzz._internal.system import shell from clusterfuzz._internal.tests.test_libs import helpers as test_helpers + TESTDATA_PATH = os.path.join(os.path.dirname(__file__), 'build_archive_data') @@ -133,9 +135,11 @@ def setUp(self): 'clusterfuzz._internal.system.archive.ArchiveReader', 'clusterfuzz._internal.system.archive.open', 'clusterfuzz._internal.bot.fuzzers.utils.is_fuzz_target', + 'clusterfuzz._internal.build_management.build_archive.ChromeBuildArchive.archive_schema_version', ]) self.mock.open.return_value.list_members.return_value = [] self.mock.is_fuzz_target.side_effect = self._mock_is_fuzz_target + self.mock.archive_schema_version.return_value = 0 self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value) self._declared_fuzzers = [] self.maxDiff = None @@ -189,10 +193,16 @@ def _mock_open(_): def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers + def _set_archive_schema_version(self, version): + self.mock.archive_schema_version.return_value = version + self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) def test_possible_dependencies(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from the runtime_deps 
file.""" + #self._set_archive_schema_version(1) + #self.assertEqual(self.build.archive_schema_version(), 1) deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') needed_files = self._generate_possible_fuzzer_dependencies( dir_prefix, 'my_fuzzer') @@ -201,24 +211,7 @@ def test_possible_dependencies(self, dir_prefix): self._declare_fuzzers(['my_fuzzer']) to_extract = self.build.get_target_dependencies('my_fuzzer') to_extract = [f.name for f in to_extract] - self.assertCountEqual(to_extract, needed_files) - - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_archive_without_normalized_path( - self, dir_prefix): - """Tests that the chrome build handler correctly handles mixed-up - normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( - dir_prefix, 'my_fuzzer') - self._add_files_to_archive(needed_files) - - # we want our runtime_deps to have normalized path so that they do not - # exactly match the archive paths. - self._generate_runtime_deps(deps_files) - self._declare_fuzzers(['my_fuzzer']) - to_extract = self.build.get_target_dependencies('my_fuzzer') - to_extract = [f.name for f in to_extract] + self.assertEqual(to_extract, needed_files) self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) From 9c81a3861b367905da5b24a9393cbdd3e57b6f2f Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Thu, 29 Jan 2026 20:04:52 +0000 Subject: [PATCH 03/13] Address comments and add tests. 
--- .../build_management/build_archive.py | 45 ++++--- .../build_management/build_archive_test.py | 126 +++++++++++++++--- 2 files changed, 130 insertions(+), 41 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 28de13df22..70fbf8d685 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -227,21 +227,23 @@ class ChromeBuildArchive(DefaultBuildArchive): Defaults to using the default unpacker in case something goes wrong. """ - def archive_schema_version(self) -> int: - if not hasattr(self, '_archive_schema_version'): - manifest_path = 'clusterfuzz_manifest.json' - # The manifest may not exist for earlier versions of archives. In this - # case, default to schema version 0. - if self.file_exists(manifest_path): - manifest = json.loads(self.open(manifest_path).read().decode()) - self._archive_schema_version = manifest.get('archive_schema_version', 0) - if self._archive_schema_version == 0: - logs.warning( - 'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field' - ) - else: - self._archive_schema_version = 0 - return self._archive_schema_version + def __init__(self, + reader: archive.ArchiveReader, + archive_schema_version: int = 0): + super().__init__(reader) + # The manifest may not exist for earlier versions of archives. In this + # case, default to schema version 0. 
+ manifest_path = 'clusterfuzz_manifest.json' + if self.file_exists(manifest_path): + with self.open(manifest_path) as f: + manifest = json.load(f) + self._archive_schema_version = manifest.get('archive_schema_version', 0) + if self._archive_schema_version == 0: + logs.warning( + 'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field' + ) + else: + self._archive_schema_version = archive_schema_version def root_dir(self) -> str: if not hasattr(self, '_root_dir'): @@ -260,21 +262,19 @@ def get_dependency_path(self, path: str, deps_file_path: str) -> str: Returns: the dependency path relative to the archive root. """ - path = os.path.normpath(path) # Archive schema version 0 represents legacy behavior. For newer archive # versions, runtime_deps that were formerly stored under # {self.root_dir()}/src_root/ are now stored in the root directory, while # the build artifacts formerly stored in the root directory are now stored # in the build directory. - if self.archive_schema_version() == 0 and path.startswith('../../'): - path = path.replace('../../', 'src_root/') - elif self.archive_schema_version() > 0: + + if self._archive_schema_version > 0: # Assumes the dependency path is relative to the deps file and # transforms it into into a full path relative to the archive root. For # example: # - # deps_file_path: "/root/A/B/fuzz_target.runtime_deps" + # deps_file_path: "/A/B/fuzz_target.runtime_deps" # os.path.dirname(deps_file_path) => "/A/B/" (call this DEPS_DIR) # path1: "./my_dep" # path2: "../../C/my_dep2" @@ -290,6 +290,11 @@ def get_dependency_path(self, path: str, deps_file_path: str) -> str: return os.path.normpath( os.path.join(os.path.dirname(deps_file_path), path)) + # Legacy behavior. Remap `../../` to `src_root/`. 
+ path = os.path.normpath(path) + if path.startswith('../../'): + path = path.replace('../../', 'src_root/') + return os.path.join(self.root_dir(), path) def _get_prefix_matcher(self, prefix: str) -> Callable[[str], bool]: diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index 263f248a85..49f10b260c 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -135,11 +135,9 @@ def setUp(self): 'clusterfuzz._internal.system.archive.ArchiveReader', 'clusterfuzz._internal.system.archive.open', 'clusterfuzz._internal.bot.fuzzers.utils.is_fuzz_target', - 'clusterfuzz._internal.build_management.build_archive.ChromeBuildArchive.archive_schema_version', ]) self.mock.open.return_value.list_members.return_value = [] self.mock.is_fuzz_target.side_effect = self._mock_is_fuzz_target - self.mock.archive_schema_version.return_value = 0 self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value) self._declared_fuzzers = [] self.maxDiff = None @@ -156,8 +154,18 @@ def _add_files_to_archive(self, files): name=file, is_dir=False, size_bytes=0, mode=0)) self.mock.open.return_value.list_members.return_value = res - def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): - """Generates all possible dependencies for the given target.""" + def _generate_possible_fuzzer_dependencies_legacy(self, dir_prefix, + fuzz_target): + """Generates all possible dependencies for the given target. + + This implementation represents the legacy archive schema prior to version 1 + and should not be used for new tests; we keep it around for backwards + compatibility. + + New tests should use a combination of + `_generate_possible_fuzzer_dependencies()` and + `_generate_normalized_dependency_filenames()`. 
+ """ needed_files = [ f'{fuzz_target}', f'{fuzz_target}.exe', @@ -179,6 +187,41 @@ def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): ] return [os.path.join(dir_prefix, file) for file in needed_files] + def _generate_possible_fuzzer_dependencies(self, fuzz_target): + """Returns a list of dependencies as file paths relative to + {fuzz_target}.runtime_deps, as they appear in runtime_deps files in real + archives. + """ + return [ + f'{fuzz_target}', + f'{fuzz_target}.exe', + f'{fuzz_target}.exe.pdb', + f'{fuzz_target}.dict', + f'{fuzz_target}.options', + f'{fuzz_target}.runtime_deps', + f'{fuzz_target}.par', + f'{fuzz_target}.dSYM/Contents/Resources/DWARF/some_dependency', + 'shared.dll', + 'shared.dll.pdb', + './llvm-symbolizer', + 'icudtl.dat', + 'swiftshader/libGLESv2.so', + 'instrumented_libraries/msan/lib/libgcrypt.so.11.8.2', + 'afl-fuzz', + '../some_dependency', + './chrome_crashpad_handler', + ] + + def _generate_normalized_dependency_filenames(self, dir_prefix, fuzz_target): + """Returns a list of dependencies as normalized file paths, i.e. with + relative path separators like './' and '../' resolved to their true + directory names. 
+ """ + return [ + os.path.normpath(os.path.join(dir_prefix, file)) + for file in self._generate_possible_fuzzer_dependencies(fuzz_target) + ] + def _generate_runtime_deps(self, deps): def _mock_open(_): @@ -194,32 +237,33 @@ def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers def _set_archive_schema_version(self, version): - self.mock.archive_schema_version.return_value = version - self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.build._archive_schema_version = version @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies(self, dir_prefix): + def test_possible_dependencies_legacy(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from - the runtime_deps file.""" - #self._set_archive_schema_version(1) - #self.assertEqual(self.build.archive_schema_version(), 1) - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + the runtime_deps file, using the legacy archive schema where dependency + paths are interpreted as relative to the archive root and `../../` is + remapped to `src_root/`.""" + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(deps_files) self._declare_fuzzers(['my_fuzzer']) to_extract = self.build.get_target_dependencies('my_fuzzer') to_extract = [f.name for f in to_extract] - self.assertEqual(to_extract, needed_files) self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): + def test_possible_dependencies_deps_without_normalized_path_legacy( + self, dir_prefix): """Tests that the chrome build handler correctly 
handles mixed-up normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive([os.path.normpath(f) for f in needed_files]) self._generate_runtime_deps(deps_files) @@ -230,13 +274,14 @@ def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): [os.path.normpath(f) for f in needed_files]) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_other_fuzzer_not_extracted(self, dir_prefix): + def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): """Tests that the chrome build handler only unpacks dependencies for the requested fuzzer, even if other fuzzers exist in the build.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') - other_fuzzer = self._generate_possible_fuzzer_dependencies( + other_fuzzer = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'other_fuzzer') self._add_files_to_archive(list(set(needed_files + other_fuzzer))) self._generate_runtime_deps(deps_files) @@ -245,11 +290,50 @@ def test_other_fuzzer_not_extracted(self, dir_prefix): to_extract = [f.name for f in to_extract] self.assertCountEqual(to_extract, needed_files) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) + def test_possible_dependencies(self, dir_prefix): + """Tests that all the necessary dependencies are correctly extracted from + the runtime_deps file. 
+ + Under the current archive schema, dependency paths in `runtime_deps` files + are interpreted as being relative to the file itself, meaning that they must + be normalized to the equivalent path relative to the archive root before + they can be extracted. + """ + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + deps_files = self._generate_normalized_dependency_filenames( + dir_prefix, 'my_fuzzer') + self._add_files_to_archive(deps_files) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, deps_files) + + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) + def test_other_fuzzer_not_extracted(self, dir_prefix): + """Tests that the chrome build handler only unpacks dependencies for the + requested fuzzer, even if other fuzzers exist in the build.""" + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + needed_files = self._generate_normalized_dependency_filenames( + dir_prefix, 'my_fuzzer') + other_fuzzer = self._generate_normalized_dependency_filenames( + dir_prefix, 'other_fuzzer') + self._add_files_to_archive(list(set(needed_files + other_fuzzer))) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer', 'other_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, needed_files) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) def test_dsyms_are_correctly_unpacked(self, dir_prefix): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. 
""" - needed_files = self._generate_possible_fuzzer_dependencies( + self._set_archive_schema_version(1) + needed_files = self._generate_normalized_dependency_filenames( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(['my_fuzzer']) From ac6f15aceddc2e9cd0fd820ca03b427bd6117c83 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Fri, 30 Jan 2026 17:48:05 +0000 Subject: [PATCH 04/13] Update docstrings. --- .../build_management/build_archive.py | 30 +++++++++++++++---- .../build_management/build_archive_test.py | 6 ++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 70fbf8d685..9d729da311 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -16,7 +16,11 @@ import abc import json import os -from typing import BinaryIO, Callable, List, Optional, Union +from typing import BinaryIO +from typing import Callable +from typing import List +from typing import Optional +from typing import Union from clusterfuzz._internal.metrics import logs from clusterfuzz._internal.system import archive @@ -224,12 +228,27 @@ class ChromeBuildArchive(DefaultBuildArchive): archive to decide which schema version to use when interpreting its contents. The legacy schema is applied to archives with no manifest. + Under the legacy schema, fuzz targets were assumed to be at the root of the + archive while runtime_deps starting with `../../` were remapped to + `/src_root/`. + + Schema version 1 does away with `/src_root/` and interprets runtime_deps + entries as file paths relative to the runtime_deps file, which lives in the + build directory along with fuzz target binaries. + Defaults to using the default unpacker in case something goes wrong. 
""" def __init__(self, reader: archive.ArchiveReader, - archive_schema_version: int = 0): + default_archive_schema_version: int = 0): + """Initializes a `ChromeBuildArchive` with the given reader. + + Arguments: + reader: See `DefaultBuildArchive`. + default_archive_schema_version: Specifies which version of a build archive + to expect if `clusterfuzz_manifest.json` is missing or badly formatted. + """ super().__init__(reader) # The manifest may not exist for earlier versions of archives. In this # case, default to schema version 0. @@ -237,13 +256,14 @@ def __init__(self, if self.file_exists(manifest_path): with self.open(manifest_path) as f: manifest = json.load(f) - self._archive_schema_version = manifest.get('archive_schema_version', 0) - if self._archive_schema_version == 0: + self._archive_schema_version = manifest.get('archive_schema_version') + if self._archive_schema_version is None: logs.warning( 'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field' ) + self._archive_schema_version = 0 else: - self._archive_schema_version = archive_schema_version + self._archive_schema_version = default_archive_schema_version def root_dir(self) -> str: if not hasattr(self, '_root_dir'): diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index 49f10b260c..c2c426b68b 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -295,7 +295,7 @@ def test_possible_dependencies(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from the runtime_deps file. 
- Under the current archive schema, dependency paths in `runtime_deps` files + Under archive schema version 1, dependency paths in `runtime_deps` files are interpreted as being relative to the file itself, meaning that they must be normalized to the equivalent path relative to the archive root before they can be extracted. @@ -330,8 +330,8 @@ def test_other_fuzzer_not_extracted(self, dir_prefix): @parameterized.parameterized.expand(['/b/build/', 'build/', '']) def test_dsyms_are_correctly_unpacked(self, dir_prefix): - """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. - """ + """Tests that even if not listed in the runtime deps, dSYMs are correctly + unpacked.""" self._set_archive_schema_version(1) needed_files = self._generate_normalized_dependency_filenames( dir_prefix, 'my_fuzzer') From a5174f4817450c0a9718033de93bea22c430a7d9 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Tue, 3 Feb 2026 17:12:50 +0000 Subject: [PATCH 05/13] Update deps list in tests and docstring. --- .../build_management/build_archive.py | 48 ++++++++++++++++ .../build_management/build_archive_test.py | 57 ++++++++----------- 2 files changed, 73 insertions(+), 32 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 9d729da311..3025c2bca7 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -232,10 +232,58 @@ class ChromeBuildArchive(DefaultBuildArchive): archive while runtime_deps starting with `../../` were remapped to `/src_root/`. + Example archive tree: + ========== + my_fuzzer + my_fuzzer.options + my_fuzzer.owners + my_fuzzer.runtime_deps + # etc. for all fuzz targets + src_root/ + .vpython3 + third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ + # all instrumented libs + # etc. 
for other deps + + my_fuzzer.runtime_deps: + ========== + ./my_fuzzer + my_fuzzer.options + my_fuzzer.owners + my_fuzzer.runtime_deps + ../../.vpython3 + ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ld-linux-x86-64.so.2 + # etc. + Schema version 1 does away with `/src_root/` and interprets runtime_deps entries as file paths relative to the runtime_deps file, which lives in the build directory along with fuzz target binaries. + Example archive tree: + ========== + out/build/my_fuzzer + out/build/my_fuzzer.options + out/build/my_fuzzer.owners + out/build/my_fuzzer.runtime_deps + out/build/libbase.so + out/build/libatomic.so + # etc. for all fuzz targets and deps in the build directory + .vpython3 + third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ + # all instrumented libs + # etc. for other deps + + my_fuzzer.runtime_deps: + ========== + ./my_fuzzer + my_fuzzer.options + my_fuzzer.owners + my_fuzzer.runtime_deps + ./libbase.so + ./libatomic.so + ../../.vpython3 + ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib + Defaults to using the default unpacker in case something goes wrong. """ diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index c2c426b68b..e8efc5eee8 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -193,32 +193,31 @@ def _generate_possible_fuzzer_dependencies(self, fuzz_target): archives. 
""" return [ - f'{fuzz_target}', - f'{fuzz_target}.exe', - f'{fuzz_target}.exe.pdb', - f'{fuzz_target}.dict', - f'{fuzz_target}.options', + f'./{fuzz_target}', + f'{fuzz_target}.owners', f'{fuzz_target}.runtime_deps', - f'{fuzz_target}.par', f'{fuzz_target}.dSYM/Contents/Resources/DWARF/some_dependency', - 'shared.dll', - 'shared.dll.pdb', - './llvm-symbolizer', + './libbase.so', + '../../tools/valgrind/asan/', + '../../third_party/llvm-build/Release+Asserts/bin/llvm-symbolizer', + '../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib', + 'third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ld-linux-x86-64.so.2', + './libatomic.so', 'icudtl.dat', - 'swiftshader/libGLESv2.so', - 'instrumented_libraries/msan/lib/libgcrypt.so.11.8.2', - 'afl-fuzz', - '../some_dependency', - './chrome_crashpad_handler', + f'bin/run_{fuzz_target}', + '../../testing/location_tags.json', ] - def _generate_normalized_dependency_filenames(self, dir_prefix, fuzz_target): + def _generate_normalized_dependency_filenames(self, fuzz_target): """Returns a list of dependencies as normalized file paths, i.e. with relative path separators like './' and '../' resolved to their true directory names. """ + + # Runtime deps include file paths that begin with ../../ so the build + # directory is assumed to be two levels deep into the file tree. return [ - os.path.normpath(os.path.join(dir_prefix, file)) + os.path.normpath(os.path.join('/out/build/', file)) for file in self._generate_possible_fuzzer_dependencies(fuzz_target) ] @@ -290,8 +289,7 @@ def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): to_extract = [f.name for f in to_extract] self.assertCountEqual(to_extract, needed_files) - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies(self, dir_prefix): + def test_possible_dependencies(self): """Tests that all the necessary dependencies are correctly extracted from the runtime_deps file. 
@@ -302,8 +300,7 @@ def test_possible_dependencies(self, dir_prefix): """ self._set_archive_schema_version(1) deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') - deps_files = self._generate_normalized_dependency_filenames( - dir_prefix, 'my_fuzzer') + deps_files = self._generate_normalized_dependency_filenames('my_fuzzer') self._add_files_to_archive(deps_files) self._generate_runtime_deps(deps_entries) self._declare_fuzzers(['my_fuzzer']) @@ -311,16 +308,14 @@ def test_possible_dependencies(self, dir_prefix): to_extract = [f.name for f in to_extract] self.assertCountEqual(to_extract, deps_files) - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_other_fuzzer_not_extracted(self, dir_prefix): + def test_other_fuzzer_not_extracted(self): """Tests that the chrome build handler only unpacks dependencies for the requested fuzzer, even if other fuzzers exist in the build.""" self._set_archive_schema_version(1) deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') - needed_files = self._generate_normalized_dependency_filenames( - dir_prefix, 'my_fuzzer') + needed_files = self._generate_normalized_dependency_filenames('my_fuzzer') other_fuzzer = self._generate_normalized_dependency_filenames( - dir_prefix, 'other_fuzzer') + 'other_fuzzer') self._add_files_to_archive(list(set(needed_files + other_fuzzer))) self._generate_runtime_deps(deps_entries) self._declare_fuzzers(['my_fuzzer', 'other_fuzzer']) @@ -328,17 +323,15 @@ def test_other_fuzzer_not_extracted(self, dir_prefix): to_extract = [f.name for f in to_extract] self.assertCountEqual(to_extract, needed_files) - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_dsyms_are_correctly_unpacked(self, dir_prefix): + def test_dsyms_are_correctly_unpacked(self): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked.""" self._set_archive_schema_version(1) - needed_files = 
self._generate_normalized_dependency_filenames( - dir_prefix, 'my_fuzzer') + needed_files = self._generate_normalized_dependency_filenames('my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(['my_fuzzer']) to_extract = self.build.get_target_dependencies('my_fuzzer') to_extract = [f.name for f in to_extract] - dsym_path = os.path.join( - dir_prefix, 'my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency') - self.assertIn(dsym_path, to_extract) + self.assertIn( + '/out/build/my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency', + to_extract) From 286cf44b2a13c01059b88f75cf12e6decb628149 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Tue, 3 Feb 2026 17:28:12 +0000 Subject: [PATCH 06/13] Resolve other comments. --- .../build_management/build_archive.py | 2 +- .../build_management/build_archive_test.py | 31 ++++++++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 3025c2bca7..d6579c6fdf 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -309,7 +309,7 @@ def __init__(self, logs.warning( 'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field' ) - self._archive_schema_version = 0 + self._archive_schema_version = default_archive_schema_version else: self._archive_schema_version = default_archive_schema_version diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index e8efc5eee8..e09948c602 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -164,7 +164,7 @@ def _generate_possible_fuzzer_dependencies_legacy(self, dir_prefix, 
New tests should use a combination of `_generate_possible_fuzzer_dependencies()` and - `_generate_normalized_dependency_filenames()`. + `_resolve_relative_dependency_paths()`. """ needed_files = [ f'{fuzz_target}', @@ -208,7 +208,7 @@ def _generate_possible_fuzzer_dependencies(self, fuzz_target): '../../testing/location_tags.json', ] - def _generate_normalized_dependency_filenames(self, fuzz_target): + def _resolve_relative_dependency_paths(self, deps_paths): """Returns a list of dependencies as normalized file paths, i.e. with relative path separators like './' and '../' resolved to their true directory names. @@ -218,7 +218,7 @@ def _generate_normalized_dependency_filenames(self, fuzz_target): # directory is assumed to be two levels deep into the file tree. return [ os.path.normpath(os.path.join('/out/build/', file)) - for file in self._generate_possible_fuzzer_dependencies(fuzz_target) + for file in deps_paths ] def _generate_runtime_deps(self, deps): @@ -289,6 +289,20 @@ def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): to_extract = [f.name for f in to_extract] self.assertCountEqual(to_extract, needed_files) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) + def test_dsyms_are_correctly_unpacked(self, dir_prefix): + """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. + """ + needed_files = self._generate_possible_fuzzer_dependencies_legacy( + dir_prefix, 'my_fuzzer') + self._add_files_to_archive(needed_files) + self._generate_runtime_deps(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + dsym_path = os.path.join( + dir_prefix, 'my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency') + self.assertIn(dsym_path, to_extract) + def test_possible_dependencies(self): """Tests that all the necessary dependencies are correctly extracted from the runtime_deps file. 
@@ -300,7 +314,7 @@ def test_possible_dependencies(self): """ self._set_archive_schema_version(1) deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') - deps_files = self._generate_normalized_dependency_filenames('my_fuzzer') + deps_files = self._resolve_relative_dependency_paths(deps_entries) self._add_files_to_archive(deps_files) self._generate_runtime_deps(deps_entries) self._declare_fuzzers(['my_fuzzer']) @@ -313,9 +327,9 @@ def test_other_fuzzer_not_extracted(self): requested fuzzer, even if other fuzzers exist in the build.""" self._set_archive_schema_version(1) deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') - needed_files = self._generate_normalized_dependency_filenames('my_fuzzer') - other_fuzzer = self._generate_normalized_dependency_filenames( - 'other_fuzzer') + needed_files = self._resolve_relative_dependency_paths(deps_entries) + other_fuzzer = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('other_fuzzer')) self._add_files_to_archive(list(set(needed_files + other_fuzzer))) self._generate_runtime_deps(deps_entries) self._declare_fuzzers(['my_fuzzer', 'other_fuzzer']) @@ -327,7 +341,8 @@ def test_dsyms_are_correctly_unpacked(self): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked.""" self._set_archive_schema_version(1) - needed_files = self._generate_normalized_dependency_filenames('my_fuzzer') + needed_files = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('my_fuzzer')) self._add_files_to_archive(needed_files) self._generate_runtime_deps(['my_fuzzer']) to_extract = self.build.get_target_dependencies('my_fuzzer') From 0c1b1889c718e2749dd57a939ecdb0afec0a98cf Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Tue, 3 Feb 2026 18:18:47 +0000 Subject: [PATCH 07/13] Add test for reading manifest. 
--- .../build_management/build_archive_test.py | 59 ++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index e09948c602..5325e0d9bc 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -217,7 +217,7 @@ def _resolve_relative_dependency_paths(self, deps_paths): # Runtime deps include file paths that begin with ../../ so the build # directory is assumed to be two levels deep into the file tree. return [ - os.path.normpath(os.path.join('/out/build/', file)) + os.path.normpath(os.path.join('out/build/', file)) for file in deps_paths ] @@ -348,5 +348,60 @@ def test_dsyms_are_correctly_unpacked(self): to_extract = self.build.get_target_dependencies('my_fuzzer') to_extract = [f.name for f in to_extract] self.assertIn( - '/out/build/my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency', + 'out/build/my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency', to_extract) + + +class ChromeBuildArchiveManifestTest(unittest.TestCase): + """Test for reading clusterfuzz_manifest.json for Chrome archives.""" + + def setUp(self): + test_helpers.patch(self, [ + 'clusterfuzz._internal.system.archive.ArchiveReader.file_exists', + 'clusterfuzz._internal.system.archive.ArchiveReader', + 'clusterfuzz._internal.system.archive.open', + ]) + self.mock.file_exists.return_value = False + + def _generate_manifest(self, archive_schema_version): + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write( + json.dumps({ + 'archive_schema_version': archive_schema_version + }).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def _generate_invalid_manifest(self): + + def _mock_open(_): + buffer = io.BytesIO(b'') + 
buffer.write(json.dumps({'my_field': 1}).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def test_manifest_is_correctly_read(self): + """Tests that the manifest is correctly read and used to set the archive + schema version if it exists and that the cases of a missing or invalid + manifest are handled correctly.""" + + # No manifest exists; should default to archive schema version 0 (legacy). + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive._archive_schema_version, 0) + + # Invalid manifest; should default to version 0. + self.mock.file_exists.return_value = True + self._generate_invalid_manifest() + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive._archive_schema_version, 0) + + # Valid manifest. + self._generate_manifest(1) + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive._archive_schema_version, 1) From e97751dd2c1fe7a5d53d34f123f6e90d1c63abed Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Fri, 6 Feb 2026 21:46:05 +0000 Subject: [PATCH 08/13] Try to fix import formatting and update docstring. --- .../build_management/build_archive.py | 46 ++++++++----------- .../build_management/build_archive_test.py | 1 - 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index d6579c6fdf..f4a7801625 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -25,7 +25,6 @@ from clusterfuzz._internal.metrics import logs from clusterfuzz._internal.system import archive - # Extensions to exclude when unarchiving a fuzz target. Note that fuzz target # own files like seed corpus, options, etc are covered by its own regex. 
FUZZ_TARGET_EXCLUDED_EXTENSIONS = [ @@ -226,18 +225,34 @@ class ChromeBuildArchive(DefaultBuildArchive): Expects a manifest file named `clusterfuzz_manifest.json` in the root of the archive to decide which schema version to use when interpreting its contents. - The legacy schema is applied to archives with no manifest. + The legacy schema is applied to archives with no manifest. Defaults to using + the default unpacker in case something goes wrong. Under the legacy schema, fuzz targets were assumed to be at the root of the archive while runtime_deps starting with `../../` were remapped to `/src_root/`. - Example archive tree: + Given the following runtime_deps: + + my_fuzzer.runtime_deps: + ========== + ./my_fuzzer + my_fuzzer.options + my_fuzzer.owners + my_fuzzer.runtime_deps + ./libbase.so + ./libatomic.so + ../../.vpython3 + ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib + + The legacy schema would expect an archive with the following structure: ========== my_fuzzer my_fuzzer.options my_ruzzer.owners my_fuzzer.runtime_deps + libbase.so + libatomic.so # etc. for all fuzz targets src_root/ .vpython3 @@ -245,21 +260,11 @@ class ChromeBuildArchive(DefaultBuildArchive): # all instrumented libs # etc. for other deps - my_fuzzer.runtime_deps: - ========== - ./my_fuzzer - my_fuzzer.options - my_fuzzer.owners - my_fuzzer.runtime_deps - ../../.vpython3 - ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ld-linux-x86-64.so.2 - # etc. - Schema version 1 does away with `/src_root/` and interprets runtime_deps entries as file paths relative to the runtime_deps file, which lives in the build directory along with fuzz target binaries. 
- Example archive tree: + Expected archive structure with the same runtime_deps: ========== out/build/my_fuzzer out/build/my_fuzzer.options @@ -272,19 +277,6 @@ class ChromeBuildArchive(DefaultBuildArchive): third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ # all instrumented libs # etc. for other deps - - my_fuzzer.runtime_deps: - ========== - ./my_fuzzer - my_fuzzer.options - my_fuzzer.owners - my_fuzzer.runtime_deps - ./libbase.so - ./libatomic.so - ../../.vpython3 - ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib - - Defaults to using the default unpacker in case something goes wrong. """ def __init__(self, diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index 5325e0d9bc..f95bd07637 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -25,7 +25,6 @@ from clusterfuzz._internal.system import shell from clusterfuzz._internal.tests.test_libs import helpers as test_helpers - TESTDATA_PATH = os.path.join(os.path.dirname(__file__), 'build_archive_data') From 3df73b0d0588b17f17b0e0b0a629ffd94d607b93 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Mon, 9 Feb 2026 16:35:15 +0000 Subject: [PATCH 09/13] Formatting. 
--- src/clusterfuzz/_internal/build_management/build_archive.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index f4a7801625..a912da97c9 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -363,9 +363,8 @@ def _get_prefix_matcher(self, prefix: str) -> Callable[[str], bool]: def _get_filename_matcher(self, file: str) -> Callable[[str], bool]: return lambda f: os.path.basename(f) == file - def _match_files( - self, matchers: List[Callable[[str], - bool]]) -> List[archive.ArchiveMemberInfo]: + def _match_files(self, matchers: List[Callable[[str], bool]] + ) -> List[archive.ArchiveMemberInfo]: res = [] for member in self.list_members(): if any(matcher(member.name) for matcher in matchers): From f722d8cde2edcf054e73d3eaaac8f9fb191d8b3e Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Mon, 9 Feb 2026 18:07:20 +0000 Subject: [PATCH 10/13] Fix linter errors. 
--- .../_internal/build_management/build_archive.py | 10 ++++++++-- .../core/build_management/build_archive_test.py | 12 +++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index a912da97c9..e47ca4a79e 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -243,7 +243,9 @@ class ChromeBuildArchive(DefaultBuildArchive): ./libbase.so ./libatomic.so ../../.vpython3 - ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib + ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib + /lib/ + The legacy schema would expect an archive with the following structure: ========== @@ -299,7 +301,8 @@ def __init__(self, self._archive_schema_version = manifest.get('archive_schema_version') if self._archive_schema_version is None: logs.warning( - 'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field' + 'clusterfuzz_manifest.json was incorrectly formatted or missing an ' + 'archive_schema_version field' ) self._archive_schema_version = default_archive_schema_version else: @@ -310,6 +313,9 @@ def root_dir(self) -> str: self._root_dir = super().root_dir() # pylint: disable=attribute-defined-outside-init return self._root_dir + def archive_schema_version(self) -> int: + return self._archive_schema_version + def get_dependency_path(self, path: str, deps_file_path: str) -> str: """Deps are given as paths relative to the deps file where they are listed, so we need to translate them to the corresponding paths relative to the diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index f95bd07637..7968bc51a6 100644 --- 
a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -235,7 +235,9 @@ def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers def _set_archive_schema_version(self, version): - self.build._archive_schema_version = version + self.build = build_archive.ChromeBuildArchive( + self.mock.open.return_value, version) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) def test_possible_dependencies_legacy(self, dir_prefix): @@ -289,7 +291,7 @@ def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_dsyms_are_correctly_unpacked(self, dir_prefix): + def test_dsyms_are_correctly_unpacked_legacy(self, dir_prefix): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. """ needed_files = self._generate_possible_fuzzer_dependencies_legacy( @@ -392,15 +394,15 @@ def test_manifest_is_correctly_read(self): # No manifest exists; should default to archive schema version 0 (legacy). test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) - self.assertEqual(test_archive._archive_schema_version, 0) + self.assertEqual(test_archive.archive_schema_version(), 0) # Invalid manifest; should default to version 0. self.mock.file_exists.return_value = True self._generate_invalid_manifest() test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) - self.assertEqual(test_archive._archive_schema_version, 0) + self.assertEqual(test_archive.archive_schema_version(), 0) # Valid manifest. 
self._generate_manifest(1) test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) - self.assertEqual(test_archive._archive_schema_version, 1) + self.assertEqual(test_archive.archive_schema_version(), 1) From 5f15064583ed3b17ddc6eb9e62fc3ec093e2c1da Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Mon, 9 Feb 2026 18:18:00 +0000 Subject: [PATCH 11/13] Formatting. --- src/clusterfuzz/_internal/build_management/build_archive.py | 3 +-- .../tests/core/build_management/build_archive_test.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index e47ca4a79e..4481b3d075 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -302,8 +302,7 @@ def __init__(self, if self._archive_schema_version is None: logs.warning( 'clusterfuzz_manifest.json was incorrectly formatted or missing an ' - 'archive_schema_version field' - ) + 'archive_schema_version field') self._archive_schema_version = default_archive_schema_version else: self._archive_schema_version = default_archive_schema_version diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index 7968bc51a6..7fc0e22b61 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -235,8 +235,8 @@ def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers def _set_archive_schema_version(self, version): - self.build = build_archive.ChromeBuildArchive( - self.mock.open.return_value, version) + self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value, + version) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) From 
953abebb00903303aad544ad0d3d4064fe71d5b5 Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Mon, 9 Feb 2026 21:11:33 +0000 Subject: [PATCH 12/13] Formatting. --- .../tests/core/build_management/build_archive_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index 7fc0e22b61..791446f9fa 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -238,7 +238,6 @@ def _set_archive_schema_version(self, version): self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value, version) - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) def test_possible_dependencies_legacy(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from @@ -365,6 +364,8 @@ def setUp(self): self.mock.file_exists.return_value = False def _generate_manifest(self, archive_schema_version): + """Mocks open calls so that they return a buffer containing valid JSON for + the given archive schema version.""" def _mock_open(_): buffer = io.BytesIO(b'') @@ -378,6 +379,8 @@ def _mock_open(_): self.mock.open.return_value.open.side_effect = _mock_open def _generate_invalid_manifest(self): + """Mocks open calls so that they return a buffer containing invalid contents + for clusterfuzz_manifest.json.""" def _mock_open(_): buffer = io.BytesIO(b'') From 73f4f361f29500e0fc5925dd0b3e1b41f23ba7ce Mon Sep 17 00:00:00 2001 From: Martin Verde Date: Tue, 10 Feb 2026 18:31:23 +0000 Subject: [PATCH 13/13] Add docstring. 
--- src/clusterfuzz/_internal/build_management/build_archive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 4481b3d075..cd2fb12de5 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -313,6 +313,7 @@ def root_dir(self) -> str: return self._root_dir def archive_schema_version(self) -> int: + """Returns the schema version number for this archive.""" return self._archive_schema_version def get_dependency_path(self, path: str, deps_file_path: str) -> str: