Skip to content

Commit b613d80

Browse files
letitz and mverde authored
Add handling for reformatted archives conditional on archive format version (#5145)
Clusterfuzz will now look for a file called clusterfuzz_manifest.json at the root of Chrome archives and decide how to interpret runtime dependencies based on the json's version field. Version 0 (or no manifest file found) means to keep using the legacy logic while version 1 means to interpret relative dependency paths as relative to the corresponding runtime_deps file instead of the archive root. Version 1 also stops expecting a src_root/ directory in the archive root. --------- Co-authored-by: Martin Verde <thesalsa@google.com>
1 parent 14d4000 commit b613d80

2 files changed

Lines changed: 307 additions & 44 deletions

File tree

src/clusterfuzz/_internal/build_management/build_archive.py

Lines changed: 131 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""Build Archive manager."""
1515

1616
import abc
17+
import json
1718
import os
1819
from typing import BinaryIO
1920
from typing import Callable
@@ -219,29 +220,144 @@ def unpack(self,
219220

220221
class ChromeBuildArchive(DefaultBuildArchive):
221222
"""Handles chrome build archives. This special cases the default behaviour by
222-
looking at the content of the `.runtime_deps` file, in order to unpack all the
223-
fuzzer dependencies correctly.
224-
In case something goes wrong, this defaults to using the default unpacker.
223+
looking at the content of the `.runtime_deps` file for each fuzzer target in
224+
order to unpack all of its dependencies correctly.
225+
226+
Expects a manifest file named `clusterfuzz_manifest.json` in the root of the
227+
archive to decide which schema version to use when interpreting its contents.
228+
The legacy schema is applied to archives with no manifest. Defaults to using
229+
the default unpacker in case something goes wrong.
230+
231+
Under the legacy schema, fuzz targets were assumed to be at the root of the
232+
archive while runtime_deps starting with `../../` were remapped to
233+
`/src_root/`.
234+
235+
Given the following runtime_deps:
236+
237+
my_fuzzer.runtime_deps:
238+
==========
239+
./my_fuzzer
240+
my_fuzzer.options
241+
my_fuzzer.owners
242+
my_fuzzer.runtime_deps
243+
./libbase.so
244+
./libatomic.so
245+
../../.vpython3
246+
../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib
247+
/lib/
248+
249+
250+
The legacy schema would expect an archive with the following structure:
251+
==========
252+
my_fuzzer
253+
my_fuzzer.options
254+
my_fuzzer.owners
255+
my_fuzzer.runtime_deps
256+
libbase.so
257+
libatomic.so
258+
# etc. for all fuzz targets
259+
src_root/
260+
.vpython3
261+
third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/
262+
# all instrumented libs
263+
# etc. for other deps
264+
265+
Schema version 1 does away with `/src_root/` and interprets runtime_deps
266+
entries as file paths relative to the runtime_deps file, which lives in the
267+
build directory along with fuzz target binaries.
268+
269+
Expected archive structure with the same runtime_deps:
270+
==========
271+
out/build/my_fuzzer
272+
out/build/my_fuzzer.options
273+
out/build/my_fuzzer.owners
274+
out/build/my_fuzzer.runtime_deps
275+
out/build/libbase.so
276+
out/build/libatomic.so
277+
# etc. for all fuzz targets and deps in the build directory
278+
.vpython3
279+
third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/
280+
# all instrumented libs
281+
# etc. for other deps
225282
"""
226283

284+
def __init__(self,
285+
reader: archive.ArchiveReader,
286+
default_archive_schema_version: int = 0):
287+
"""Initializes a `ChromeBuildArchive` with the given reader.
288+
289+
Arguments:
290+
reader: See `DefaultBuildArchive`.
291+
default_archive_schema_version: Specifies which version of a build archive
292+
to expect if `clusterfuzz_manifest.json` is missing or badly formatted.
293+
"""
294+
super().__init__(reader)
295+
# The manifest may not exist for earlier versions of archives. In this
296+
# case, fall back to `default_archive_schema_version` (0 by default).
297+
manifest_path = 'clusterfuzz_manifest.json'
298+
if self.file_exists(manifest_path):
299+
with self.open(manifest_path) as f:
300+
manifest = json.load(f)
301+
self._archive_schema_version = manifest.get('archive_schema_version')
302+
if self._archive_schema_version is None:
303+
logs.warning(
304+
'clusterfuzz_manifest.json was incorrectly formatted or missing an '
305+
'archive_schema_version field')
306+
self._archive_schema_version = default_archive_schema_version
307+
else:
308+
self._archive_schema_version = default_archive_schema_version
309+
227310
def root_dir(self) -> str:
228311
if not hasattr(self, '_root_dir'):
229312
self._root_dir = super().root_dir() # pylint: disable=attribute-defined-outside-init
230313
return self._root_dir
231314

232-
def to_archive_path(self, path: str) -> str:
233-
"""Deps are relative to the Chrome root directory. However, there might be
234-
a common root directory in the archive, which means we need to make sure
235-
the file path is correct.
315+
def archive_schema_version(self) -> int:
316+
"""Returns the schema version number for this archive."""
317+
return self._archive_schema_version
318+
319+
def get_dependency_path(self, path: str, deps_file_path: str) -> str:
320+
"""Deps are given as paths relative to the deps file where they are listed,
321+
so we need to translate them to the corresponding paths relative to the
322+
archive root.
236323
237324
Args:
238-
path: the dependency path relative to Chrome's root directory.
325+
path: the dependency path relative to the deps file.
326+
deps_file_path: the path to the deps file, relative to the archive root.
239327
240328
Returns:
241-
the path relative to the archive.
329+
the dependency path relative to the archive root.
242330
"""
243-
path = os.path.normpath(path)
244331

332+
# Archive schema version 0 represents legacy behavior. For newer archive
333+
# versions, runtime_deps that were formerly stored under
334+
# {self.root_dir()}/src_root/ are now stored in the root directory, while
335+
# the build artifacts formerly stored in the root directory are now stored
336+
# in the build directory.
337+
338+
if self._archive_schema_version > 0:
339+
# Assumes the dependency path is relative to the deps file and
340+
# transforms it into a full path relative to the archive root. For
341+
# example:
342+
#
343+
# deps_file_path: "/A/B/fuzz_target.runtime_deps"
344+
# os.path.dirname(deps_file_path) => "/A/B" (call this DEPS_DIR)
345+
# path1: "./my_dep"
346+
# path2: "../../C/my_dep2"
347+
# path3: "D/my_dep3"
348+
#
349+
# os.path.join(DEPS_DIR, path1) => "/A/B/./my_dep"
350+
# os.path.join(DEPS_DIR, path2) => "/A/B/../../C/my_dep2"
351+
# os.path.join(DEPS_DIR, path3) => "/A/B/D/my_dep3"
352+
#
353+
# os.path.normpath(os.path.join(DEPS_DIR, path1)) => "/A/B/my_dep"
354+
# os.path.normpath(os.path.join(DEPS_DIR, path2)) => "/C/my_dep2"
355+
# os.path.normpath(os.path.join(DEPS_DIR, path3)) => "/A/B/D/my_dep3"
356+
return os.path.normpath(
357+
os.path.join(os.path.dirname(deps_file_path), path))
358+
359+
# Legacy behavior. Remap `../../` to `src_root/`.
360+
path = os.path.normpath(path)
245361
if path.startswith('../../'):
246362
path = path.replace('../../', 'src_root/')
247363

@@ -271,7 +387,7 @@ def _get_common_files(self) -> List[str]:
271387

272388
def get_target_dependencies(
273389
self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]:
274-
target_path = self.to_archive_path(fuzz_target)
390+
target_path = self.get_path_for_target(fuzz_target)
275391
deps_file = f'{target_path}.runtime_deps'
276392
if not self.file_exists(deps_file):
277393
logs.warning(f'runtime_deps file not found for {target_path}')
@@ -280,7 +396,10 @@ def get_target_dependencies(
280396
res = []
281397
matchers = []
282398
with self.open(deps_file) as f:
283-
deps = [self.to_archive_path(l.decode()) for l in f.read().splitlines()]
399+
deps = [
400+
self.get_dependency_path(l.decode(), deps_file)
401+
for l in f.read().splitlines()
402+
]
284403
for dep in deps:
285404
# We need to match the file prefixes here, because some of the deps are
286405
# globbing the whole directory. Same for files, on mac platform, we

0 commit comments

Comments
 (0)