diff --git a/configs/test/batch/batch.yaml b/configs/test/batch/batch.yaml index 12788e149d..a77ee1e01e 100644 --- a/configs/test/batch/batch.yaml +++ b/configs/test/batch/batch.yaml @@ -76,6 +76,9 @@ mapping: name: east4-network2 weight: 1 project: 'test-clusterfuzz' +queue_check_regions: + - us-central1 + - us-east4 subconfigs: central1-network1: region: 'us-central1' diff --git a/src/clusterfuzz/_internal/base/external_users.py b/src/clusterfuzz/_internal/base/external_users.py index 7c84809bb2..a9d4da563c 100644 --- a/src/clusterfuzz/_internal/base/external_users.py +++ b/src/clusterfuzz/_internal/base/external_users.py @@ -22,6 +22,9 @@ MEMCACHE_TTL_IN_SECONDS = 15 * 60 +# OSS-Fuzz issue tracker CC group +OSS_FUZZ_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com' + def _fuzzers_for_job(job_type, include_parents): """Return all fuzzers that have the job associated. @@ -198,7 +201,10 @@ def _allowed_users_for_entity(name, entity_kind, auto_cc=None): return sorted(allowed_users) -def _cc_users_for_entity(name, entity_type, security_flag): +def _cc_users_for_entity(name, + entity_type, + security_flag, + allow_cc_group_for_job=True): """Return CC users for entity.""" users = _allowed_users_for_entity(name, entity_type, data_types.AutoCCType.ALL) @@ -208,6 +214,20 @@ def _cc_users_for_entity(name, entity_type, security_flag): _allowed_users_for_entity(name, entity_type, data_types.AutoCCType.SECURITY)) + if (entity_type != data_types.PermissionEntityKind.JOB or + not allow_cc_group_for_job): + return sorted(users) + + # CC group is only available for jobs, as it is not possible to infer the + # project from the other permission entity kinds alone. + users_in_cc_group = _allowed_users_for_entity( + name, entity_type, data_types.AutoCCType.USE_CC_GROUP) + if users_in_cc_group: + # Assume users are synced with the project group. + group_name = get_cc_group_from_job(name) + if group_name: + users.append(group_name) + return sorted(users) @@ -336,15 +356,28 @@ def is_upload_allowed_for_user(user_email): return bool(permissions.get()) -def cc_users_for_job(job_type, security_flag): +def cc_users_for_job(job_type, security_flag, allow_cc_group=True): """Return external users that should be CC'ed according to the given rule. Args: job_type: The name of the job security_flag: Whether or not the CC is for a security issue. + allow_cc_group: Whether to allow including the project cc group from the + job, if exists any user with the use cc group auto_cc type. Returns: A list of user emails that should be CC'ed. 
""" return _cc_users_for_entity(job_type, data_types.PermissionEntityKind.JOB, - security_flag) + security_flag, allow_cc_group) + + +def get_cc_group_from_job(job_type: str) -> str: + """Docstring for get_cc_group_from_entity""" + project_name = data_handler.get_project_name(job_type) + return get_oss_fuzz_project_cc_group(project_name) + + +def get_oss_fuzz_project_cc_group(project_name: str) -> str | None: + """Return oss-fuzz issue tracker CC group email for a project.""" + return f'{project_name}{OSS_FUZZ_CC_GROUP_SUFFIX}' diff --git a/src/clusterfuzz/_internal/base/memoize.py b/src/clusterfuzz/_internal/base/memoize.py index ae3063b0ce..8b214f9f91 100644 --- a/src/clusterfuzz/_internal/base/memoize.py +++ b/src/clusterfuzz/_internal/base/memoize.py @@ -17,6 +17,7 @@ import functools import json import threading +import time from clusterfuzz._internal.base import persistent_cache from clusterfuzz._internal.metrics import logs @@ -89,6 +90,30 @@ def get_key(self, func, args, kwargs): return _default_key(func, args, kwargs) +class InMemory(FifoInMemory): + """In-memory caching engine with TTL.""" + + def __init__(self, ttl_in_seconds, capacity=1000): + super().__init__(capacity) + self.ttl_in_seconds = ttl_in_seconds + + def put(self, key, value): + """Put (key, value) into cache.""" + super().put(key, (value, time.time() + self.ttl_in_seconds)) + + def get(self, key): + """Get the value from cache.""" + entry = super().get(key) + if entry is None: + return None + + value, expiry = entry + if expiry < time.time(): + return None + + return value + + class FifoOnDisk: """On-disk caching engine.""" diff --git a/src/clusterfuzz/_internal/batch/service.py b/src/clusterfuzz/_internal/batch/service.py index c97667797e..f8b396d1c4 100644 --- a/src/clusterfuzz/_internal/batch/service.py +++ b/src/clusterfuzz/_internal/batch/service.py @@ -1,8 +1,8 @@ # Copyright 2025 Google LLC # -# Licensed under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # @@ -18,14 +18,19 @@ and provides a simple interface for scheduling ClusterFuzz tasks. 
""" import collections +import json +import random import threading from typing import Dict from typing import List from typing import Tuple +import urllib.request import uuid +import google.auth.transport.requests from google.cloud import batch_v1 as batch +from clusterfuzz._internal.base import memoize from clusterfuzz._internal.base import retry from clusterfuzz._internal.base import tasks from clusterfuzz._internal.base import utils @@ -65,6 +70,13 @@ # See https://cloud.google.com/batch/quotas#job_limits MAX_CONCURRENT_VMS_PER_JOB = 1000 +MAX_QUEUE_SIZE = 100 + + +class AllRegionsOverloadedError(Exception): + """Raised when all batch regions are overloaded.""" + + _local = threading.local() DEFAULT_RETRY_COUNT = 0 @@ -184,6 +196,50 @@ def count_queued_or_scheduled_tasks(project: str, return (queued, scheduled) +@memoize.wrap(memoize.InMemory(60)) +def get_region_load(project: str, region: str) -> int: + """Gets the current load (queued and scheduled jobs) for a region.""" + creds, _ = credentials.get_default() + if not creds.valid: + creds.refresh(google.auth.transport.requests.Request()) + + headers = { + 'Authorization': f'Bearer {creds.token}', + 'Content-Type': 'application/json' + } + + try: + url = (f'https://batch.googleapis.com/v1alpha/projects/{project}/locations/' + f'{region}/jobs:countByState?states=QUEUED') + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req) as response: + if response.status != 200: + logs.error( + f'Batch countByState failed: {response.status} {response.read()}') + return 0 + + data = json.loads(response.read()) + logs.info(f'Batch countByState response for {region}: {data}') + # The API returns a list of state counts. + # Example: { "jobCounts": { "QUEUED": "10" } } + total = 0 + + # Log data for debugging first few times if needed, or just rely on structure. + # We'll assume the structure is standard for Google APIs. + job_counts = data.get('jobCounts', {}) + for state, count in job_counts.items(): + count = int(count) + if state == 'QUEUED': + total += count + else: + logs.error(f'Unknown state: {state}') + + return total + except Exception as e: + logs.error(f'Failed to get region load for {region}: {e}') + return 0 + + def _get_batch_config(): """Returns the batch config. This function was made to make mocking easier.""" return local_config.BatchConfig() @@ -191,7 +247,7 @@ def _get_batch_config(): def is_remote_task(command: str, job_name: str) -> bool: """Returns whether a task is configured to run remotely on GCP Batch. - + This is determined by checking if a valid batch workload specification can be found for the given command and job type. """ @@ -242,15 +298,46 @@ def _get_config_names(batch_tasks: List[remote_task_types.RemoteTask]): def _get_subconfig(batch_config, instance_spec): - # TODO(metzman): Make this pick one at random or based on conditions. all_subconfigs = batch_config.get('subconfigs', {}) instance_subconfigs = instance_spec['subconfigs'] - weighted_subconfigs = [ - WeightedSubconfig(subconfig['name'], subconfig['weight']) - for subconfig in instance_subconfigs - ] - weighted_subconfig = utils.random_weighted_choice(weighted_subconfigs) - return all_subconfigs[weighted_subconfig.name] + + queue_check_regions = batch_config.get('queue_check_regions') + if not queue_check_regions: + logs.info( + 'Skipping batch load check because queue_check_regions is not configured.' 
+ ) + weighted_subconfigs = [ + WeightedSubconfig(subconfig['name'], subconfig['weight']) + for subconfig in instance_subconfigs + ] + weighted_subconfig = utils.random_weighted_choice(weighted_subconfigs) + return all_subconfigs[weighted_subconfig.name] + + # Check load for configured regions. + healthy_subconfigs = [] + project = batch_config.get('project') + + for subconfig in instance_subconfigs: + name = subconfig['name'] + conf = all_subconfigs[name] + region = conf['region'] + + if region in queue_check_regions: + load = get_region_load(project, region) + logs.info(f'Region {region} has {load} queued jobs.') + if load >= MAX_QUEUE_SIZE: + logs.info(f'Region {region} overloaded (load={load}). Skipping.') + continue + + healthy_subconfigs.append(name) + + if not healthy_subconfigs: + logs.error('All candidate regions are overloaded.') + raise AllRegionsOverloadedError('All candidate regions are overloaded.') + + # Randomly pick one from healthy regions to avoid thundering herd. + chosen_name = random.choice(healthy_subconfigs) + return all_subconfigs[chosen_name] def _get_specs_from_config( @@ -277,7 +364,6 @@ def _get_specs_from_config( versioned_images_map = instance_spec.get('versioned_docker_images') if (base_os_version and versioned_images_map and base_os_version in versioned_images_map): - # New path: Use the versioned image if specified and available. docker_image_uri = versioned_images_map[base_os_version] else: # Fallback/legacy path: Use the original docker_image key. @@ -324,7 +410,7 @@ def _get_specs_from_config( class GcpBatchService(remote_task_types.RemoteTaskInterface): """A high-level service for creating and managing remote tasks. - + This service provides a simple interface for scheduling ClusterFuzz tasks on GCP Batch. It handles the details of creating batch jobs and tasks, and provides a way to check if a task is configured to run remotely. @@ -383,20 +469,27 @@ def create_utask_main_job(self, module: str, job_type: str, def create_utask_main_jobs(self, remote_tasks: List[remote_task_types.RemoteTask]): """Creates a batch job for a list of uworker main tasks. - + This method groups the tasks by their workload specification and creates a separate batch job for each group. This allows tasks with similar requirements to be processed together, which can improve efficiency. 
""" job_specs = collections.defaultdict(list) - specs = _get_specs_from_config(remote_tasks) + try: + specs = _get_specs_from_config(remote_tasks) + + # Return the remote tasks as uncreated task + # if all regions are overloaded + except AllRegionsOverloadedError: + return remote_tasks + for remote_task in remote_tasks: logs.info(f'Scheduling {remote_task.command}, {remote_task.job_type}.') spec = specs[(remote_task.command, remote_task.job_type)] job_specs[spec].append(remote_task.input_download_url) logs.info('Creating batch jobs.') - logs.info('Batching utask_mains.') + for spec, input_urls in job_specs.items(): for input_urls_portion in utils.batched(input_urls, MAX_CONCURRENT_VMS_PER_JOB - 1): diff --git a/src/clusterfuzz/_internal/bot/tasks/task_types.py b/src/clusterfuzz/_internal/bot/tasks/task_types.py index a9853ce03f..1d0e38a8ae 100644 --- a/src/clusterfuzz/_internal/bot/tasks/task_types.py +++ b/src/clusterfuzz/_internal/bot/tasks/task_types.py @@ -162,9 +162,10 @@ def execute(self, task_argument, job_type, uworker_env): utask_main_queue_size = tasks.get_utask_main_queue_size() utask_main_queue_limit = UTASK_MAIN_QUEUE_LIMIT_DEFAULT - utask_flag = feature_flags.FeatureFlags.UTASK_MAIN_QUEUE_LIMIT.flag - if utask_flag and utask_flag.enabled: - utask_main_queue_limit = utask_flag.content + utask_flag = feature_flags.FeatureFlags.UTASK_MAIN_QUEUE_LIMIT + if utask_flag.enabled and utask_flag.content: + utask_main_queue_limit = int(utask_flag.content) + if utask_main_queue_size > utask_main_queue_limit: base_os_version = environment.get_value('BASE_OS_VERSION') queue_name = UTASK_MAIN_QUEUE if not base_os_version else \ diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 133fc1c2c1..cd2fb12de5 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -14,6 +14,7 @@ """Build Archive manager.""" import abc +import json import os from typing import BinaryIO from typing import Callable @@ -219,29 +220,144 @@ def unpack(self, class ChromeBuildArchive(DefaultBuildArchive): """Handles chrome build archives. This special cases the default behaviour by - looking at the content of the `.runtime_deps` file, in order to unpack all the - fuzzer dependencies correctly. - In case something goes wrong, this defaults to using the default unpacker. + looking at the content of the `.runtime_deps` file for each fuzzer target in + order to unpack all of its dependencies correctly. + + Expects a manifest file named `clusterfuzz_manifest.json` in the root of the + archive to decide which schema version to use when interpreting its contents. + The legacy schema is applied to archives with no manifest. Defaults to using + the default unpacker in case something goes wrong. + + Under the legacy schema, fuzz targets were assumed to be at the root of the + archive while runtime_deps starting with `../../` were remapped to + `/src_root/`. + + Given the following runtime_deps: + + my_fuzzer.runtime_deps: + ========== + ./my_fuzzer + my_fuzzer.options + my_fuzzer.owners + my_fuzzer.runtime_deps + ./libbase.so + ./libatomic.so + ../../.vpython3 + ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib + /lib/ + + + The legacy schema would expect an archive with the following structure: + ========== + my_fuzzer + my_fuzzer.options + my_ruzzer.owners + my_fuzzer.runtime_deps + libbase.so + libatomic.so + # etc. 
for all fuzz targets
+  src_root/
+    .vpython3
+    third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/
+    # all instrumented libs
+    # etc. for other deps
+
+  Schema version 1 does away with `/src_root/` and interprets runtime_deps
+  entries as file paths relative to the runtime_deps file, which lives in the
+  build directory along with fuzz target binaries.
+
+  Expected archive structure with the same runtime_deps:
+  ==========
+  out/build/my_fuzzer
+  out/build/my_fuzzer.options
+  out/build/my_fuzzer.owners
+  out/build/my_fuzzer.runtime_deps
+  out/build/libbase.so
+  out/build/libatomic.so
+  # etc. for all fuzz targets and deps in the build directory
+  .vpython3
+  third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/
+  # all instrumented libs
+  # etc. for other deps
   """

+  def __init__(self,
+               reader: archive.ArchiveReader,
+               default_archive_schema_version: int = 0):
+    """Initializes a `ChromeBuildArchive` with the given reader.
+
+    Arguments:
+      reader: See `DefaultBuildArchive`.
+      default_archive_schema_version: Specifies which version of a build archive
+        to expect if `clusterfuzz_manifest.json` is missing or badly formatted.
+    """
+    super().__init__(reader)
+    # The manifest may not exist for earlier versions of archives. In this
+    # case, default to schema version 0.
+    manifest_path = 'clusterfuzz_manifest.json'
+    if self.file_exists(manifest_path):
+      with self.open(manifest_path) as f:
+        manifest = json.load(f)
+      self._archive_schema_version = manifest.get('archive_schema_version')
+      if self._archive_schema_version is None:
+        logs.warning(
+            'clusterfuzz_manifest.json was incorrectly formatted or missing an '
+            'archive_schema_version field')
+        self._archive_schema_version = default_archive_schema_version
+    else:
+      self._archive_schema_version = default_archive_schema_version
+
   def root_dir(self) -> str:
     if not hasattr(self, '_root_dir'):
       self._root_dir = super().root_dir()  # pylint: disable=attribute-defined-outside-init
     return self._root_dir

-  def to_archive_path(self, path: str) -> str:
-    """Deps are relative to the Chrome root directory. However, there might be
-    a common root directory in the archive, which means we need to make sure
-    the file path is correct.
+  def archive_schema_version(self) -> int:
+    """Returns the schema version number for this archive."""
+    return self._archive_schema_version
+
+  def get_dependency_path(self, path: str, deps_file_path: str) -> str:
+    """Deps are given as paths relative to the deps file where they are listed,
+    so we need to translate them to the corresponding paths relative to the
+    archive root.

     Args:
-      path: the dependency path relative to Chrome's root directory.
+      path: the dependency path relative to the deps file.
+      deps_file_path: the path to the deps file, relative to the archive root.

     Returns:
-      the path relative to the archive.
+      the dependency path relative to the archive root.
     """
-    path = os.path.normpath(path)
+    # Archive schema version 0 represents legacy behavior. For newer archive
+    # versions, runtime_deps that were formerly stored under
+    # {self.root_dir()}/src_root/ are now stored in the root directory, while
+    # the build artifacts formerly stored in the root directory are now stored
+    # in the build directory.
+
+    if self._archive_schema_version > 0:
+      # Assumes the dependency path is relative to the deps file and
+      # transforms it into a full path relative to the archive root.
For + # example: + # + # deps_file_path: "/A/B/fuzz_target.runtime_deps" + # os.path.dirname(deps_file_path) => "/A/B/" (call this DEPS_DIR) + # path1: "./my_dep" + # path2: "../../C/my_dep2" + # path3: "D/my_dep3" + # + # os.path.join(DEPS_DIR, path1) => "/A/B/./my_dep" + # os.path.join(DEPS_DIR, path2) => "/A/B/../../C/my_dep2" + # os.path.join(DEPS_DIR, path3) => "/A/B/D/my_dep3" + # + # os.path.normpath(os.path.join(DEPS_DIR, path1)) => "/A/B/my_dep" + # os.path.normpath(os.path.join(DEPS_DIR, path2)) => "/C/my_dep2" + # os.path.normpath(os.path.join(DEPS_DIR, path3)) => "/A/B/D/my_dep3" + return os.path.normpath( + os.path.join(os.path.dirname(deps_file_path), path)) + + # Legacy behavior. Remap `../../` to `src_root/`. + path = os.path.normpath(path) if path.startswith('../../'): path = path.replace('../../', 'src_root/') @@ -271,7 +387,7 @@ def _get_common_files(self) -> List[str]: def get_target_dependencies( self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]: - target_path = self.to_archive_path(fuzz_target) + target_path = self.get_path_for_target(fuzz_target) deps_file = f'{target_path}.runtime_deps' if not self.file_exists(deps_file): logs.warning(f'runtime_deps file not found for {target_path}') @@ -280,7 +396,10 @@ def get_target_dependencies( res = [] matchers = [] with self.open(deps_file) as f: - deps = [self.to_archive_path(l.decode()) for l in f.read().splitlines()] + deps = [ + self.get_dependency_path(l.decode(), deps_file) + for l in f.read().splitlines() + ] for dep in deps: # We need to match the file prefixes here, because some of the deps are # globering the whole directory. Same for files, on mac platform, we diff --git a/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py b/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py index 844039313b..c01833a535 100644 --- a/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py +++ b/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py @@ -123,23 +123,6 @@ def unpack_crash_testcases(crash_testcases_directory): shell.remove_directory(directory_path) - # Rename all fuzzed testcase files as regular files. - logs.info('Renaming testcase files.') - for root, _, files in os.walk(crash_testcases_directory): - for filename in files: - if not filename.startswith(testcase_manager.FUZZ_PREFIX): - continue - - file_path = os.path.join(root, filename) - stripped_file_name = os.path.basename(file_path)[len( - testcase_manager.FUZZ_PREFIX):] - stripped_file_path = os.path.join( - os.path.dirname(file_path), stripped_file_name) - try: - os.rename(file_path, stripped_file_path) - except Exception as e: - raise RuntimeError(f'Failed to rename testcase {file_path}') from e - # Remove empty files and dirs to avoid the case where a fuzzer randomly # chooses an empty dir/file and generates zero testcases. shell.remove_empty_files(crash_testcases_directory) @@ -253,6 +236,34 @@ def filter_members(member, path): shell.remove_file(local_archive) +def rename_testcase_files(directory): + """Rename files with the 'fuzz-' prefix to avoid picking them up for fuzzing + without modifying them with a fuzzer. + """ + # Rename all fuzzed testcase files as regular files. 
+ for root, _, files in os.walk(directory): + for filename in files: + if not filename.startswith(testcase_manager.FUZZ_PREFIX): + continue + + file_path = os.path.join(root, filename) + stripped_file_name = os.path.basename(file_path)[len( + testcase_manager.FUZZ_PREFIX):] + stripped_file_path = os.path.join( + os.path.dirname(file_path), stripped_file_name) + try: + os.rename(file_path, stripped_file_path) + except Exception as e: + raise RuntimeError(f'Failed to rename testcase {file_path}') from e + + +def clean_up_filenames(tests_directory, test_folders): + """Rename files with the 'fuzz-' prefix in all test folders.""" + for folder in test_folders: + logs.info(f'Renaming testcase files in {folder}') + rename_testcase_files(os.path.join(tests_directory, folder)) + + def sync_tests(tests_archive_bucket: str, tests_archive_name: str, tests_directory: str): """Main sync routine.""" @@ -301,22 +312,28 @@ def sync_tests(tests_archive_bucket: str, tests_archive_name: str, create_symbolic_link(tests_directory, 'src/third_party/blink/web_tests', 'LayoutTests') + test_folders = [ + 'CrashTests', + 'LayoutTests', + 'WebKit/JSTests/es6', + 'WebKit/JSTests/stress', + 'WebKit/LayoutTests', + 'fuzzilli', + 'gecko-tests', + 'v8/test/mjsunit', + 'spidermonkey', + 'chakra', + 'webgl-conformance-tests', + ] + + clean_up_filenames(tests_directory, test_folders) + subprocess.check_call( [ 'zip', '-r', tests_archive_local, - 'CrashTests', - 'LayoutTests', - 'WebKit/JSTests/es6', - 'WebKit/JSTests/stress', - 'WebKit/LayoutTests', - 'fuzzilli', - 'gecko-tests', - 'v8/test/mjsunit', - 'spidermonkey', - 'chakra', - 'webgl-conformance-tests', + ] + test_folders + [ '-x', '*.cc', '-x', diff --git a/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py b/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py index 0df087ddf8..cd22eda774 100644 --- a/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py +++ b/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py @@ -13,24 +13,28 @@ # limitations under the License. """Cron to sync OSS-Fuzz projects groups used as CC in the issue tracker.""" +from clusterfuzz._internal.base import external_users from clusterfuzz._internal.base import utils from clusterfuzz._internal.cron import project_setup from clusterfuzz._internal.google_cloud_utils import google_groups from clusterfuzz._internal.metrics import logs -_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com' -_CC_GROUP_DESC = 'External CCs in OSS-Fuzz issue tracker for project' + +def get_project_cc_group_description(project_name: str) -> str: + """Return oss-fuzz issue tracker CC group description for a project.""" + oss_fuzz_cc_desc = 'External CCs in OSS-Fuzz issue tracker for project' + return f'{oss_fuzz_cc_desc}: {project_name}' def sync_project_cc_group(project_name, info): """Sync the project's google group used for CCing in the issue tracker.""" - group_name = f'{project_name}{_CC_GROUP_SUFFIX}' + group_name = external_users.get_oss_fuzz_project_cc_group(project_name) group_id = google_groups.get_group_id(group_name) # Create the group and bail out since the CIG API might delay to create a # new group. Add members will be done in the next cron run. 
if not group_id: - group_description = f'{_CC_GROUP_DESC}: {project_name}' + group_description = get_project_cc_group_description(project_name) created = google_groups.create_google_group( group_name, group_description=group_description) if not created: @@ -72,6 +76,7 @@ def sync_project_cc_group(project_name, info): def main(): """Sync OSS-Fuzz projects groups used to CC owners in the issue tracker.""" + logs.info('OSS-Fuzz CC groups sync started.') projects = project_setup.get_oss_fuzz_projects() for project, info in projects: diff --git a/src/clusterfuzz/_internal/cron/project_setup.py b/src/clusterfuzz/_internal/cron/project_setup.py index f60e9e47c9..34943ace3c 100644 --- a/src/clusterfuzz/_internal/cron/project_setup.py +++ b/src/clusterfuzz/_internal/cron/project_setup.py @@ -998,13 +998,13 @@ def sync_user_permissions(self, project, info): existing_permission = query.get() if existing_permission: continue - + # For OSS-Fuzz issue tracker, use the project cc google group. data_types.ExternalUserPermission( email=cc, entity_kind=data_types.PermissionEntityKind.JOB, entity_name=job_name, is_prefix=False, - auto_cc=data_types.AutoCCType.ALL).put() + auto_cc=data_types.AutoCCType.USE_CC_GROUP).put() def set_up(self, projects): """Do project setup. Return a list of all the project names that were set diff --git a/src/clusterfuzz/_internal/cron/schedule_fuzz.py b/src/clusterfuzz/_internal/cron/schedule_fuzz.py index ecd7a8d5c7..481b71cfb8 100644 --- a/src/clusterfuzz/_internal/cron/schedule_fuzz.py +++ b/src/clusterfuzz/_internal/cron/schedule_fuzz.py @@ -248,11 +248,10 @@ def schedule_fuzz_tasks() -> bool: creds = credentials.get_default()[0] preprocess_queue_size = get_queue_size(creds, project, tasks.PREPROCESS_QUEUE) + target_size = PREPROCESS_TARGET_SIZE_DEFAULT target_size_flag = feature_flags.FeatureFlags.PREPROCESS_QUEUE_SIZE_LIMIT - if target_size_flag and target_size_flag.enabled: - target_size = int(target_size_flag.value) - else: - target_size = PREPROCESS_TARGET_SIZE_DEFAULT + if target_size_flag.enabled and target_size_flag.content: + target_size = int(target_size_flag.content) num_tasks = target_size - preprocess_queue_size logs.info(f'Preprocess queue size: {preprocess_queue_size}. ' diff --git a/src/clusterfuzz/_internal/datastore/data_types.py b/src/clusterfuzz/_internal/datastore/data_types.py index e15966b61b..8b17953a73 100644 --- a/src/clusterfuzz/_internal/datastore/data_types.py +++ b/src/clusterfuzz/_internal/datastore/data_types.py @@ -208,6 +208,8 @@ class AutoCCType: ALL = 1 # Auto-CC only for security issues. SECURITY = 2 + # OSS-Fuzz specific - Auto-CC the user's project google group in all issues. + USE_CC_GROUP = 3 # Type of permission. Used by ExternalUserPermision. 
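To make the intent of the new AutoCCType.USE_CC_GROUP value concrete, here is a standalone sketch of how CC resolution is expected to behave once an OSS-Fuzz job's external permissions use the group-based type. The helper names and the 'libpng' example are illustrative, not part of the patch.

# Illustrative sketch, not part of the patch.
OSS_FUZZ_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com'


def project_cc_group(project_name):
  # Mirrors get_oss_fuzz_project_cc_group(): one group email per project.
  return f'{project_name}{OSS_FUZZ_CC_GROUP_SUFFIX}'


def cc_users_for_job(individual_ccs, project_name, has_use_cc_group_permission,
                     allow_cc_group=True):
  # Mirrors the tail of _cc_users_for_entity(): individual ALL/SECURITY CCs are
  # kept, and the synced project group is appended when any permission row is
  # marked USE_CC_GROUP (unless the caller opts out via allow_cc_group=False).
  users = list(individual_ccs)
  if allow_cc_group and has_use_cc_group_permission:
    users.append(project_cc_group(project_name))
  return sorted(users)


print(cc_users_for_job(['dev@example.com'], 'libpng', True))
# ['dev@example.com', 'libpng-ccs@oss-fuzz.com']

The group membership itself is maintained by the oss_fuzz_cc_groups cron, so issue filing only needs the group address rather than the individual emails.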
diff --git a/src/clusterfuzz/_internal/google_cloud_utils/credentials.py b/src/clusterfuzz/_internal/google_cloud_utils/credentials.py
index aa1deffde4..e47e831efa 100644
--- a/src/clusterfuzz/_internal/google_cloud_utils/credentials.py
+++ b/src/clusterfuzz/_internal/google_cloud_utils/credentials.py
@@ -16,12 +16,14 @@

 from google.auth import compute_engine
 from google.auth import credentials
+from google.auth import impersonated_credentials
 from google.auth.transport import requests
 from google.oauth2 import service_account

 from clusterfuzz._internal.base import memoize
 from clusterfuzz._internal.base import retry
 from clusterfuzz._internal.base import utils
+from clusterfuzz._internal.google_cloud_utils import compute_metadata
 from clusterfuzz._internal.google_cloud_utils import secret_manager
 from clusterfuzz._internal.metrics import logs
 from clusterfuzz._internal.system import environment
@@ -99,3 +101,31 @@ def get_signing_credentials(service_account_info):
       request, '', service_account_email=creds.service_account_email)
   token = creds.token
   return signing_creds, token
+
+
+def get_scoped_service_account_credentials(
+    scopes: list[str]) -> impersonated_credentials.Credentials | None:
+  """Gets scoped credentials by self-impersonating the service account."""
+  creds, _ = get_default()
+  service_account_email = getattr(creds, 'service_account_email', None)
+  if service_account_email == 'default':
+    # Resolve the default service account email using GCE metadata server.
+    service_account_email = compute_metadata.get(
+        'instance/service-accounts/default/email')
+
+  if not service_account_email:
+    logs.warning('Could not retrieve service account email when getting scoped '
+                 'credentials.')
+    return None
+
+  logs.info(
+      f'Using scoped credentials from service account: {service_account_email}')
+  scoped_creds = impersonated_credentials.Credentials(
+      source_credentials=creds,
+      target_principal=service_account_email,
+      target_scopes=scopes,
+  )
+  request = requests.Request()
+  scoped_creds.refresh(request)
+
+  return scoped_creds
diff --git a/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py b/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py
index 4096ba2065..e6407f6c8a 100644
--- a/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py
+++ b/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py
@@ -43,7 +43,8 @@ def get_identity_api() -> discovery.Resource | None:
 def get_group_settings_api() -> discovery.Resource | None:
   """Return the groups settings api client."""
   if not hasattr(_local, 'groups_settings_service'):
-    creds, _ = credentials.get_default()
+    scopes = ['https://www.googleapis.com/auth/apps.groups.settings']
+    creds = credentials.get_scoped_service_account_credentials(scopes)

     _local.groups_settings_service = discovery.build(
         'groupssettings', 'v1', credentials=creds, cache_discovery=False)
@@ -117,8 +118,8 @@ def create_google_group(group_name: str,
   """Create a google group."""
   identity_service = get_identity_api()

-  customer_id = customer_id or str(
-      local_config.ProjectConfig().get('groups_customer_id'))
+  customer_id = customer_id or local_config.ProjectConfig().get(
+      'groups_customer_id')
   if not customer_id:
     logs.error('No customer ID set.
Unable to create a new google group.') return None diff --git a/src/clusterfuzz/_internal/k8s/job_template.yaml b/src/clusterfuzz/_internal/k8s/job_template.yaml index 5d54de49df..843231c9b9 100644 --- a/src/clusterfuzz/_internal/k8s/job_template.yaml +++ b/src/clusterfuzz/_internal/k8s/job_template.yaml @@ -85,9 +85,5 @@ spec: emptyDir: medium: Memory sizeLimit: 1.9Gi - {% if is_kata %} - nodeSelector: - cloud.google.com/gke-nodepool: kata-enabled-pool - {% endif %} restartPolicy: "{{restart_policy}}" - backoffLimit: 0 \ No newline at end of file + backoffLimit: 0 diff --git a/src/clusterfuzz/_internal/platforms/android/kernel_utils.py b/src/clusterfuzz/_internal/platforms/android/kernel_utils.py index e5e2c25478..a68a05691d 100644 --- a/src/clusterfuzz/_internal/platforms/android/kernel_utils.py +++ b/src/clusterfuzz/_internal/platforms/android/kernel_utils.py @@ -114,7 +114,7 @@ def get_kernel_prefix_and_full_hash(): target = get_kernel_name() if not build_id or not target: logs.error('Could not get kernel parameters, exiting.') - return None + return None, None android_kernel_repo_data = _get_repo_prop_data(build_id, target) if android_kernel_repo_data: @@ -144,4 +144,4 @@ def get_kernel_hash_and_build_id(): if match: return match.group(2), match.group(3) - return None + return None, None diff --git a/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py b/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py index 56b45e7b41..118621e51d 100644 --- a/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py +++ b/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py @@ -157,7 +157,7 @@ def setUp(self): self.batch_service = batch_service.GcpBatchService() self.mock.uuid4.side_effect = [uuid.UUID(u) for u in UUIDS] - def test_create_utask_main_jobs(self): + def test_create_uworker_main_batch_jobs(self): """Tests that create_utask_main_jobs works as expected.""" # Create mock data. spec1 = batch_service.BatchWorkloadSpec( @@ -217,7 +217,23 @@ def test_create_utask_main_jobs(self): mock.call(expected_create_request_2), ]) - def test_create_utask_main_job(self): + def test_create_uworker_main_batch_jobs_all_regions_overloaded(self): + """Tests that create_utask_main_jobs returns tasks when all regions are overloaded.""" + tasks = [ + remote_task_types.RemoteTask('command1', 'job1', 'url1'), + remote_task_types.RemoteTask('command2', 'job2', 'url2'), + ] + with mock.patch('clusterfuzz._internal.batch.service._get_specs_from_config' + ) as mock_get_specs_from_config: + mock_get_specs_from_config.side_effect = batch_service.AllRegionsOverloadedError( + 'All regions overloaded') + + result = self.batch_service.create_utask_main_jobs(tasks) + + self.assertEqual(result, tasks) + self.mock_batch_client_instance.create_job.assert_not_called() + + def test_create_uworker_main_batch_job(self): """Tests that create_utask_main_job works as expected.""" # Create mock data. 
spec1 = batch_service.BatchWorkloadSpec( @@ -254,7 +270,7 @@ def test_create_utask_main_job(self): UUIDS[0], spec1, ['url1']) self.mock_batch_client_instance.create_job.assert_called_with( expected_create_request) - self.assertIsNone(result) + self.assertEqual(result, None) @test_utils.with_cloud_emulators('datastore') @@ -278,10 +294,141 @@ def test_is_remote_task(self): self.assertFalse(batch_service.is_remote_task('progression', 'job')) -if __name__ == '__main__': - unittest.main() +@test_utils.with_cloud_emulators('datastore') +class GetRegionLoadTest(unittest.TestCase): + """Tests for get_region_load.""" + + def setUp(self): + helpers.patch(self, [ + 'urllib.request.urlopen', + ]) + + def test_get_region_load_success(self): + """Tests get_region_load with a successful API response.""" + mock_response = mock.Mock() + mock_response.status = 200 + mock_response.read.return_value = b'{"jobCounts": {"QUEUED": "15"}}' + self.mock.urlopen.return_value.__enter__.return_value = mock_response + + load = batch_service.get_region_load('project_success', 'us-central1') + self.assertEqual(load, 15) -# pylint: disable=protected-access + def test_get_region_load_empty(self): + """Tests get_region_load with an empty response.""" + mock_response = mock.Mock() + mock_response.status = 200 + mock_response.read.return_value = b'{}' + self.mock.urlopen.return_value.__enter__.return_value = mock_response + + load = batch_service.get_region_load('project_empty', 'us-central1') + self.assertEqual(load, 0) + + def test_get_region_load_error(self): + """Tests get_region_load with an API error.""" + self.mock.urlopen.side_effect = Exception('error') + + load = batch_service.get_region_load('project_error', 'us-central1') + self.assertEqual(load, 0) + + +@test_utils.with_cloud_emulators('datastore') +class GetSubconfigLoadBalancingTest(unittest.TestCase): + """Tests for load balancing in _get_subconfig.""" + + def setUp(self): + helpers.patch(self, [ + 'clusterfuzz._internal.batch.service.get_region_load', + 'clusterfuzz._internal.batch.service.random.choice', + 'clusterfuzz._internal.base.utils.random_weighted_choice', + ]) + self.batch_config = { + 'project': 'test-project', + 'queue_check_regions': ['us-central1', 'us-east4'], + 'subconfigs': { + 'central1': { + 'region': 'us-central1', + 'network': 'n1' + }, + 'east4': { + 'region': 'us-east4', + 'network': 'n2' + }, + 'west1': { + 'region': 'us-west1', + 'network': 'n3' + }, + } + } + self.instance_spec = { + 'subconfigs': [ + { + 'name': 'central1', + 'weight': 1 + }, + { + 'name': 'east4', + 'weight': 1 + }, + ] + } + + def test_all_regions_healthy(self): + """Tests that a region is picked when all are healthy.""" + self.mock.get_region_load.return_value = 2 # Total load 2 < 100 + self.mock.choice.side_effect = lambda x: x[0] + + subconfig = batch_service._get_subconfig(self.batch_config, + self.instance_spec) + self.assertEqual(subconfig['region'], 'us-central1') + + def test_one_region_overloaded(self): + """Tests that overloaded regions are skipped.""" + # us-central1 (load 100) is overloaded, us-east4 (load 2) is healthy. 
+ self.mock.get_region_load.side_effect = [ + 100, # us-central1 + 2, # us-east4 + ] + + # random.choice should only see ['east4'] + def mock_choice(items): + self.assertEqual(items, ['east4']) + return items[0] + + self.mock.choice.side_effect = mock_choice + + subconfig = batch_service._get_subconfig(self.batch_config, + self.instance_spec) + self.assertEqual(subconfig['region'], 'us-east4') + + def test_all_regions_overloaded(self): + """Tests that AllRegionsOverloadedError is raised when no healthy regions exist.""" + self.mock.get_region_load.return_value = 100 # Load 100 is threshold for "overloaded" + + with self.assertRaises(batch_service.AllRegionsOverloadedError): + batch_service._get_subconfig(self.batch_config, self.instance_spec) + + def test_skip_load_check_if_not_in_config(self): + """Tests that load check is skipped for regions not in queue_check_regions.""" + instance_spec = {'subconfigs': [{'name': 'central1', 'weight': 1},]} + self.batch_config['queue_check_regions'] = [ + ] # Empty list, so central1 is not checked + self.mock.random_weighted_choice.return_value = mock.Mock(name='central1') + self.mock.random_weighted_choice.return_value.name = 'central1' + + subconfig = batch_service._get_subconfig(self.batch_config, instance_spec) + self.assertEqual(subconfig['region'], 'us-central1') + self.assertFalse(self.mock.get_region_load.called) + + def test_skip_load_check_if_disabled(self): + """Tests that load check is skipped if queue_check_regions is missing.""" + del self.batch_config['queue_check_regions'] + self.mock.random_weighted_choice.return_value = mock.Mock(name='central1') + self.mock.random_weighted_choice.return_value.name = 'central1' + + subconfig = batch_service._get_subconfig(self.batch_config, + self.instance_spec) + self.assertEqual(subconfig['region'], 'us-central1') + self.assertFalse(self.mock.get_region_load.called) @test_utils.with_cloud_emulators('datastore') @@ -294,11 +441,15 @@ def setUp(self): self.job.put() helpers.patch(self, [ 'clusterfuzz._internal.base.utils.random_weighted_choice', + 'clusterfuzz._internal.batch.service.random.choice', + 'clusterfuzz._internal.batch.service.get_region_load', ]) self.mock.random_weighted_choice.return_value = batch_service.WeightedSubconfig( name='east4-network2', weight=1, ) + self.mock.choice.return_value = 'east4-network2' + self.mock.get_region_load.return_value = 0 def test_nonpreemptible(self): """Tests that _get_specs_from_config works for non-preemptibles as diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index c56180ea1f..791446f9fa 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -13,6 +13,7 @@ # limitations under the License. """Build archive tests.""" import io +import json import os import tempfile import unittest @@ -152,8 +153,18 @@ def _add_files_to_archive(self, files): name=file, is_dir=False, size_bytes=0, mode=0)) self.mock.open.return_value.list_members.return_value = res - def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): - """Generates all possible dependencies for the given target.""" + def _generate_possible_fuzzer_dependencies_legacy(self, dir_prefix, + fuzz_target): + """Generates all possible dependencies for the given target. 
+ + This implementation represents the legacy archive schema prior to version 1 + and should not be used for new tests; we keep it around for backwards + compatibility. + + New tests should use a combination of + `_generate_possible_fuzzer_dependencies()` and + `_resolve_relative_dependency_paths()`. + """ needed_files = [ f'{fuzz_target}', f'{fuzz_target}.exe', @@ -175,6 +186,40 @@ def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): ] return [os.path.join(dir_prefix, file) for file in needed_files] + def _generate_possible_fuzzer_dependencies(self, fuzz_target): + """Returns a list of dependencies as file paths relative to + {fuzz_target}.runtime_deps, as they appear in runtime_deps files in real + archives. + """ + return [ + f'./{fuzz_target}', + f'{fuzz_target}.owners', + f'{fuzz_target}.runtime_deps', + f'{fuzz_target}.dSYM/Contents/Resources/DWARF/some_dependency', + './libbase.so', + '../../tools/valgrind/asan/', + '../../third_party/llvm-build/Release+Asserts/bin/llvm-symbolizer', + '../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib', + 'third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ld-linux-x86-64.so.2', + './libatomic.so', + 'icudtl.dat', + f'bin/run_{fuzz_target}', + '../../testing/location_tags.json', + ] + + def _resolve_relative_dependency_paths(self, deps_paths): + """Returns a list of dependencies as normalized file paths, i.e. with + relative path separators like './' and '../' resolved to their true + directory names. + """ + + # Runtime deps include file paths that begin with ../../ so the build + # directory is assumed to be two levels deep into the file tree. + return [ + os.path.normpath(os.path.join('out/build/', file)) + for file in deps_paths + ] + def _generate_runtime_deps(self, deps): def _mock_open(_): @@ -189,12 +234,19 @@ def _mock_open(_): def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers + def _set_archive_schema_version(self, version): + self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value, + version) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies(self, dir_prefix): + def test_possible_dependencies_legacy(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from - the runtime_deps file.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + the runtime_deps file, using the legacy archive schema where dependency + paths are interpreted as relative to the archive root and `../../` is + remapped to `src_root/`.""" + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(deps_files) @@ -204,29 +256,13 @@ def test_possible_dependencies(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_archive_without_normalized_path( + def test_possible_dependencies_deps_without_normalized_path_legacy( self, dir_prefix): """Tests that the chrome build handler correctly handles mixed-up normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( - dir_prefix, 
'my_fuzzer') - self._add_files_to_archive(needed_files) - - # we want our runtime_deps to have normalized path so that they do not - # exactly match the archive paths. - self._generate_runtime_deps(deps_files) - self._declare_fuzzers(['my_fuzzer']) - to_extract = self.build.get_target_dependencies('my_fuzzer') - to_extract = [f.name for f in to_extract] - self.assertCountEqual(to_extract, needed_files) - - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): - """Tests that the chrome build handler correctly handles mixed-up - normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive([os.path.normpath(f) for f in needed_files]) self._generate_runtime_deps(deps_files) @@ -237,13 +273,14 @@ def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): [os.path.normpath(f) for f in needed_files]) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_other_fuzzer_not_extracted(self, dir_prefix): + def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): """Tests that the chrome build handler only unpacks dependencies for the requested fuzzer, even if other fuzzers exist in the build.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') - other_fuzzer = self._generate_possible_fuzzer_dependencies( + other_fuzzer = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'other_fuzzer') self._add_files_to_archive(list(set(needed_files + other_fuzzer))) self._generate_runtime_deps(deps_files) @@ -253,10 +290,10 @@ def test_other_fuzzer_not_extracted(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_dsyms_are_correctly_unpacked(self, dir_prefix): + def test_dsyms_are_correctly_unpacked_legacy(self, dir_prefix): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. """ - needed_files = self._generate_possible_fuzzer_dependencies( + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(['my_fuzzer']) @@ -265,3 +302,110 @@ def test_dsyms_are_correctly_unpacked(self, dir_prefix): dsym_path = os.path.join( dir_prefix, 'my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency') self.assertIn(dsym_path, to_extract) + + def test_possible_dependencies(self): + """Tests that all the necessary dependencies are correctly extracted from + the runtime_deps file. + + Under archive schema version 1, dependency paths in `runtime_deps` files + are interpreted as being relative to the file itself, meaning that they must + be normalized to the equivalent path relative to the archive root before + they can be extracted. 
+ """ + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + deps_files = self._resolve_relative_dependency_paths(deps_entries) + self._add_files_to_archive(deps_files) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, deps_files) + + def test_other_fuzzer_not_extracted(self): + """Tests that the chrome build handler only unpacks dependencies for the + requested fuzzer, even if other fuzzers exist in the build.""" + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + needed_files = self._resolve_relative_dependency_paths(deps_entries) + other_fuzzer = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('other_fuzzer')) + self._add_files_to_archive(list(set(needed_files + other_fuzzer))) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer', 'other_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, needed_files) + + def test_dsyms_are_correctly_unpacked(self): + """Tests that even if not listed in the runtime deps, dSYMs are correctly + unpacked.""" + self._set_archive_schema_version(1) + needed_files = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('my_fuzzer')) + self._add_files_to_archive(needed_files) + self._generate_runtime_deps(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertIn( + 'out/build/my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency', + to_extract) + + +class ChromeBuildArchiveManifestTest(unittest.TestCase): + """Test for reading clusterfuzz_manifest.json for Chrome archives.""" + + def setUp(self): + test_helpers.patch(self, [ + 'clusterfuzz._internal.system.archive.ArchiveReader.file_exists', + 'clusterfuzz._internal.system.archive.ArchiveReader', + 'clusterfuzz._internal.system.archive.open', + ]) + self.mock.file_exists.return_value = False + + def _generate_manifest(self, archive_schema_version): + """Mocks open calls so that they return a buffer containing valid JSON for + the given archive schema version.""" + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write( + json.dumps({ + 'archive_schema_version': archive_schema_version + }).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def _generate_invalid_manifest(self): + """Mocks open calls so that they return a buffer containing invalid contents + for clusterfuzz_manifest.json.""" + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write(json.dumps({'my_field': 1}).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def test_manifest_is_correctly_read(self): + """Tests that the manifest is correctly read and used to set the archive + schema version if it exists and that the cases of a missing or invalid + manifest are handled correctly.""" + + # No manifest exists; should default to archive schema version 0 (legacy). + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 0) + + # Invalid manifest; should default to version 0. 
+ self.mock.file_exists.return_value = True + self._generate_invalid_manifest() + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 0) + + # Valid manifest. + self._generate_manifest(1) + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 1) diff --git a/src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py b/src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py new file mode 100644 index 0000000000..d908b71195 --- /dev/null +++ b/src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py @@ -0,0 +1,199 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for kernel_utils.""" + +# pylint: disable=protected-access + +import unittest +from unittest import mock + +from clusterfuzz._internal.platforms.android import kernel_utils +from clusterfuzz._internal.tests.test_libs import helpers + + +class GetCleanKernelPathTest(unittest.TestCase): + """Tests for _get_clean_kernel_path.""" + + def test_clean_path(self): + """Test that path is cleaned correctly.""" + path = '/buildbot/src/partner-android/BRANCH/private/PROJ/kernel/msm/arch/arm64/kernel/traps.c' + self.assertEqual( + kernel_utils._get_clean_kernel_path(path), + 'kernel/msm/arch/arm64/kernel/traps.c') + + def test_no_private(self): + """Test path without 'private'.""" + path = '/path/to/something/else' + self.assertEqual(kernel_utils._get_clean_kernel_path(path), path) + + +class GetKernelStackFrameLinkTest(unittest.TestCase): + """Tests for get_kernel_stack_frame_link.""" + + def test_link_creation(self): + """Test link creation.""" + stack_frame = ' [] __do_fault+0x44/0x8c /buildbot/src/partner-android/BRANCH/private/PROJ/kernel/msm/mm/memory.c:3095' + kernel_prefix = 'kernel/private/msm-google' + kernel_hash = 'abcdef123456' + + # The expected output should replace the path:line with the link info. 
+ # The original path was /buildbot/src/partner-android/BRANCH/private/PROJ/kernel/msm/mm/memory.c + # _get_clean_kernel_path converts it to kernel/msm/mm/memory.c (PROJ removed) + # kernel_prefix is stripped to msm-google + + # The function constructs: + # prefix = msm-google + # hash = abcdef123456 + # path = kernel/msm/mm/memory.c + # line = 3095 + # display_path = msm-google/kernel/msm/mm/memory.c:3095 + + expected_link_part = ('http://go/pakernel/msm-google/+/abcdef123456/' + 'kernel/msm/mm/memory.c#3095;' + 'msm-google/kernel/msm/mm/memory.c:3095;') + + result = kernel_utils.get_kernel_stack_frame_link( + stack_frame, kernel_prefix, kernel_hash) + self.assertIn(expected_link_part, result) + + def test_no_match(self): + """Test when regex doesn't match.""" + stack_frame = 'random string' + self.assertEqual( + kernel_utils.get_kernel_stack_frame_link(stack_frame, 'prefix', 'hash'), + stack_frame) + + +class GetPrefixAndFullHashTest(unittest.TestCase): + """Tests for _get_prefix_and_full_hash.""" + + def test_found(self): + """Test when hash is found.""" + repo_data = "prefix1 u'fullhash1\nprefix2 u'fullhash2" + prefix, full_hash = kernel_utils._get_prefix_and_full_hash( + repo_data, 'fullhash1') + self.assertEqual(prefix, 'prefix1') + self.assertEqual(full_hash, 'fullhash1') + + def test_not_found(self): + """Test when hash is not found.""" + repo_data = "prefix1 u'fullhash1\nprefix2 u'fullhash2" + prefix, full_hash = kernel_utils._get_prefix_and_full_hash( + repo_data, 'nonexistent') + self.assertIsNone(prefix) + self.assertIsNone(full_hash) + + +class GetRepoPropDataTest(unittest.TestCase): + """Tests for _get_repo_prop_data.""" + + def setUp(self): + """Set up mocks.""" + helpers.patch(self, [ + 'clusterfuzz._internal.system.environment.get_value', + 'clusterfuzz._internal.platforms.android.symbols_downloader.get_repo_prop_archive_filename', + 'clusterfuzz._internal.platforms.android.symbols_downloader.download_kernel_repo_prop_if_needed', + 'clusterfuzz._internal.base.utils.find_binary_path', + 'clusterfuzz._internal.base.utils.read_data_from_file', + 'os.path.exists', + ]) + self.mock.get_value.return_value = '/symbols' + self.mock.get_repo_prop_archive_filename.return_value = 'repo.prop' + + def test_success(self): + """Test success path.""" + self.mock.find_binary_path.return_value = '/symbols/repo.prop' + self.mock.exists.return_value = True + self.mock.read_data_from_file.return_value = b'data' + + result = kernel_utils._get_repo_prop_data('build_id', 'target') + self.assertEqual(result, 'data') + + def test_failure(self): + """Test failure path.""" + self.mock.find_binary_path.return_value = None + + result = kernel_utils._get_repo_prop_data('build_id', 'target') + self.assertIsNone(result) + + +class GetKernelNameTest(unittest.TestCase): + """Tests for get_kernel_name.""" + + def setUp(self): + """Set up mocks.""" + helpers.patch(self, [ + 'clusterfuzz._internal.platforms.android.settings.get_product_name', + 'clusterfuzz._internal.platforms.android.settings.get_build_product', + ]) + + def test_default(self): + """Test default kernel name.""" + self.mock.get_product_name.return_value = 'product' + self.mock.get_build_product.return_value = 'product' + + with mock.patch.dict( + 'clusterfuzz._internal.platforms.android.constants.PRODUCT_TO_KERNEL', + {}, + clear=True): + self.assertEqual(kernel_utils.get_kernel_name(), 'product') + + def test_kasan_strip(self): + """Test that _kasan is handled.""" + self.mock.get_product_name.return_value = 'product_kasan' + 
self.mock.get_build_product.return_value = 'product' + + with mock.patch.dict( + 'clusterfuzz._internal.platforms.android.constants.PRODUCT_TO_KERNEL', + {}, + clear=True): + # Based on current implementation, _kasan is NOT stripped because the + # return value of utils.strip_from_right is ignored. + self.assertEqual(kernel_utils.get_kernel_name(), 'product_kasan') + + def test_mapping(self): + """Test mapping.""" + self.mock.get_product_name.return_value = 'product' + self.mock.get_build_product.return_value = 'alias_product' + + with mock.patch.dict( + 'clusterfuzz._internal.platforms.android.constants.PRODUCT_TO_KERNEL', + {'alias_product': 'real_product'}, + clear=True): + self.assertEqual(kernel_utils.get_kernel_name(), 'real_product') + + +class GetKernelHashAndBuildIdTest(unittest.TestCase): + """Tests for get_kernel_hash_and_build_id.""" + + def setUp(self): + """Set up mocks.""" + helpers.patch(self, [ + 'clusterfuzz._internal.platforms.android.settings.get_kernel_version_string', + ]) + + def test_match(self): + """Test with matching kernel version string.""" + self.mock.get_kernel_version_string.return_value = ( + 'Linux version 3.18.0-g8de8e79-ab1234567 (android-build@google.com)') + # Expected: (match.group(2), match.group(3)) + # match.group(2) is 'ab1234567 ' (with space) + # match.group(3) is '1234567' + self.assertEqual(kernel_utils.get_kernel_hash_and_build_id(), + ('ab1234567 ', '1234567')) + + def test_no_match(self): + """Test with non-matching kernel version string.""" + self.mock.get_kernel_version_string.return_value = 'invalid' + self.assertEqual(kernel_utils.get_kernel_hash_and_build_id(), (None, None))
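For context on the kernel_utils change these last tests pin down: the failure paths of get_kernel_prefix_and_full_hash() and get_kernel_hash_and_build_id() now return a pair rather than a bare None, so callers can keep unpacking two values. A minimal hypothetical stand-in (not the real parser) showing the rationale:

# Illustrative sketch, not part of the patch; only the return shape matches
# the real functions.
def get_hash_and_build_id(parsed):
  if parsed is None:
    # Returning a plain None here would make the caller's tuple unpacking
    # below raise "TypeError: cannot unpack non-iterable NoneType".
    return None, None
  return parsed['hash'], parsed['build_id']


kernel_hash, build_id = get_hash_and_build_id(None)
assert (kernel_hash, build_id) == (None, None)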