From 2b9e1055490956fb09a2ec7d33adae50136f487d Mon Sep 17 00:00:00 2001 From: Vinicius da Costa Date: Tue, 10 Feb 2026 13:07:29 +0000 Subject: [PATCH 01/10] Use cc groups for oss fuzz tracker --- .../_internal/base/external_users.py | 45 +++++++++++++++++-- .../_internal/cron/project_setup.py | 4 +- .../_internal/datastore/data_types.py | 2 + 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/clusterfuzz/_internal/base/external_users.py b/src/clusterfuzz/_internal/base/external_users.py index 7c84809bb2..e61c245488 100644 --- a/src/clusterfuzz/_internal/base/external_users.py +++ b/src/clusterfuzz/_internal/base/external_users.py @@ -22,6 +22,10 @@ MEMCACHE_TTL_IN_SECONDS = 15 * 60 +# OSS-Fuzz issue tracker CC group +OSS_FUZZ_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com' +OSS_FUZZ_CC_GROUP_DESC = 'External CCs in OSS-Fuzz issue tracker for project' + def _fuzzers_for_job(job_type, include_parents): """Return all fuzzers that have the job associated. @@ -198,7 +202,10 @@ def _allowed_users_for_entity(name, entity_kind, auto_cc=None): return sorted(allowed_users) -def _cc_users_for_entity(name, entity_type, security_flag): +def _cc_users_for_entity(name, + entity_type, + security_flag, + allow_cc_group_for_job=True): """Return CC users for entity.""" users = _allowed_users_for_entity(name, entity_type, data_types.AutoCCType.ALL) @@ -208,6 +215,20 @@ def _cc_users_for_entity(name, entity_type, security_flag): _allowed_users_for_entity(name, entity_type, data_types.AutoCCType.SECURITY)) + if (entity_type != data_types.PermissionEntityKind.JOB or + not allow_cc_group_for_job): + return sorted(users) + + # CC group is only available for jobs, as it is not possible to infer the + # project from the other permission entity kinds alone. + users_in_cc_group = _allowed_users_for_entity( + name, entity_type, data_types.AutoCCType.USE_CC_GROUP) + if users_in_cc_group: + # Assume users are synced with the project group. + group_name = get_cc_group_from_job(name) + if group_name: + users.append(group_name) + return sorted(users) @@ -336,15 +357,33 @@ def is_upload_allowed_for_user(user_email): return bool(permissions.get()) -def cc_users_for_job(job_type, security_flag): +def cc_users_for_job(job_type, security_flag, allow_cc_group=True): """Return external users that should be CC'ed according to the given rule. Args: job_type: The name of the job security_flag: Whether or not the CC is for a security issue. + allow_cc_group: Whether to allow including the project cc group from the + job, if exists any user with the use cc group auto_cc type. Returns: A list of user emails that should be CC'ed. 
""" return _cc_users_for_entity(job_type, data_types.PermissionEntityKind.JOB, - security_flag) + security_flag, allow_cc_group) + + +def get_cc_group_from_job(job_type: str) -> str: + """Docstring for get_cc_group_from_entity""" + project_name = data_handler.get_project_name(job_type) + return get_project_cc_group_name(project_name) + + +def get_project_cc_group_name(project_name: str) -> str: + """Return oss-fuzz issue tracker CC group email for a project.""" + return f'{project_name}{OSS_FUZZ_CC_GROUP_SUFFIX}' + + +def get_project_cc_group_description(project_name: str) -> str: + """Return oss-fuzz issue tracker CC group description for a project.""" + return f'{OSS_FUZZ_CC_GROUP_DESC}: {project_name}' diff --git a/src/clusterfuzz/_internal/cron/project_setup.py b/src/clusterfuzz/_internal/cron/project_setup.py index f60e9e47c9..2eac772344 100644 --- a/src/clusterfuzz/_internal/cron/project_setup.py +++ b/src/clusterfuzz/_internal/cron/project_setup.py @@ -998,13 +998,13 @@ def sync_user_permissions(self, project, info): existing_permission = query.get() if existing_permission: continue - + # OSS-Fuzz issue tracker ccs use the corresponding project cc group. data_types.ExternalUserPermission( email=cc, entity_kind=data_types.PermissionEntityKind.JOB, entity_name=job_name, is_prefix=False, - auto_cc=data_types.AutoCCType.ALL).put() + auto_cc=data_types.AutoCCType.USE_CC_GROUP).put() def set_up(self, projects): """Do project setup. Return a list of all the project names that were set diff --git a/src/clusterfuzz/_internal/datastore/data_types.py b/src/clusterfuzz/_internal/datastore/data_types.py index e15966b61b..3adea20bb7 100644 --- a/src/clusterfuzz/_internal/datastore/data_types.py +++ b/src/clusterfuzz/_internal/datastore/data_types.py @@ -208,6 +208,8 @@ class AutoCCType: ALL = 1 # Auto-CC only for security issues. SECURITY = 2 + # Use cc google group for all issues - oss-fuzz specific. + USE_CC_GROUP = 3 # Type of permission. Used by ExternalUserPermision. From 6d1b99fccb3296e3090002e5f0b0e6dc30251411 Mon Sep 17 00:00:00 2001 From: Javan Lacerda Date: Mon, 9 Feb 2026 16:03:59 -0300 Subject: [PATCH 02/10] fix feature flag (#5156) Fix feature flag for using the enum properties properly. 
---------

Signed-off-by: Javan Lacerda
---
 src/clusterfuzz/_internal/bot/tasks/task_types.py | 7 ++++---
 src/clusterfuzz/_internal/cron/schedule_fuzz.py   | 7 +++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/clusterfuzz/_internal/bot/tasks/task_types.py b/src/clusterfuzz/_internal/bot/tasks/task_types.py
index a9853ce03f..1d0e38a8ae 100644
--- a/src/clusterfuzz/_internal/bot/tasks/task_types.py
+++ b/src/clusterfuzz/_internal/bot/tasks/task_types.py
@@ -162,9 +162,10 @@ def execute(self, task_argument, job_type, uworker_env):
     utask_main_queue_size = tasks.get_utask_main_queue_size()

     utask_main_queue_limit = UTASK_MAIN_QUEUE_LIMIT_DEFAULT
-    utask_flag = feature_flags.FeatureFlags.UTASK_MAIN_QUEUE_LIMIT.flag
-    if utask_flag and utask_flag.enabled:
-      utask_main_queue_limit = utask_flag.content
+    utask_flag = feature_flags.FeatureFlags.UTASK_MAIN_QUEUE_LIMIT
+    if utask_flag.enabled and utask_flag.content:
+      utask_main_queue_limit = int(utask_flag.content)
+
     if utask_main_queue_size > utask_main_queue_limit:
       base_os_version = environment.get_value('BASE_OS_VERSION')
       queue_name = UTASK_MAIN_QUEUE if not base_os_version else \
diff --git a/src/clusterfuzz/_internal/cron/schedule_fuzz.py b/src/clusterfuzz/_internal/cron/schedule_fuzz.py
index ecd7a8d5c7..481b71cfb8 100644
--- a/src/clusterfuzz/_internal/cron/schedule_fuzz.py
+++ b/src/clusterfuzz/_internal/cron/schedule_fuzz.py
@@ -248,11 +248,10 @@ def schedule_fuzz_tasks() -> bool:
   creds = credentials.get_default()[0]
   preprocess_queue_size = get_queue_size(creds, project,
                                          tasks.PREPROCESS_QUEUE)
+  target_size = PREPROCESS_TARGET_SIZE_DEFAULT
   target_size_flag = feature_flags.FeatureFlags.PREPROCESS_QUEUE_SIZE_LIMIT
-  if target_size_flag and target_size_flag.enabled:
-    target_size = int(target_size_flag.value)
-  else:
-    target_size = PREPROCESS_TARGET_SIZE_DEFAULT
+  if target_size_flag.enabled and target_size_flag.content:
+    target_size = int(target_size_flag.content)

   num_tasks = target_size - preprocess_queue_size
   logs.info(f'Preprocess queue size: {preprocess_queue_size}. '

From 43e21e2cfbba657a04eb503060102d0d1134ae70 Mon Sep 17 00:00:00 2001
From: Javan Lacerda
Date: Mon, 9 Feb 2026 16:39:27 -0300
Subject: [PATCH 03/10] Implement job limiter for GCP Batch (#5158)

It implements a job limiter for the GCP Batch adapter for remote tasks. It
uses a private API to check the availability of the regions for scheduling
jobs; if all of them are overloaded, the tasks are returned as unscheduled
tasks and sent back to the queue.
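In rough outline, the scheduling path behaves as follows (a simplified sketch
of the flow implemented in batch/service.py in the diff below, not the full
implementation; the surrounding module context is assumed):

    # Only regions below the queue-size threshold are candidates; if none
    # qualify, the tasks are handed back so the caller can re-queue them.
    try:
      specs = _get_specs_from_config(remote_tasks)
    except AllRegionsOverloadedError:
      return remote_tasks  # treated as unscheduled tasks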
Signed-off-by: Javan Lacerda --- configs/test/batch/batch.yaml | 3 + src/clusterfuzz/_internal/base/memoize.py | 25 +++ src/clusterfuzz/_internal/batch/service.py | 125 ++++++++++++-- .../tests/core/batch/batch_service_test.py | 163 +++++++++++++++++- 4 files changed, 294 insertions(+), 22 deletions(-) diff --git a/configs/test/batch/batch.yaml b/configs/test/batch/batch.yaml index 12788e149d..a77ee1e01e 100644 --- a/configs/test/batch/batch.yaml +++ b/configs/test/batch/batch.yaml @@ -76,6 +76,9 @@ mapping: name: east4-network2 weight: 1 project: 'test-clusterfuzz' +queue_check_regions: + - us-central1 + - us-east4 subconfigs: central1-network1: region: 'us-central1' diff --git a/src/clusterfuzz/_internal/base/memoize.py b/src/clusterfuzz/_internal/base/memoize.py index ae3063b0ce..8b214f9f91 100644 --- a/src/clusterfuzz/_internal/base/memoize.py +++ b/src/clusterfuzz/_internal/base/memoize.py @@ -17,6 +17,7 @@ import functools import json import threading +import time from clusterfuzz._internal.base import persistent_cache from clusterfuzz._internal.metrics import logs @@ -89,6 +90,30 @@ def get_key(self, func, args, kwargs): return _default_key(func, args, kwargs) +class InMemory(FifoInMemory): + """In-memory caching engine with TTL.""" + + def __init__(self, ttl_in_seconds, capacity=1000): + super().__init__(capacity) + self.ttl_in_seconds = ttl_in_seconds + + def put(self, key, value): + """Put (key, value) into cache.""" + super().put(key, (value, time.time() + self.ttl_in_seconds)) + + def get(self, key): + """Get the value from cache.""" + entry = super().get(key) + if entry is None: + return None + + value, expiry = entry + if expiry < time.time(): + return None + + return value + + class FifoOnDisk: """On-disk caching engine.""" diff --git a/src/clusterfuzz/_internal/batch/service.py b/src/clusterfuzz/_internal/batch/service.py index c97667797e..f8b396d1c4 100644 --- a/src/clusterfuzz/_internal/batch/service.py +++ b/src/clusterfuzz/_internal/batch/service.py @@ -1,8 +1,8 @@ # Copyright 2025 Google LLC # -# Licensed under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # @@ -18,14 +18,19 @@ and provides a simple interface for scheduling ClusterFuzz tasks. 
""" import collections +import json +import random import threading from typing import Dict from typing import List from typing import Tuple +import urllib.request import uuid +import google.auth.transport.requests from google.cloud import batch_v1 as batch +from clusterfuzz._internal.base import memoize from clusterfuzz._internal.base import retry from clusterfuzz._internal.base import tasks from clusterfuzz._internal.base import utils @@ -65,6 +70,13 @@ # See https://cloud.google.com/batch/quotas#job_limits MAX_CONCURRENT_VMS_PER_JOB = 1000 +MAX_QUEUE_SIZE = 100 + + +class AllRegionsOverloadedError(Exception): + """Raised when all batch regions are overloaded.""" + + _local = threading.local() DEFAULT_RETRY_COUNT = 0 @@ -184,6 +196,50 @@ def count_queued_or_scheduled_tasks(project: str, return (queued, scheduled) +@memoize.wrap(memoize.InMemory(60)) +def get_region_load(project: str, region: str) -> int: + """Gets the current load (queued and scheduled jobs) for a region.""" + creds, _ = credentials.get_default() + if not creds.valid: + creds.refresh(google.auth.transport.requests.Request()) + + headers = { + 'Authorization': f'Bearer {creds.token}', + 'Content-Type': 'application/json' + } + + try: + url = (f'https://batch.googleapis.com/v1alpha/projects/{project}/locations/' + f'{region}/jobs:countByState?states=QUEUED') + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req) as response: + if response.status != 200: + logs.error( + f'Batch countByState failed: {response.status} {response.read()}') + return 0 + + data = json.loads(response.read()) + logs.info(f'Batch countByState response for {region}: {data}') + # The API returns a list of state counts. + # Example: { "jobCounts": { "QUEUED": "10" } } + total = 0 + + # Log data for debugging first few times if needed, or just rely on structure. + # We'll assume the structure is standard for Google APIs. + job_counts = data.get('jobCounts', {}) + for state, count in job_counts.items(): + count = int(count) + if state == 'QUEUED': + total += count + else: + logs.error(f'Unknown state: {state}') + + return total + except Exception as e: + logs.error(f'Failed to get region load for {region}: {e}') + return 0 + + def _get_batch_config(): """Returns the batch config. This function was made to make mocking easier.""" return local_config.BatchConfig() @@ -191,7 +247,7 @@ def _get_batch_config(): def is_remote_task(command: str, job_name: str) -> bool: """Returns whether a task is configured to run remotely on GCP Batch. - + This is determined by checking if a valid batch workload specification can be found for the given command and job type. """ @@ -242,15 +298,46 @@ def _get_config_names(batch_tasks: List[remote_task_types.RemoteTask]): def _get_subconfig(batch_config, instance_spec): - # TODO(metzman): Make this pick one at random or based on conditions. all_subconfigs = batch_config.get('subconfigs', {}) instance_subconfigs = instance_spec['subconfigs'] - weighted_subconfigs = [ - WeightedSubconfig(subconfig['name'], subconfig['weight']) - for subconfig in instance_subconfigs - ] - weighted_subconfig = utils.random_weighted_choice(weighted_subconfigs) - return all_subconfigs[weighted_subconfig.name] + + queue_check_regions = batch_config.get('queue_check_regions') + if not queue_check_regions: + logs.info( + 'Skipping batch load check because queue_check_regions is not configured.' 
+ ) + weighted_subconfigs = [ + WeightedSubconfig(subconfig['name'], subconfig['weight']) + for subconfig in instance_subconfigs + ] + weighted_subconfig = utils.random_weighted_choice(weighted_subconfigs) + return all_subconfigs[weighted_subconfig.name] + + # Check load for configured regions. + healthy_subconfigs = [] + project = batch_config.get('project') + + for subconfig in instance_subconfigs: + name = subconfig['name'] + conf = all_subconfigs[name] + region = conf['region'] + + if region in queue_check_regions: + load = get_region_load(project, region) + logs.info(f'Region {region} has {load} queued jobs.') + if load >= MAX_QUEUE_SIZE: + logs.info(f'Region {region} overloaded (load={load}). Skipping.') + continue + + healthy_subconfigs.append(name) + + if not healthy_subconfigs: + logs.error('All candidate regions are overloaded.') + raise AllRegionsOverloadedError('All candidate regions are overloaded.') + + # Randomly pick one from healthy regions to avoid thundering herd. + chosen_name = random.choice(healthy_subconfigs) + return all_subconfigs[chosen_name] def _get_specs_from_config( @@ -277,7 +364,6 @@ def _get_specs_from_config( versioned_images_map = instance_spec.get('versioned_docker_images') if (base_os_version and versioned_images_map and base_os_version in versioned_images_map): - # New path: Use the versioned image if specified and available. docker_image_uri = versioned_images_map[base_os_version] else: # Fallback/legacy path: Use the original docker_image key. @@ -324,7 +410,7 @@ def _get_specs_from_config( class GcpBatchService(remote_task_types.RemoteTaskInterface): """A high-level service for creating and managing remote tasks. - + This service provides a simple interface for scheduling ClusterFuzz tasks on GCP Batch. It handles the details of creating batch jobs and tasks, and provides a way to check if a task is configured to run remotely. @@ -383,20 +469,27 @@ def create_utask_main_job(self, module: str, job_type: str, def create_utask_main_jobs(self, remote_tasks: List[remote_task_types.RemoteTask]): """Creates a batch job for a list of uworker main tasks. - + This method groups the tasks by their workload specification and creates a separate batch job for each group. This allows tasks with similar requirements to be processed together, which can improve efficiency. 
""" job_specs = collections.defaultdict(list) - specs = _get_specs_from_config(remote_tasks) + try: + specs = _get_specs_from_config(remote_tasks) + + # Return the remote tasks as uncreated task + # if all regions are overloaded + except AllRegionsOverloadedError: + return remote_tasks + for remote_task in remote_tasks: logs.info(f'Scheduling {remote_task.command}, {remote_task.job_type}.') spec = specs[(remote_task.command, remote_task.job_type)] job_specs[spec].append(remote_task.input_download_url) logs.info('Creating batch jobs.') - logs.info('Batching utask_mains.') + for spec, input_urls in job_specs.items(): for input_urls_portion in utils.batched(input_urls, MAX_CONCURRENT_VMS_PER_JOB - 1): diff --git a/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py b/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py index 56b45e7b41..118621e51d 100644 --- a/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py +++ b/src/clusterfuzz/_internal/tests/core/batch/batch_service_test.py @@ -157,7 +157,7 @@ def setUp(self): self.batch_service = batch_service.GcpBatchService() self.mock.uuid4.side_effect = [uuid.UUID(u) for u in UUIDS] - def test_create_utask_main_jobs(self): + def test_create_uworker_main_batch_jobs(self): """Tests that create_utask_main_jobs works as expected.""" # Create mock data. spec1 = batch_service.BatchWorkloadSpec( @@ -217,7 +217,23 @@ def test_create_utask_main_jobs(self): mock.call(expected_create_request_2), ]) - def test_create_utask_main_job(self): + def test_create_uworker_main_batch_jobs_all_regions_overloaded(self): + """Tests that create_utask_main_jobs returns tasks when all regions are overloaded.""" + tasks = [ + remote_task_types.RemoteTask('command1', 'job1', 'url1'), + remote_task_types.RemoteTask('command2', 'job2', 'url2'), + ] + with mock.patch('clusterfuzz._internal.batch.service._get_specs_from_config' + ) as mock_get_specs_from_config: + mock_get_specs_from_config.side_effect = batch_service.AllRegionsOverloadedError( + 'All regions overloaded') + + result = self.batch_service.create_utask_main_jobs(tasks) + + self.assertEqual(result, tasks) + self.mock_batch_client_instance.create_job.assert_not_called() + + def test_create_uworker_main_batch_job(self): """Tests that create_utask_main_job works as expected.""" # Create mock data. 
spec1 = batch_service.BatchWorkloadSpec( @@ -254,7 +270,7 @@ def test_create_utask_main_job(self): UUIDS[0], spec1, ['url1']) self.mock_batch_client_instance.create_job.assert_called_with( expected_create_request) - self.assertIsNone(result) + self.assertEqual(result, None) @test_utils.with_cloud_emulators('datastore') @@ -278,10 +294,141 @@ def test_is_remote_task(self): self.assertFalse(batch_service.is_remote_task('progression', 'job')) -if __name__ == '__main__': - unittest.main() +@test_utils.with_cloud_emulators('datastore') +class GetRegionLoadTest(unittest.TestCase): + """Tests for get_region_load.""" + + def setUp(self): + helpers.patch(self, [ + 'urllib.request.urlopen', + ]) + + def test_get_region_load_success(self): + """Tests get_region_load with a successful API response.""" + mock_response = mock.Mock() + mock_response.status = 200 + mock_response.read.return_value = b'{"jobCounts": {"QUEUED": "15"}}' + self.mock.urlopen.return_value.__enter__.return_value = mock_response + + load = batch_service.get_region_load('project_success', 'us-central1') + self.assertEqual(load, 15) -# pylint: disable=protected-access + def test_get_region_load_empty(self): + """Tests get_region_load with an empty response.""" + mock_response = mock.Mock() + mock_response.status = 200 + mock_response.read.return_value = b'{}' + self.mock.urlopen.return_value.__enter__.return_value = mock_response + + load = batch_service.get_region_load('project_empty', 'us-central1') + self.assertEqual(load, 0) + + def test_get_region_load_error(self): + """Tests get_region_load with an API error.""" + self.mock.urlopen.side_effect = Exception('error') + + load = batch_service.get_region_load('project_error', 'us-central1') + self.assertEqual(load, 0) + + +@test_utils.with_cloud_emulators('datastore') +class GetSubconfigLoadBalancingTest(unittest.TestCase): + """Tests for load balancing in _get_subconfig.""" + + def setUp(self): + helpers.patch(self, [ + 'clusterfuzz._internal.batch.service.get_region_load', + 'clusterfuzz._internal.batch.service.random.choice', + 'clusterfuzz._internal.base.utils.random_weighted_choice', + ]) + self.batch_config = { + 'project': 'test-project', + 'queue_check_regions': ['us-central1', 'us-east4'], + 'subconfigs': { + 'central1': { + 'region': 'us-central1', + 'network': 'n1' + }, + 'east4': { + 'region': 'us-east4', + 'network': 'n2' + }, + 'west1': { + 'region': 'us-west1', + 'network': 'n3' + }, + } + } + self.instance_spec = { + 'subconfigs': [ + { + 'name': 'central1', + 'weight': 1 + }, + { + 'name': 'east4', + 'weight': 1 + }, + ] + } + + def test_all_regions_healthy(self): + """Tests that a region is picked when all are healthy.""" + self.mock.get_region_load.return_value = 2 # Total load 2 < 100 + self.mock.choice.side_effect = lambda x: x[0] + + subconfig = batch_service._get_subconfig(self.batch_config, + self.instance_spec) + self.assertEqual(subconfig['region'], 'us-central1') + + def test_one_region_overloaded(self): + """Tests that overloaded regions are skipped.""" + # us-central1 (load 100) is overloaded, us-east4 (load 2) is healthy. 
+ self.mock.get_region_load.side_effect = [ + 100, # us-central1 + 2, # us-east4 + ] + + # random.choice should only see ['east4'] + def mock_choice(items): + self.assertEqual(items, ['east4']) + return items[0] + + self.mock.choice.side_effect = mock_choice + + subconfig = batch_service._get_subconfig(self.batch_config, + self.instance_spec) + self.assertEqual(subconfig['region'], 'us-east4') + + def test_all_regions_overloaded(self): + """Tests that AllRegionsOverloadedError is raised when no healthy regions exist.""" + self.mock.get_region_load.return_value = 100 # Load 100 is threshold for "overloaded" + + with self.assertRaises(batch_service.AllRegionsOverloadedError): + batch_service._get_subconfig(self.batch_config, self.instance_spec) + + def test_skip_load_check_if_not_in_config(self): + """Tests that load check is skipped for regions not in queue_check_regions.""" + instance_spec = {'subconfigs': [{'name': 'central1', 'weight': 1},]} + self.batch_config['queue_check_regions'] = [ + ] # Empty list, so central1 is not checked + self.mock.random_weighted_choice.return_value = mock.Mock(name='central1') + self.mock.random_weighted_choice.return_value.name = 'central1' + + subconfig = batch_service._get_subconfig(self.batch_config, instance_spec) + self.assertEqual(subconfig['region'], 'us-central1') + self.assertFalse(self.mock.get_region_load.called) + + def test_skip_load_check_if_disabled(self): + """Tests that load check is skipped if queue_check_regions is missing.""" + del self.batch_config['queue_check_regions'] + self.mock.random_weighted_choice.return_value = mock.Mock(name='central1') + self.mock.random_weighted_choice.return_value.name = 'central1' + + subconfig = batch_service._get_subconfig(self.batch_config, + self.instance_spec) + self.assertEqual(subconfig['region'], 'us-central1') + self.assertFalse(self.mock.get_region_load.called) @test_utils.with_cloud_emulators('datastore') @@ -294,11 +441,15 @@ def setUp(self): self.job.put() helpers.patch(self, [ 'clusterfuzz._internal.base.utils.random_weighted_choice', + 'clusterfuzz._internal.batch.service.random.choice', + 'clusterfuzz._internal.batch.service.get_region_load', ]) self.mock.random_weighted_choice.return_value = batch_service.WeightedSubconfig( name='east4-network2', weight=1, ) + self.mock.choice.return_value = 'east4-network2' + self.mock.get_region_load.return_value = 0 def test_nonpreemptible(self): """Tests that _get_specs_from_config works for non-preemptibles as From 2f5989a0706a3a9aae87ee289ba42bc031d1256c Mon Sep 17 00:00:00 2001 From: Javan Lacerda Date: Tue, 10 Feb 2026 13:27:13 -0300 Subject: [PATCH 04/10] fix unhandled tuple (#5159) fix b/482257453 Update get_kernel_hash_and_build_id to return a tuple with None, None if the match fails. It avoids to have `TypeError: cannot unpack non-iterable NoneType object`. It also create unit tests for src/clusterfuzz/_internal/platforms/android/kernel_utils.py. 
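For context, the unpacking pattern at the call sites that triggered the error
(a small illustrative sketch; the exact call sites are not part of this patch):

    # Callers unpack the result directly, so a bare `None` return raised
    # "TypeError: cannot unpack non-iterable NoneType object".
    kernel_hash, build_id = get_kernel_hash_and_build_id()
    # Returning (None, None) on a failed match lets both names simply be None.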
--------- Signed-off-by: Javan Lacerda --- .../platforms/android/kernel_utils.py | 4 +- .../platforms/android/kernel_utils_test.py | 199 ++++++++++++++++++ 2 files changed, 201 insertions(+), 2 deletions(-) create mode 100644 src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py diff --git a/src/clusterfuzz/_internal/platforms/android/kernel_utils.py b/src/clusterfuzz/_internal/platforms/android/kernel_utils.py index e5e2c25478..a68a05691d 100644 --- a/src/clusterfuzz/_internal/platforms/android/kernel_utils.py +++ b/src/clusterfuzz/_internal/platforms/android/kernel_utils.py @@ -114,7 +114,7 @@ def get_kernel_prefix_and_full_hash(): target = get_kernel_name() if not build_id or not target: logs.error('Could not get kernel parameters, exiting.') - return None + return None, None android_kernel_repo_data = _get_repo_prop_data(build_id, target) if android_kernel_repo_data: @@ -144,4 +144,4 @@ def get_kernel_hash_and_build_id(): if match: return match.group(2), match.group(3) - return None + return None, None diff --git a/src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py b/src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py new file mode 100644 index 0000000000..d908b71195 --- /dev/null +++ b/src/clusterfuzz/_internal/tests/core/platforms/android/kernel_utils_test.py @@ -0,0 +1,199 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for kernel_utils.""" + +# pylint: disable=protected-access + +import unittest +from unittest import mock + +from clusterfuzz._internal.platforms.android import kernel_utils +from clusterfuzz._internal.tests.test_libs import helpers + + +class GetCleanKernelPathTest(unittest.TestCase): + """Tests for _get_clean_kernel_path.""" + + def test_clean_path(self): + """Test that path is cleaned correctly.""" + path = '/buildbot/src/partner-android/BRANCH/private/PROJ/kernel/msm/arch/arm64/kernel/traps.c' + self.assertEqual( + kernel_utils._get_clean_kernel_path(path), + 'kernel/msm/arch/arm64/kernel/traps.c') + + def test_no_private(self): + """Test path without 'private'.""" + path = '/path/to/something/else' + self.assertEqual(kernel_utils._get_clean_kernel_path(path), path) + + +class GetKernelStackFrameLinkTest(unittest.TestCase): + """Tests for get_kernel_stack_frame_link.""" + + def test_link_creation(self): + """Test link creation.""" + stack_frame = ' [] __do_fault+0x44/0x8c /buildbot/src/partner-android/BRANCH/private/PROJ/kernel/msm/mm/memory.c:3095' + kernel_prefix = 'kernel/private/msm-google' + kernel_hash = 'abcdef123456' + + # The expected output should replace the path:line with the link info. 
+ # The original path was /buildbot/src/partner-android/BRANCH/private/PROJ/kernel/msm/mm/memory.c + # _get_clean_kernel_path converts it to kernel/msm/mm/memory.c (PROJ removed) + # kernel_prefix is stripped to msm-google + + # The function constructs: + # prefix = msm-google + # hash = abcdef123456 + # path = kernel/msm/mm/memory.c + # line = 3095 + # display_path = msm-google/kernel/msm/mm/memory.c:3095 + + expected_link_part = ('http://go/pakernel/msm-google/+/abcdef123456/' + 'kernel/msm/mm/memory.c#3095;' + 'msm-google/kernel/msm/mm/memory.c:3095;') + + result = kernel_utils.get_kernel_stack_frame_link( + stack_frame, kernel_prefix, kernel_hash) + self.assertIn(expected_link_part, result) + + def test_no_match(self): + """Test when regex doesn't match.""" + stack_frame = 'random string' + self.assertEqual( + kernel_utils.get_kernel_stack_frame_link(stack_frame, 'prefix', 'hash'), + stack_frame) + + +class GetPrefixAndFullHashTest(unittest.TestCase): + """Tests for _get_prefix_and_full_hash.""" + + def test_found(self): + """Test when hash is found.""" + repo_data = "prefix1 u'fullhash1\nprefix2 u'fullhash2" + prefix, full_hash = kernel_utils._get_prefix_and_full_hash( + repo_data, 'fullhash1') + self.assertEqual(prefix, 'prefix1') + self.assertEqual(full_hash, 'fullhash1') + + def test_not_found(self): + """Test when hash is not found.""" + repo_data = "prefix1 u'fullhash1\nprefix2 u'fullhash2" + prefix, full_hash = kernel_utils._get_prefix_and_full_hash( + repo_data, 'nonexistent') + self.assertIsNone(prefix) + self.assertIsNone(full_hash) + + +class GetRepoPropDataTest(unittest.TestCase): + """Tests for _get_repo_prop_data.""" + + def setUp(self): + """Set up mocks.""" + helpers.patch(self, [ + 'clusterfuzz._internal.system.environment.get_value', + 'clusterfuzz._internal.platforms.android.symbols_downloader.get_repo_prop_archive_filename', + 'clusterfuzz._internal.platforms.android.symbols_downloader.download_kernel_repo_prop_if_needed', + 'clusterfuzz._internal.base.utils.find_binary_path', + 'clusterfuzz._internal.base.utils.read_data_from_file', + 'os.path.exists', + ]) + self.mock.get_value.return_value = '/symbols' + self.mock.get_repo_prop_archive_filename.return_value = 'repo.prop' + + def test_success(self): + """Test success path.""" + self.mock.find_binary_path.return_value = '/symbols/repo.prop' + self.mock.exists.return_value = True + self.mock.read_data_from_file.return_value = b'data' + + result = kernel_utils._get_repo_prop_data('build_id', 'target') + self.assertEqual(result, 'data') + + def test_failure(self): + """Test failure path.""" + self.mock.find_binary_path.return_value = None + + result = kernel_utils._get_repo_prop_data('build_id', 'target') + self.assertIsNone(result) + + +class GetKernelNameTest(unittest.TestCase): + """Tests for get_kernel_name.""" + + def setUp(self): + """Set up mocks.""" + helpers.patch(self, [ + 'clusterfuzz._internal.platforms.android.settings.get_product_name', + 'clusterfuzz._internal.platforms.android.settings.get_build_product', + ]) + + def test_default(self): + """Test default kernel name.""" + self.mock.get_product_name.return_value = 'product' + self.mock.get_build_product.return_value = 'product' + + with mock.patch.dict( + 'clusterfuzz._internal.platforms.android.constants.PRODUCT_TO_KERNEL', + {}, + clear=True): + self.assertEqual(kernel_utils.get_kernel_name(), 'product') + + def test_kasan_strip(self): + """Test that _kasan is handled.""" + self.mock.get_product_name.return_value = 'product_kasan' + 
self.mock.get_build_product.return_value = 'product' + + with mock.patch.dict( + 'clusterfuzz._internal.platforms.android.constants.PRODUCT_TO_KERNEL', + {}, + clear=True): + # Based on current implementation, _kasan is NOT stripped because the + # return value of utils.strip_from_right is ignored. + self.assertEqual(kernel_utils.get_kernel_name(), 'product_kasan') + + def test_mapping(self): + """Test mapping.""" + self.mock.get_product_name.return_value = 'product' + self.mock.get_build_product.return_value = 'alias_product' + + with mock.patch.dict( + 'clusterfuzz._internal.platforms.android.constants.PRODUCT_TO_KERNEL', + {'alias_product': 'real_product'}, + clear=True): + self.assertEqual(kernel_utils.get_kernel_name(), 'real_product') + + +class GetKernelHashAndBuildIdTest(unittest.TestCase): + """Tests for get_kernel_hash_and_build_id.""" + + def setUp(self): + """Set up mocks.""" + helpers.patch(self, [ + 'clusterfuzz._internal.platforms.android.settings.get_kernel_version_string', + ]) + + def test_match(self): + """Test with matching kernel version string.""" + self.mock.get_kernel_version_string.return_value = ( + 'Linux version 3.18.0-g8de8e79-ab1234567 (android-build@google.com)') + # Expected: (match.group(2), match.group(3)) + # match.group(2) is 'ab1234567 ' (with space) + # match.group(3) is '1234567' + self.assertEqual(kernel_utils.get_kernel_hash_and_build_id(), + ('ab1234567 ', '1234567')) + + def test_no_match(self): + """Test with non-matching kernel version string.""" + self.mock.get_kernel_version_string.return_value = 'invalid' + self.assertEqual(kernel_utils.get_kernel_hash_and_build_id(), (None, None)) From 77545f8e72cc6842173927b57ecf7c219a86abb6 Mon Sep 17 00:00:00 2001 From: Titouan Rigoudy Date: Tue, 10 Feb 2026 19:57:27 +0100 Subject: [PATCH 05/10] Add handling for reformatted archives conditional on archive format version (#5145) Clusterfuzz will now look for a file called clusterfuzz_manifest.json at the root of Chrome archives and decide how to interpret runtime dependencies based on the json's version field. Version 0 (or no manifest file found) means to keep using the legacy logic while version 1 means to interpret relative dependency paths as relative to the corresponding runtime_deps file instead of the archive root. Version 1 also stops expecting a src_root/ directory in the archive root. --------- Co-authored-by: Martin Verde --- .../build_management/build_archive.py | 143 +++++++++++- .../build_management/build_archive_test.py | 208 +++++++++++++++--- 2 files changed, 307 insertions(+), 44 deletions(-) diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 133fc1c2c1..cd2fb12de5 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -14,6 +14,7 @@ """Build Archive manager.""" import abc +import json import os from typing import BinaryIO from typing import Callable @@ -219,29 +220,144 @@ def unpack(self, class ChromeBuildArchive(DefaultBuildArchive): """Handles chrome build archives. This special cases the default behaviour by - looking at the content of the `.runtime_deps` file, in order to unpack all the - fuzzer dependencies correctly. - In case something goes wrong, this defaults to using the default unpacker. + looking at the content of the `.runtime_deps` file for each fuzzer target in + order to unpack all of its dependencies correctly. 
+ + Expects a manifest file named `clusterfuzz_manifest.json` in the root of the + archive to decide which schema version to use when interpreting its contents. + The legacy schema is applied to archives with no manifest. Defaults to using + the default unpacker in case something goes wrong. + + Under the legacy schema, fuzz targets were assumed to be at the root of the + archive while runtime_deps starting with `../../` were remapped to + `/src_root/`. + + Given the following runtime_deps: + + my_fuzzer.runtime_deps: + ========== + ./my_fuzzer + my_fuzzer.options + my_fuzzer.owners + my_fuzzer.runtime_deps + ./libbase.so + ./libatomic.so + ../../.vpython3 + ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib + /lib/ + + + The legacy schema would expect an archive with the following structure: + ========== + my_fuzzer + my_fuzzer.options + my_ruzzer.owners + my_fuzzer.runtime_deps + libbase.so + libatomic.so + # etc. for all fuzz targets + src_root/ + .vpython3 + third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ + # all instrumented libs + # etc. for other deps + + Schema version 1 does away with `/src_root/` and interprets runtime_deps + entries as file paths relative to the runtime_deps file, which lives in the + build directory along with fuzz target binaries. + + Expected archive structure with the same runtime_deps: + ========== + out/build/my_fuzzer + out/build/my_fuzzer.options + out/build/my_fuzzer.owners + out/build/my_fuzzer.runtime_deps + out/build/libbase.so + out/build/libatomic.so + # etc. for all fuzz targets and deps in the build directory + .vpython3 + third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ + # all instrumented libs + # etc. for other deps """ + def __init__(self, + reader: archive.ArchiveReader, + default_archive_schema_version: int = 0): + """Initializes a `ChromiumBuildArchive` with the given reader. + + Arguments: + reader: See `DefaultBuildArchive`. + default_archive_schema_version: Specifies which version of a build archive + to expect if `clusterfuzz_manifest.json` is missing or badly formatted. + """ + super().__init__(reader) + # The manifest may not exist for earlier versions of archives. In this + # case, default to schema version 0. + manifest_path = 'clusterfuzz_manifest.json' + if self.file_exists(manifest_path): + with self.open(manifest_path) as f: + manifest = json.load(f) + self._archive_schema_version = manifest.get('archive_schema_version') + if self._archive_schema_version is None: + logs.warning( + 'clusterfuzz_manifest.json was incorrectly formatted or missing an ' + 'archive_schema_version field') + self._archive_schema_version = default_archive_schema_version + else: + self._archive_schema_version = default_archive_schema_version + def root_dir(self) -> str: if not hasattr(self, '_root_dir'): self._root_dir = super().root_dir() # pylint: disable=attribute-defined-outside-init return self._root_dir - def to_archive_path(self, path: str) -> str: - """Deps are relative to the Chrome root directory. However, there might be - a common root directory in the archive, which means we need to make sure - the file path is correct. 
+ def archive_schema_version(self) -> int: + """Returns the schema version number for this archive.""" + return self._archive_schema_version + + def get_dependency_path(self, path: str, deps_file_path: str) -> str: + """Deps are given as paths relative to the deps file where they are listed, + so we need to translate them to the corresponding paths relative to the + archive root. Args: - path: the dependency path relative to Chrome's root directory. + path: the dependency path relative to the deps file. + deps_file_path: the path to the deps file, relative to the archive root. Returns: - the path relative to the archive. + the dependency path relative to the archive root. """ - path = os.path.normpath(path) + # Archive schema version 0 represents legacy behavior. For newer archive + # versions, runtime_deps that were formerly stored under + # {self.root_dir()}/src_root/ are now stored in the root directory, while + # the build artifacts formerly stored in the root directory are now stored + # in the build directory. + + if self._archive_schema_version > 0: + # Assumes the dependency path is relative to the deps file and + # transforms it into into a full path relative to the archive root. For + # example: + # + # deps_file_path: "/A/B/fuzz_target.runtime_deps" + # os.path.dirname(deps_file_path) => "/A/B/" (call this DEPS_DIR) + # path1: "./my_dep" + # path2: "../../C/my_dep2" + # path3: "D/my_dep3" + # + # os.path.join(DEPS_DIR, path1) => "/A/B/./my_dep" + # os.path.join(DEPS_DIR, path2) => "/A/B/../../C/my_dep2" + # os.path.join(DEPS_DIR, path3) => "/A/B/D/my_dep3" + # + # os.path.normpath(os.path.join(DEPS_DIR, path1)) => "/A/B/my_dep" + # os.path.normpath(os.path.join(DEPS_DIR, path2)) => "/C/my_dep2" + # os.path.normpath(os.path.join(DEPS_DIR, path3)) => "/A/B/D/my_dep3" + return os.path.normpath( + os.path.join(os.path.dirname(deps_file_path), path)) + + # Legacy behavior. Remap `../../` to `src_root/`. + path = os.path.normpath(path) if path.startswith('../../'): path = path.replace('../../', 'src_root/') @@ -271,7 +387,7 @@ def _get_common_files(self) -> List[str]: def get_target_dependencies( self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]: - target_path = self.to_archive_path(fuzz_target) + target_path = self.get_path_for_target(fuzz_target) deps_file = f'{target_path}.runtime_deps' if not self.file_exists(deps_file): logs.warning(f'runtime_deps file not found for {target_path}') @@ -280,7 +396,10 @@ def get_target_dependencies( res = [] matchers = [] with self.open(deps_file) as f: - deps = [self.to_archive_path(l.decode()) for l in f.read().splitlines()] + deps = [ + self.get_dependency_path(l.decode(), deps_file) + for l in f.read().splitlines() + ] for dep in deps: # We need to match the file prefixes here, because some of the deps are # globering the whole directory. Same for files, on mac platform, we diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index c56180ea1f..791446f9fa 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -13,6 +13,7 @@ # limitations under the License. 
"""Build archive tests.""" import io +import json import os import tempfile import unittest @@ -152,8 +153,18 @@ def _add_files_to_archive(self, files): name=file, is_dir=False, size_bytes=0, mode=0)) self.mock.open.return_value.list_members.return_value = res - def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): - """Generates all possible dependencies for the given target.""" + def _generate_possible_fuzzer_dependencies_legacy(self, dir_prefix, + fuzz_target): + """Generates all possible dependencies for the given target. + + This implementation represents the legacy archive schema prior to version 1 + and should not be used for new tests; we keep it around for backwards + compatibility. + + New tests should use a combination of + `_generate_possible_fuzzer_dependencies()` and + `_resolve_relative_dependency_paths()`. + """ needed_files = [ f'{fuzz_target}', f'{fuzz_target}.exe', @@ -175,6 +186,40 @@ def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): ] return [os.path.join(dir_prefix, file) for file in needed_files] + def _generate_possible_fuzzer_dependencies(self, fuzz_target): + """Returns a list of dependencies as file paths relative to + {fuzz_target}.runtime_deps, as they appear in runtime_deps files in real + archives. + """ + return [ + f'./{fuzz_target}', + f'{fuzz_target}.owners', + f'{fuzz_target}.runtime_deps', + f'{fuzz_target}.dSYM/Contents/Resources/DWARF/some_dependency', + './libbase.so', + '../../tools/valgrind/asan/', + '../../third_party/llvm-build/Release+Asserts/bin/llvm-symbolizer', + '../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib', + 'third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ld-linux-x86-64.so.2', + './libatomic.so', + 'icudtl.dat', + f'bin/run_{fuzz_target}', + '../../testing/location_tags.json', + ] + + def _resolve_relative_dependency_paths(self, deps_paths): + """Returns a list of dependencies as normalized file paths, i.e. with + relative path separators like './' and '../' resolved to their true + directory names. + """ + + # Runtime deps include file paths that begin with ../../ so the build + # directory is assumed to be two levels deep into the file tree. 
+ return [ + os.path.normpath(os.path.join('out/build/', file)) + for file in deps_paths + ] + def _generate_runtime_deps(self, deps): def _mock_open(_): @@ -189,12 +234,19 @@ def _mock_open(_): def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers + def _set_archive_schema_version(self, version): + self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value, + version) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies(self, dir_prefix): + def test_possible_dependencies_legacy(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from - the runtime_deps file.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + the runtime_deps file, using the legacy archive schema where dependency + paths are interpreted as relative to the archive root and `../../` is + remapped to `src_root/`.""" + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(deps_files) @@ -204,29 +256,13 @@ def test_possible_dependencies(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_archive_without_normalized_path( + def test_possible_dependencies_deps_without_normalized_path_legacy( self, dir_prefix): """Tests that the chrome build handler correctly handles mixed-up normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( - dir_prefix, 'my_fuzzer') - self._add_files_to_archive(needed_files) - - # we want our runtime_deps to have normalized path so that they do not - # exactly match the archive paths. 
- self._generate_runtime_deps(deps_files) - self._declare_fuzzers(['my_fuzzer']) - to_extract = self.build.get_target_dependencies('my_fuzzer') - to_extract = [f.name for f in to_extract] - self.assertCountEqual(to_extract, needed_files) - - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): - """Tests that the chrome build handler correctly handles mixed-up - normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive([os.path.normpath(f) for f in needed_files]) self._generate_runtime_deps(deps_files) @@ -237,13 +273,14 @@ def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): [os.path.normpath(f) for f in needed_files]) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_other_fuzzer_not_extracted(self, dir_prefix): + def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): """Tests that the chrome build handler only unpacks dependencies for the requested fuzzer, even if other fuzzers exist in the build.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') - other_fuzzer = self._generate_possible_fuzzer_dependencies( + other_fuzzer = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'other_fuzzer') self._add_files_to_archive(list(set(needed_files + other_fuzzer))) self._generate_runtime_deps(deps_files) @@ -253,10 +290,10 @@ def test_other_fuzzer_not_extracted(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_dsyms_are_correctly_unpacked(self, dir_prefix): + def test_dsyms_are_correctly_unpacked_legacy(self, dir_prefix): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. """ - needed_files = self._generate_possible_fuzzer_dependencies( + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(['my_fuzzer']) @@ -265,3 +302,110 @@ def test_dsyms_are_correctly_unpacked(self, dir_prefix): dsym_path = os.path.join( dir_prefix, 'my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency') self.assertIn(dsym_path, to_extract) + + def test_possible_dependencies(self): + """Tests that all the necessary dependencies are correctly extracted from + the runtime_deps file. + + Under archive schema version 1, dependency paths in `runtime_deps` files + are interpreted as being relative to the file itself, meaning that they must + be normalized to the equivalent path relative to the archive root before + they can be extracted. 
+ """ + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + deps_files = self._resolve_relative_dependency_paths(deps_entries) + self._add_files_to_archive(deps_files) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, deps_files) + + def test_other_fuzzer_not_extracted(self): + """Tests that the chrome build handler only unpacks dependencies for the + requested fuzzer, even if other fuzzers exist in the build.""" + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + needed_files = self._resolve_relative_dependency_paths(deps_entries) + other_fuzzer = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('other_fuzzer')) + self._add_files_to_archive(list(set(needed_files + other_fuzzer))) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer', 'other_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, needed_files) + + def test_dsyms_are_correctly_unpacked(self): + """Tests that even if not listed in the runtime deps, dSYMs are correctly + unpacked.""" + self._set_archive_schema_version(1) + needed_files = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('my_fuzzer')) + self._add_files_to_archive(needed_files) + self._generate_runtime_deps(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertIn( + 'out/build/my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency', + to_extract) + + +class ChromeBuildArchiveManifestTest(unittest.TestCase): + """Test for reading clusterfuzz_manifest.json for Chrome archives.""" + + def setUp(self): + test_helpers.patch(self, [ + 'clusterfuzz._internal.system.archive.ArchiveReader.file_exists', + 'clusterfuzz._internal.system.archive.ArchiveReader', + 'clusterfuzz._internal.system.archive.open', + ]) + self.mock.file_exists.return_value = False + + def _generate_manifest(self, archive_schema_version): + """Mocks open calls so that they return a buffer containing valid JSON for + the given archive schema version.""" + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write( + json.dumps({ + 'archive_schema_version': archive_schema_version + }).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def _generate_invalid_manifest(self): + """Mocks open calls so that they return a buffer containing invalid contents + for clusterfuzz_manifest.json.""" + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write(json.dumps({'my_field': 1}).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def test_manifest_is_correctly_read(self): + """Tests that the manifest is correctly read and used to set the archive + schema version if it exists and that the cases of a missing or invalid + manifest are handled correctly.""" + + # No manifest exists; should default to archive schema version 0 (legacy). + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 0) + + # Invalid manifest; should default to version 0. 
+ self.mock.file_exists.return_value = True + self._generate_invalid_manifest() + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 0) + + # Valid manifest. + self._generate_manifest(1) + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 1) From 1e173512a34055de6956b1604169f7d242b67b94 Mon Sep 17 00:00:00 2001 From: Javan Lacerda Date: Wed, 11 Feb 2026 10:39:26 -0300 Subject: [PATCH 06/10] remove k8s node selector (#5162) Remove node selector from K8s jobs template. This node selector was necessary while we had a single cluster for running both the kata jobs and the cronjobs. Now we have a separate cluster for kata jobs, and removing the node selector allow us to have many different node pools able to run kata containers. Signed-off-by: Javan Lacerda --- src/clusterfuzz/_internal/k8s/job_template.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/clusterfuzz/_internal/k8s/job_template.yaml b/src/clusterfuzz/_internal/k8s/job_template.yaml index 5d54de49df..843231c9b9 100644 --- a/src/clusterfuzz/_internal/k8s/job_template.yaml +++ b/src/clusterfuzz/_internal/k8s/job_template.yaml @@ -85,9 +85,5 @@ spec: emptyDir: medium: Memory sizeLimit: 1.9Gi - {% if is_kata %} - nodeSelector: - cloud.google.com/gke-nodepool: kata-enabled-pool - {% endif %} restartPolicy: "{{restart_policy}}" - backoffLimit: 0 \ No newline at end of file + backoffLimit: 0 From 8f544734b4e2e1023a1b8656745a71185903333c Mon Sep 17 00:00:00 2001 From: Vinicius da Costa Date: Wed, 11 Feb 2026 10:43:19 -0300 Subject: [PATCH 07/10] Fix handle of missing customer id in oss-fuzz groups (#5163) Doing `str()` for the value of customer ID from config was preventing the code from checking if it is missing, hindering our debug capability. Also, added a log for the start of the cronjob. --- src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py | 1 + src/clusterfuzz/_internal/google_cloud_utils/google_groups.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py b/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py index 0df087ddf8..7f560e8bf0 100644 --- a/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py +++ b/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py @@ -72,6 +72,7 @@ def sync_project_cc_group(project_name, info): def main(): """Sync OSS-Fuzz projects groups used to CC owners in the issue tracker.""" + logs.info('OSS-Fuzz CC groups sync started.') projects = project_setup.get_oss_fuzz_projects() for project, info in projects: diff --git a/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py b/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py index 4096ba2065..9e0a414bb5 100644 --- a/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py +++ b/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py @@ -117,8 +117,8 @@ def create_google_group(group_name: str, """Create a google group.""" identity_service = get_identity_api() - customer_id = customer_id or str( - local_config.ProjectConfig().get('groups_customer_id')) + customer_id = customer_id or local_config.ProjectConfig().get( + 'groups_customer_id') if not customer_id: logs.error('No customer ID set. 
Unable to create a new google group.') return None From 5c4d421c7b3dc7aee3b86a8eb4099ada26daf111 Mon Sep 17 00:00:00 2001 From: mi-ac Date: Wed, 11 Feb 2026 17:16:25 +0100 Subject: [PATCH 08/10] [tests-syncer] Rename fuzz files in the whole archive (#5164) We already renamed "fuzz-" test cases from crash tests, so that clusterfuzz doesn't confuse them with output cases. This change expands this functionality to all synced folders, also external repos. BUG=http://b/379684065 --- .../_internal/cron/chrome_tests_syncer.py | 73 ++++++++++++------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py b/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py index 844039313b..c01833a535 100644 --- a/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py +++ b/src/clusterfuzz/_internal/cron/chrome_tests_syncer.py @@ -123,23 +123,6 @@ def unpack_crash_testcases(crash_testcases_directory): shell.remove_directory(directory_path) - # Rename all fuzzed testcase files as regular files. - logs.info('Renaming testcase files.') - for root, _, files in os.walk(crash_testcases_directory): - for filename in files: - if not filename.startswith(testcase_manager.FUZZ_PREFIX): - continue - - file_path = os.path.join(root, filename) - stripped_file_name = os.path.basename(file_path)[len( - testcase_manager.FUZZ_PREFIX):] - stripped_file_path = os.path.join( - os.path.dirname(file_path), stripped_file_name) - try: - os.rename(file_path, stripped_file_path) - except Exception as e: - raise RuntimeError(f'Failed to rename testcase {file_path}') from e - # Remove empty files and dirs to avoid the case where a fuzzer randomly # chooses an empty dir/file and generates zero testcases. shell.remove_empty_files(crash_testcases_directory) @@ -253,6 +236,34 @@ def filter_members(member, path): shell.remove_file(local_archive) +def rename_testcase_files(directory): + """Rename files with the 'fuzz-' prefix to avoid picking them up for fuzzing + without modifying them with a fuzzer. + """ + # Rename all fuzzed testcase files as regular files. 
+  for root, _, files in os.walk(directory):
+    for filename in files:
+      if not filename.startswith(testcase_manager.FUZZ_PREFIX):
+        continue
+
+      file_path = os.path.join(root, filename)
+      stripped_file_name = os.path.basename(file_path)[len(
+          testcase_manager.FUZZ_PREFIX):]
+      stripped_file_path = os.path.join(
+          os.path.dirname(file_path), stripped_file_name)
+      try:
+        os.rename(file_path, stripped_file_path)
+      except Exception as e:
+        raise RuntimeError(f'Failed to rename testcase {file_path}') from e
+
+
+def clean_up_filenames(tests_directory, test_folders):
+  """Rename files with the 'fuzz-' prefix in all test folders."""
+  for folder in test_folders:
+    logs.info(f'Renaming testcase files in {folder}')
+    rename_testcase_files(os.path.join(tests_directory, folder))
+
+
 def sync_tests(tests_archive_bucket: str, tests_archive_name: str,
                tests_directory: str):
   """Main sync routine."""
@@ -301,22 +312,28 @@ def sync_tests(tests_archive_bucket: str, tests_archive_name: str,
   create_symbolic_link(tests_directory, 'src/third_party/blink/web_tests',
                        'LayoutTests')
 
+  test_folders = [
+      'CrashTests',
+      'LayoutTests',
+      'WebKit/JSTests/es6',
+      'WebKit/JSTests/stress',
+      'WebKit/LayoutTests',
+      'fuzzilli',
+      'gecko-tests',
+      'v8/test/mjsunit',
+      'spidermonkey',
+      'chakra',
+      'webgl-conformance-tests',
+  ]
+
+  clean_up_filenames(tests_directory, test_folders)
+
   subprocess.check_call(
       [
           'zip',
           '-r',
           tests_archive_local,
-          'CrashTests',
-          'LayoutTests',
-          'WebKit/JSTests/es6',
-          'WebKit/JSTests/stress',
-          'WebKit/LayoutTests',
-          'fuzzilli',
-          'gecko-tests',
-          'v8/test/mjsunit',
-          'spidermonkey',
-          'chakra',
-          'webgl-conformance-tests',
+      ] + test_folders + [
           '-x',
           '*.cc',
          '-x',

From cb7431b7f610aa48f02e4e21e908edb9ef44b2f1 Mon Sep 17 00:00:00 2001
From: Vinicius da Costa
Date: Fri, 13 Feb 2026 15:43:41 -0300
Subject: [PATCH 09/10] Fix credentials for groups settings api (#5165)

#### Motivation

In order to call the groups settings API to allow adding external
members to groups, the service account credentials need to contain the
correct scope `'https://www.googleapis.com/auth/apps.groups.settings'`
to verify its admin role in the corresponding Google Workspace
(oss-fuzz.com in this case).

#### Rationale

Requesting the default credentials with this scope does not work
correctly. My guess is that GKE/GCE gets the Application Default
Credentials via its metadata server, which is configured by default to
issue tokens within a limited set of defined scopes (e.g.,
`cloud-platform`).

An alternative is self-impersonating the service account to generate
new Credentials with the right scopes. This avoids having to deal with
creating a secret containing a new key for the default service account
and then generating the credentials based on this key.

Note: For this to work, the SA must have the `Service Account Token
Creator` role. This is already set for the Compute Engine default
account in all prod environments.

#### Tests

Tested in dev by running the oss_fuzz_cc_groups cronjob with test
groups.
logs: https://screenshot.googleplex.com/76a7vJjjKC4NhCe.png

See the complete investigation in b/477964128.
---
 .../google_cloud_utils/credentials.py   | 30 +++++++++++++++++++
 .../google_cloud_utils/google_groups.py |  3 +-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/clusterfuzz/_internal/google_cloud_utils/credentials.py b/src/clusterfuzz/_internal/google_cloud_utils/credentials.py
index aa1deffde4..e47e831efa 100644
--- a/src/clusterfuzz/_internal/google_cloud_utils/credentials.py
+++ b/src/clusterfuzz/_internal/google_cloud_utils/credentials.py
@@ -16,12 +16,14 @@
 
 from google.auth import compute_engine
 from google.auth import credentials
+from google.auth import impersonated_credentials
 from google.auth.transport import requests
 from google.oauth2 import service_account
 
 from clusterfuzz._internal.base import memoize
 from clusterfuzz._internal.base import retry
 from clusterfuzz._internal.base import utils
+from clusterfuzz._internal.google_cloud_utils import compute_metadata
 from clusterfuzz._internal.google_cloud_utils import secret_manager
 from clusterfuzz._internal.metrics import logs
 from clusterfuzz._internal.system import environment
@@ -99,3 +101,31 @@ def get_signing_credentials(service_account_info):
       request, '', service_account_email=creds.service_account_email)
   token = creds.token
   return signing_creds, token
+
+
+def get_scoped_service_account_credentials(
+    scopes: list[str]) -> impersonated_credentials.Credentials | None:
+  """Gets scoped credentials by self-impersonating the service account."""
+  creds, _ = get_default()
+  service_account_email = getattr(creds, 'service_account_email', None)
+  if service_account_email == 'default':
+    # Resolve the default service account email using GCE metadata server.
+    service_account_email = compute_metadata.get(
+        'instance/service-accounts/default/email')
+
+  if not service_account_email:
+    logs.warning('Could not retrieve service account email when getting '
+                 'scoped credentials.')
+    return None
+
+  logs.info(
+      f'Using scoped credentials from service account: {service_account_email}')
+  scoped_creds = impersonated_credentials.Credentials(
+      source_credentials=creds,
+      target_principal=service_account_email,
+      target_scopes=scopes,
+  )
+  request = requests.Request()
+  scoped_creds.refresh(request)
+
+  return scoped_creds
diff --git a/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py b/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py
index 9e0a414bb5..e6407f6c8a 100644
--- a/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py
+++ b/src/clusterfuzz/_internal/google_cloud_utils/google_groups.py
@@ -43,7 +43,8 @@ def get_identity_api() -> discovery.Resource | None:
 
 def get_group_settings_api() -> discovery.Resource | None:
   """Return the groups settings api client."""
   if not hasattr(_local, 'groups_settings_service'):
-    creds, _ = credentials.get_default()
+    scopes = ['https://www.googleapis.com/auth/apps.groups.settings']
+    creds = credentials.get_scoped_service_account_credentials(scopes)
     _local.groups_settings_service = discovery.build(
         'groupssettings', 'v1', credentials=creds, cache_discovery=False)

From 37736db8c13624ffaec0a7d87f888253d0bb3b0d Mon Sep 17 00:00:00 2001
From: Vinicius da Costa
Date: Wed, 18 Feb 2026 23:21:30 +0000
Subject: [PATCH 10/10] Fixes

---
 src/clusterfuzz/_internal/base/external_users.py     | 10 ++--------
 src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py | 12 ++++++++----
 src/clusterfuzz/_internal/cron/project_setup.py      |  2 +-
 src/clusterfuzz/_internal/datastore/data_types.py    |  2 +-
 4 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/clusterfuzz/_internal/base/external_users.py b/src/clusterfuzz/_internal/base/external_users.py
index e61c245488..a9d4da563c 100644
--- a/src/clusterfuzz/_internal/base/external_users.py
+++ b/src/clusterfuzz/_internal/base/external_users.py
@@ -24,7 +24,6 @@
 
 # OSS-Fuzz issue tracker CC group
 OSS_FUZZ_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com'
-OSS_FUZZ_CC_GROUP_DESC = 'External CCs in OSS-Fuzz issue tracker for project'
 
 
 def _fuzzers_for_job(job_type, include_parents):
@@ -376,14 +375,9 @@ def cc_users_for_job(job_type, security_flag, allow_cc_group=True):
 def get_cc_group_from_job(job_type: str) -> str:
   """Docstring for get_cc_group_from_entity"""
   project_name = data_handler.get_project_name(job_type)
-  return get_project_cc_group_name(project_name)
+  return get_oss_fuzz_project_cc_group(project_name)
 
 
-def get_project_cc_group_name(project_name: str) -> str:
+def get_oss_fuzz_project_cc_group(project_name: str) -> str | None:
   """Return oss-fuzz issue tracker CC group email for a project."""
   return f'{project_name}{OSS_FUZZ_CC_GROUP_SUFFIX}'
-
-
-def get_project_cc_group_description(project_name: str) -> str:
-  """Return oss-fuzz issue tracker CC group description for a project."""
-  return f'{OSS_FUZZ_CC_GROUP_DESC}: {project_name}'
diff --git a/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py b/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py
index 7f560e8bf0..cd22eda774 100644
--- a/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py
+++ b/src/clusterfuzz/_internal/cron/oss_fuzz_cc_groups.py
@@ -13,24 +13,28 @@
 # limitations under the License.
 """Cron to sync OSS-Fuzz projects groups used as CC in the issue tracker."""
 
+from clusterfuzz._internal.base import external_users
 from clusterfuzz._internal.base import utils
 from clusterfuzz._internal.cron import project_setup
 from clusterfuzz._internal.google_cloud_utils import google_groups
 from clusterfuzz._internal.metrics import logs
 
-_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com'
-_CC_GROUP_DESC = 'External CCs in OSS-Fuzz issue tracker for project'
+
+def get_project_cc_group_description(project_name: str) -> str:
+  """Return oss-fuzz issue tracker CC group description for a project."""
+  oss_fuzz_cc_desc = 'External CCs in OSS-Fuzz issue tracker for project'
+  return f'{oss_fuzz_cc_desc}: {project_name}'
 
 
 def sync_project_cc_group(project_name, info):
   """Sync the project's google group used for CCing in the issue tracker."""
-  group_name = f'{project_name}{_CC_GROUP_SUFFIX}'
+  group_name = external_users.get_oss_fuzz_project_cc_group(project_name)
   group_id = google_groups.get_group_id(group_name)
 
   # Create the group and bail out since the CIG API might delay to create a
   # new group. Add members will be done in the next cron run.
   if not group_id:
-    group_description = f'{_CC_GROUP_DESC}: {project_name}'
+    group_description = get_project_cc_group_description(project_name)
     created = google_groups.create_google_group(
         group_name, group_description=group_description)
     if not created:
diff --git a/src/clusterfuzz/_internal/cron/project_setup.py b/src/clusterfuzz/_internal/cron/project_setup.py
index 2eac772344..34943ace3c 100644
--- a/src/clusterfuzz/_internal/cron/project_setup.py
+++ b/src/clusterfuzz/_internal/cron/project_setup.py
@@ -998,7 +998,7 @@ def sync_user_permissions(self, project, info):
         existing_permission = query.get()
         if existing_permission:
           continue
-        # OSS-Fuzz issue tracker ccs use the corresponding project cc group.
+        # For OSS-Fuzz issue tracker, use the project cc google group.
         data_types.ExternalUserPermission(
             email=cc,
             entity_kind=data_types.PermissionEntityKind.JOB,
diff --git a/src/clusterfuzz/_internal/datastore/data_types.py b/src/clusterfuzz/_internal/datastore/data_types.py
index 3adea20bb7..8b17953a73 100644
--- a/src/clusterfuzz/_internal/datastore/data_types.py
+++ b/src/clusterfuzz/_internal/datastore/data_types.py
@@ -208,7 +208,7 @@ class AutoCCType:
   ALL = 1
   # Auto-CC only for security issues.
   SECURITY = 2
-  # Use cc google group for all issues - oss-fuzz specific.
+  # OSS-Fuzz specific - Auto-CC the user's project google group in all issues.
   USE_CC_GROUP = 3
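
The CC flow these patches converge on can be summarized in a short, self-contained sketch: each OSS-Fuzz project gets a `<project>-ccs@oss-fuzz.com` group, and when any external user permission for a job carries the `USE_CC_GROUP` auto-CC type, the group address is appended to the job's CC list alongside the other allowed users. The sketch below is illustrative only, not part of the patches; the helper names and example values are hypothetical, and only the group suffix and the append-and-sort behavior are taken from the patches above.

```python
# Illustrative sketch only (not part of the patches). Helper names and the
# sample values below are hypothetical; the '-ccs@oss-fuzz.com' suffix and the
# append-and-sort behavior mirror the patched external_users module.

OSS_FUZZ_CC_GROUP_SUFFIX = '-ccs@oss-fuzz.com'


def oss_fuzz_cc_group(project_name: str) -> str:
  """Derive the issue tracker CC group address for an OSS-Fuzz project."""
  return f'{project_name}{OSS_FUZZ_CC_GROUP_SUFFIX}'


def cc_list_for_job(project_name: str, auto_cc_users: list,
                    any_use_cc_group: bool) -> list:
  """Append the project group when any permission uses USE_CC_GROUP."""
  ccs = list(auto_cc_users)
  if any_use_cc_group:
    ccs.append(oss_fuzz_cc_group(project_name))
  return sorted(ccs)


if __name__ == '__main__':
  # Hypothetical example: one individual CC plus the project group.
  print(cc_list_for_job('libxml2', ['maintainer@example.com'], True))
  # ['libxml2-ccs@oss-fuzz.com', 'maintainer@example.com']
```

Group membership itself is maintained separately by the `oss_fuzz_cc_groups` cron, which creates a missing group on one run and adds members on subsequent runs, as described in the comments above.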