From f8fc284c124c63fa36a340f6fae4a255b911eb53 Mon Sep 17 00:00:00 2001 From: kmontemayor Date: Tue, 2 Jun 2026 23:38:09 +0000 Subject: [PATCH 1/2] Pass resource config URI to graph store networking integration test workers get_graph_store_info() reads get_resource_config() to build the readiness URI (added in #533), but the launched Vertex AI workers had no GIGL_RESOURCE_CONFIG_URI in their env, so they failed with "No resource config provided". Inject it via the compute pool's environment_variables, which launch_graph_store_job propagates to both compute and storage pools. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../distributed/utils/networking_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/integration/distributed/utils/networking_test.py b/tests/integration/distributed/utils/networking_test.py index 4bfeeb838..9c48829b9 100644 --- a/tests/integration/distributed/utils/networking_test.py +++ b/tests/integration/distributed/utils/networking_test.py @@ -1,10 +1,12 @@ import uuid from textwrap import dedent +from google.cloud.aiplatform_v1.types import env_var from parameterized import param, parameterized from gigl.common.constants import DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU from gigl.common.services.vertex_ai import VertexAiJobConfig, VertexAIService +from gigl.env.constants import GIGL_RESOURCE_CONFIG_URI_ENV_KEY from gigl.env.pipelines_config import get_resource_config from tests.test_assets.test_case import TestCase @@ -63,12 +65,23 @@ def test_get_graph_store_info(self, _, storage_nodes, compute_nodes): """ ), ] + # get_graph_store_info() calls get_resource_config() (to build the readiness + # URI), so the launched workers need GIGL_RESOURCE_CONFIG_URI in their env. + # launch_graph_store_job propagates the compute pool's environment_variables + # to both the compute and storage container specs. 
+ resource_config_env_vars = [ + env_var.EnvVar( + name=GIGL_RESOURCE_CONFIG_URI_ENV_KEY, + value=self._resource_config.get_resource_config_uri, + ) + ] compute_cluster_config = VertexAiJobConfig( job_name=job_name, container_uri=DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU, replica_count=compute_nodes, command=command, machine_type="n2-standard-8", + environment_variables=resource_config_env_vars, ) storage_cluster_config = VertexAiJobConfig( job_name=job_name, From 6aa633538850f8a1799e664bddfc65255c07d2c0 Mon Sep 17 00:00:00 2001 From: kmontemayor Date: Tue, 2 Jun 2026 23:41:10 +0000 Subject: [PATCH 2/2] Upload resource config to regional bucket for graph store networking test workers The test runner's resource config URI may be a local path that does not exist on the worker Docker image, so a URI passed directly to the workers via GIGL_RESOURCE_CONFIG_URI may not be readable there. Instead, upload the in-memory resource config to the regional bucket (accessible to workers over GCS), pass that URI to the workers, and delete it in tearDown. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../distributed/utils/networking_test.py | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tests/integration/distributed/utils/networking_test.py b/tests/integration/distributed/utils/networking_test.py index 9c48829b9..21925d66a 100644 --- a/tests/integration/distributed/utils/networking_test.py +++ b/tests/integration/distributed/utils/networking_test.py @@ -6,8 +6,10 @@ from gigl.common.constants import DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU from gigl.common.services.vertex_ai import VertexAiJobConfig, VertexAIService +from gigl.common.utils.proto_utils import ProtoUtils from gigl.env.constants import GIGL_RESOURCE_CONFIG_URI_ENV_KEY from gigl.env.pipelines_config import get_resource_config +from gigl.src.common.utils.file_loader import FileLoader from tests.test_assets.test_case import TestCase @@ -26,8 +28,31 @@ def setUp(self): service_account=self._service_account, staging_bucket=self._staging_bucket, ) + + # get_graph_store_info() (run on the launched workers) calls + # get_resource_config() to build the readiness URI, so the workers need a + # resource config they can read. The test runner's resource config URI may + # be a local path that does not exist on the worker image, so we upload the + # in-memory resource config to the regional bucket (which the workers can + # read from GCS) and pass that URI via GIGL_RESOURCE_CONFIG_URI. 
+ self._file_loader = FileLoader() + self._remote_resource_config_uri = ( + self._resource_config.temp_assets_regional_bucket_path + / "gigl" + / "integration_tests" + / "networking" + / f"resource_config_{uuid.uuid4()}.yaml" + ) + ProtoUtils().write_proto_to_yaml( + proto=self._resource_config.resource_config, + uri=self._remote_resource_config_uri, + ) super().setUp() + def tearDown(self): + self._file_loader.delete_files([self._remote_resource_config_uri]) + super().tearDown() + @parameterized.expand( [ param( @@ -65,14 +90,13 @@ def test_get_graph_store_info(self, _, storage_nodes, compute_nodes): """ ), ] - # get_graph_store_info() calls get_resource_config() (to build the readiness - # URI), so the launched workers need GIGL_RESOURCE_CONFIG_URI in their env. # launch_graph_store_job propagates the compute pool's environment_variables - # to both the compute and storage container specs. + # to both the compute and storage container specs, so the uploaded resource + # config URI is visible to every worker. resource_config_env_vars = [ env_var.EnvVar( name=GIGL_RESOURCE_CONFIG_URI_ENV_KEY, - value=self._resource_config.get_resource_config_uri, + value=self._remote_resource_config_uri.uri, ) ] compute_cluster_config = VertexAiJobConfig(