From f1ea9d54f41a36cb8e9699e63953f44214d1fd47 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Tue, 7 Apr 2026 14:18:30 -0400 Subject: [PATCH 1/3] fix: ModelTrainer and HyperparameterTuner missing environment variables (5613) --- sagemaker-train/src/sagemaker/train/tuner.py | 11 ++-- .../tests/unit/train/test_tuner.py | 58 +++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/sagemaker-train/src/sagemaker/train/tuner.py b/sagemaker-train/src/sagemaker/train/tuner.py index a07a5472f6..92ecc29c52 100644 --- a/sagemaker-train/src/sagemaker/train/tuner.py +++ b/sagemaker-train/src/sagemaker/train/tuner.py @@ -1504,6 +1504,11 @@ def _build_training_job_definition(self, inputs): model_trainer.stopping_condition.max_wait_time_in_seconds ) + # Get environment variables from model_trainer + env = getattr(model_trainer, "environment", None) + if not env or not isinstance(env, dict): + env = None + definition = HyperParameterTrainingJobDefinition( algorithm_specification=algorithm_spec, role_arn=model_trainer.role, @@ -1513,13 +1518,9 @@ def _build_training_job_definition(self, inputs): stopping_condition=stopping_condition, static_hyper_parameters=getattr(self, "static_hyperparameters", None) or {}, enable_managed_spot_training=model_trainer.compute.enable_managed_spot_training, + environment=env, ) - # Pass through environment variables from model_trainer - env = getattr(model_trainer, "environment", None) - if env and isinstance(env, dict): - definition.environment = env - # Pass through VPC config from model_trainer networking = getattr(model_trainer, "networking", None) if networking and hasattr(networking, "_to_vpc_config"): diff --git a/sagemaker-train/tests/unit/train/test_tuner.py b/sagemaker-train/tests/unit/train/test_tuner.py index d1062c680a..388adb4307 100644 --- a/sagemaker-train/tests/unit/train/test_tuner.py +++ b/sagemaker-train/tests/unit/train/test_tuner.py @@ -596,3 +596,61 @@ def test_build_training_job_definition_includes_spot_params(self): assert isinstance( definition.stopping_condition.max_wait_time_in_seconds, int ), "Max wait time should be set" + + def test_build_training_job_definition_includes_environment_variables(self): + """Test that _build_training_job_definition includes environment variables. + + This test verifies the fix for GitHub issue #5613 where tuning jobs were + missing environment variables that were set on the ModelTrainer. + """ + mock_trainer = _create_mock_model_trainer() + mock_trainer.environment = { + "FOO": "bar", + "RANDOM_STATE": "42", + } + + tuner = HyperparameterTuner( + model_trainer=mock_trainer, + objective_metric_name="accuracy", + hyperparameter_ranges=_create_single_hp_range(), + ) + + definition = tuner._build_training_job_definition(None) + + assert definition.environment is not None, "Environment should not be None" + assert definition.environment == { + "FOO": "bar", + "RANDOM_STATE": "42", + }, "Environment variables should match those set on ModelTrainer" + + def test_build_training_job_definition_with_none_environment(self): + """Test that _build_training_job_definition handles None environment gracefully.""" + mock_trainer = _create_mock_model_trainer() + mock_trainer.environment = None + + tuner = HyperparameterTuner( + model_trainer=mock_trainer, + objective_metric_name="accuracy", + hyperparameter_ranges=_create_single_hp_range(), + ) + + definition = tuner._build_training_job_definition(None) + + assert definition.environment is None, "Environment should be None when not set" + + def test_build_training_job_definition_with_empty_environment(self): + """Test that _build_training_job_definition handles empty environment gracefully.""" + mock_trainer = _create_mock_model_trainer() + mock_trainer.environment = {} + + tuner = HyperparameterTuner( + model_trainer=mock_trainer, + objective_metric_name="accuracy", + hyperparameter_ranges=_create_single_hp_range(), + ) + + definition = tuner._build_training_job_definition(None) + + assert definition.environment is None, ( + "Environment should be None when empty dict is provided" + ) From dec47aba821d0896ec9ce9af2a6a2ae0024a526d Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Tue, 7 Apr 2026 14:28:31 -0400 Subject: [PATCH 2/3] fix: address review comments (iteration #1) --- sagemaker-train/src/sagemaker/train/tuner.py | 8 ++++---- sagemaker-train/tests/unit/train/test_tuner.py | 10 +++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sagemaker-train/src/sagemaker/train/tuner.py b/sagemaker-train/src/sagemaker/train/tuner.py index 92ecc29c52..533cc0e484 100644 --- a/sagemaker-train/src/sagemaker/train/tuner.py +++ b/sagemaker-train/src/sagemaker/train/tuner.py @@ -1504,10 +1504,10 @@ def _build_training_job_definition(self, inputs): model_trainer.stopping_condition.max_wait_time_in_seconds ) - # Get environment variables from model_trainer - env = getattr(model_trainer, "environment", None) - if not env or not isinstance(env, dict): - env = None + # Get environment variables from model_trainer. + # environment is a defined attribute on ModelTrainer (dict | None). + # We pass it through as-is; even an empty dict is valid for the API. + env = model_trainer.environment definition = HyperParameterTrainingJobDefinition( algorithm_specification=algorithm_spec, diff --git a/sagemaker-train/tests/unit/train/test_tuner.py b/sagemaker-train/tests/unit/train/test_tuner.py index 388adb4307..537971562a 100644 --- a/sagemaker-train/tests/unit/train/test_tuner.py +++ b/sagemaker-train/tests/unit/train/test_tuner.py @@ -639,7 +639,11 @@ def test_build_training_job_definition_with_none_environment(self): assert definition.environment is None, "Environment should be None when not set" def test_build_training_job_definition_with_empty_environment(self): - """Test that _build_training_job_definition handles empty environment gracefully.""" + """Test that _build_training_job_definition passes through empty environment. + + An empty dict is valid for the SageMaker API, so we pass it through as-is + rather than silently converting it to None. + """ mock_trainer = _create_mock_model_trainer() mock_trainer.environment = {} @@ -651,6 +655,6 @@ def test_build_training_job_definition_with_empty_environment(self): definition = tuner._build_training_job_definition(None) - assert definition.environment is None, ( - "Environment should be None when empty dict is provided" + assert definition.environment == {}, ( + "Empty dict environment should be passed through as-is" ) From 862ff2d13c060025e30b626e0b904c52883b55be Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Wed, 8 Apr 2026 13:02:04 -0400 Subject: [PATCH 3/3] fix: address review comments (iteration #2) --- sagemaker-train/src/sagemaker/train/tuner.py | 19 +++++++-- .../tests/unit/train/test_tuner.py | 12 +++++- .../unit/train/test_tuner_driver_channels.py | 39 +++++++++++++++++-- 3 files changed, 60 insertions(+), 10 deletions(-) diff --git a/sagemaker-train/src/sagemaker/train/tuner.py b/sagemaker-train/src/sagemaker/train/tuner.py index 533cc0e484..b9ab6bac10 100644 --- a/sagemaker-train/src/sagemaker/train/tuner.py +++ b/sagemaker-train/src/sagemaker/train/tuner.py @@ -1505,11 +1505,15 @@ def _build_training_job_definition(self, inputs): ) # Get environment variables from model_trainer. - # environment is a defined attribute on ModelTrainer (dict | None). - # We pass it through as-is; even an empty dict is valid for the API. + # environment is a defined attribute on ModelTrainer (typed as dict | None). + # We access it directly (consistent with how role, compute, etc. are accessed). + # We pass it through as-is when it's a dict — even an empty dict is valid for the API. + # When it's None or not a dict, we omit it from the constructor so the Pydantic + # model keeps its default (Unassigned), which is then excluded during serialization. env = model_trainer.environment - definition = HyperParameterTrainingJobDefinition( + # Build base kwargs for the definition + definition_kwargs = dict( algorithm_specification=algorithm_spec, role_arn=model_trainer.role, input_data_config=input_data_config if input_data_config else None, @@ -1518,9 +1522,16 @@ def _build_training_job_definition(self, inputs): stopping_condition=stopping_condition, static_hyper_parameters=getattr(self, "static_hyperparameters", None) or {}, enable_managed_spot_training=model_trainer.compute.enable_managed_spot_training, - environment=env, ) + # Only include environment when it's a dict (including empty dict). + # This avoids Pydantic validation errors for non-dict values and keeps + # the field as Unassigned (excluded from serialization) when not set. + if isinstance(env, dict): + definition_kwargs["environment"] = env + + definition = HyperParameterTrainingJobDefinition(**definition_kwargs) + # Pass through VPC config from model_trainer networking = getattr(model_trainer, "networking", None) if networking and hasattr(networking, "_to_vpc_config"): diff --git a/sagemaker-train/tests/unit/train/test_tuner.py b/sagemaker-train/tests/unit/train/test_tuner.py index 537971562a..057ca364ed 100644 --- a/sagemaker-train/tests/unit/train/test_tuner.py +++ b/sagemaker-train/tests/unit/train/test_tuner.py @@ -624,7 +624,13 @@ def test_build_training_job_definition_includes_environment_variables(self): }, "Environment variables should match those set on ModelTrainer" def test_build_training_job_definition_with_none_environment(self): - """Test that _build_training_job_definition handles None environment gracefully.""" + """Test that _build_training_job_definition handles None environment gracefully. + + When environment is None, it should not be passed to the Pydantic constructor, + so the field stays as Unassigned (excluded from serialization). + """ + from sagemaker.core.utils.utils import Unassigned + mock_trainer = _create_mock_model_trainer() mock_trainer.environment = None @@ -636,7 +642,9 @@ def test_build_training_job_definition_with_none_environment(self): definition = tuner._build_training_job_definition(None) - assert definition.environment is None, "Environment should be None when not set" + assert isinstance(definition.environment, Unassigned), ( + "Environment should be Unassigned when model_trainer.environment is None" + ) def test_build_training_job_definition_with_empty_environment(self): """Test that _build_training_job_definition passes through empty environment. diff --git a/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py b/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py index c52559f5cf..d7bdb18686 100644 --- a/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py +++ b/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py @@ -405,8 +405,31 @@ def test_passes_environment_variables(self): definition = tuner._build_training_job_definition(inputs=None) assert definition.environment == {"MY_VAR": "value", "OTHER": "123"} + def test_passes_empty_environment(self): + """Should pass through empty dict environment as-is. + + An empty dict is valid for the SageMaker API, so we pass it through + rather than silently converting it to None/Unassigned. + """ + trainer = _mock_model_trainer(environment={}) + + tuner = HyperparameterTuner( + model_trainer=trainer, + objective_metric_name="accuracy", + hyperparameter_ranges=_hp_ranges(), + ) + + definition = tuner._build_training_job_definition(inputs=None) + assert definition.environment == {}, ( + "Empty dict environment should be passed through as-is" + ) + def test_skips_environment_when_none(self): - """Should not set environment when model_trainer.environment is None.""" + """Should not set environment when model_trainer.environment is None. + + When environment is None, it is not passed to the Pydantic constructor, + so the field stays as Unassigned (excluded from serialization). + """ trainer = _mock_model_trainer(environment=None) tuner = HyperparameterTuner( @@ -416,10 +439,16 @@ def test_skips_environment_when_none(self): ) definition = tuner._build_training_job_definition(inputs=None) - assert _is_unassigned(definition.environment) + assert _is_unassigned(definition.environment), ( + "Environment should be Unassigned when model_trainer.environment is None" + ) def test_skips_environment_when_not_dict(self): - """Should not set environment when it's not a dict (e.g. MagicMock).""" + """Should not set environment when it's not a dict (e.g. MagicMock). + + Non-dict values are not passed to the Pydantic constructor to avoid + validation errors. The field stays as Unassigned. + """ trainer = _mock_model_trainer(environment=MagicMock()) tuner = HyperparameterTuner( @@ -429,7 +458,9 @@ def test_skips_environment_when_not_dict(self): ) definition = tuner._build_training_job_definition(inputs=None) - assert _is_unassigned(definition.environment) + assert _is_unassigned(definition.environment), ( + "Environment should be Unassigned when model_trainer.environment is not a dict" + ) def test_passes_vpc_config(self): """Should set definition.vpc_config from model_trainer.networking._to_vpc_config()."""