From 980eff7675aaf055ed4c2d6a455932dad3d2e34f Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 15 Mar 2026 15:09:28 +0300 Subject: [PATCH 1/4] add `from_preset` to `OptimizationConfig` --- src/autointent/_optimization_config.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/autointent/_optimization_config.py b/src/autointent/_optimization_config.py index 25b591be..547474a9 100644 --- a/src/autointent/_optimization_config.py +++ b/src/autointent/_optimization_config.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any from pydantic import BaseModel, Field, PositiveInt, field_validator @@ -14,6 +14,10 @@ get_default_hfmodel_config, initialize_embedder_config, ) +from .utils import load_preset + +if TYPE_CHECKING: + from .custom_types import SearchSpacePreset class OptimizationConfig(BaseModel): @@ -46,3 +50,7 @@ def validate_embedder_config(cls, v: Any) -> EmbedderConfig: # noqa: ANN401 hpo_config: HPOConfig = HPOConfig() seed: PositiveInt = 42 + + @classmethod + def from_preset(cls, preset: SearchSpacePreset) -> OptimizationConfig: + return cls.model_validate(load_preset(preset)) From ce24b478cdb5713e81793da6906b70335045c034 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 15 Mar 2026 15:21:48 +0300 Subject: [PATCH 2/4] set default separation ratio to None --- src/autointent/configs/_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autointent/configs/_optimization.py b/src/autointent/configs/_optimization.py index 25f2e2a1..8075fde1 100644 --- a/src/autointent/configs/_optimization.py +++ b/src/autointent/configs/_optimization.py @@ -29,7 +29,7 @@ class DataConfig(BaseModel): ) """Fraction of train samples to allocate for validation (if input dataset doesn't contain validation split).""" separation_ratio: FloatFromZeroToOne | None = Field( - 0.5, description="Set to float to prevent data leak between scoring and decision nodes." + None, description="Set to float to prevent data leak between scoring and decision nodes." ) """Set to float to prevent data leak between scoring and decision nodes.""" is_few_shot_train: bool = Field(False, description="Whether to use few-shot training.") From e295ebcf185da6801e0a239b5308ff28a3de2dda Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 15 Mar 2026 12:22:58 +0000 Subject: [PATCH 3/4] Update optimizer_config.schema.json --- docs/optimizer_config.schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index e45d8cbb..9b0a8918 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -113,7 +113,7 @@ "type": "null" } ], - "default": 0.5, + "default": null, "description": "Set to float to prevent data leak between scoring and decision nodes.", "title": "Separation Ratio" }, @@ -498,7 +498,7 @@ "scheme": "ho", "n_folds": 3, "validation_size": 0.2, - "separation_ratio": 0.5, + "separation_ratio": null, "is_few_shot_train": false, "examples_per_intent": 8 } From 716e904c0153b156310e313b4b19d3caf68f7f5f Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 15 Mar 2026 15:36:22 +0300 Subject: [PATCH 4/4] upd unit tests --- tests/data/test_data_handler.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index 58dbb3ee..d1fbf049 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -73,7 +73,9 @@ def mock_split(): def test_data_handler_initialization(sample_multiclass_data): - handler = DataHandler(dataset=Dataset.from_dict(sample_multiclass_data), random_seed=42) + handler = DataHandler( + dataset=Dataset.from_dict(sample_multiclass_data), config=DataConfig(separation_ratio=0.5), random_seed=42 + ) assert handler.multilabel is False assert handler.dataset.n_classes == 2 @@ -84,7 +86,9 @@ def test_data_handler_initialization(sample_multiclass_data): def test_data_handler_multilabel_mode(sample_multilabel_data): - handler = DataHandler(dataset=Dataset.from_dict(sample_multilabel_data), random_seed=42) + handler = DataHandler( + dataset=Dataset.from_dict(sample_multilabel_data), config=DataConfig(separation_ratio=0.5), random_seed=42 + ) assert handler.multilabel is True assert handler.dataset.n_classes == 2