diff --git a/packages/dash_evals/src/dash_evals/runner/tasks/task_helpers.py b/packages/dash_evals/src/dash_evals/runner/tasks/task_helpers.py index b4856e8..f10ce71 100644 --- a/packages/dash_evals/src/dash_evals/runner/tasks/task_helpers.py +++ b/packages/dash_evals/src/dash_evals/runner/tasks/task_helpers.py @@ -90,9 +90,17 @@ def append_context_injection(solver_chain: list, config: dict) -> None: Args: solver_chain: The solver chain list to append to. - config: Task manifest entry with 'variant' key. + config: Task manifest entry with 'variant' key or 'metadata' key. """ - variant = config.get("variant", {}) + metadata = config.get("metadata", {}) + variant = metadata.get("variant_config") + if variant is None: + variant = config.get("variant", {}) + + # If variant is still just a name string, we can't extract files from it. + if isinstance(variant, str): + return + # Support both old "context_files" and new "files" key context_files = variant.get("files") or variant.get("context_files", []) if context_files: @@ -109,12 +117,18 @@ def append_model_interaction( Args: solver_chain: The solver chain list to append to. - config: Task manifest entry with 'variant' key. + config: Task manifest entry with 'variant' key or 'metadata' key. extra_tools: Additional tools to include alongside MCP (optional). """ tools: list[Tool | MCPServer] = [] - variant = config.get("variant", {}) - mcp_servers_config = variant.get("mcp_servers", []) + metadata = config.get("metadata", {}) + variant = metadata.get("variant_config") + if variant is None: + variant = config.get("variant", {}) + + mcp_servers_config = [] + if not isinstance(variant, str): + mcp_servers_config = variant.get("mcp_servers", []) if mcp_servers_config: sandbox_type = config.get("sandbox_type", "local") diff --git a/packages/dataset_config_dart/lib/src/models/task.dart b/packages/dataset_config_dart/lib/src/models/task.dart index 1ba0380..10a29f1 100644 --- a/packages/dataset_config_dart/lib/src/models/task.dart +++ b/packages/dataset_config_dart/lib/src/models/task.dart @@ -121,7 +121,32 @@ sealed class Task with _$Task { Map? metadata, }) = _Task; + const Task._(); + factory Task.fromJson(Map json) => _$TaskFromJson(json); + + /// Get a job-level task argument. + Object? getArg(String key, [Object? defaultValue]) { + final args = metadata?['args'] as Map?; + return args?[key] ?? defaultValue; + } + + /// Hydrate and return MCP servers defined in the variant. + /// + /// This is a convenience method that returns the raw config maps. + /// In Dart, hydration happens in the Python runner, so we return + /// the config maps that the runner will use. + List> getMcp() { + final vcfg = metadata?['variant_config'] as Map?; + return (vcfg?['mcp_servers'] as List?)?.cast>() ?? + const []; + } + + /// Return the skill paths defined in the variant. + List getSkills() { + final vcfg = metadata?['variant_config'] as Map?; + return (vcfg?['skills'] as List?)?.cast() ?? const []; + } } class TaskMetadata { diff --git a/packages/dataset_config_dart/lib/src/models/task.freezed.dart b/packages/dataset_config_dart/lib/src/models/task.freezed.dart index bbf94e6..f4352a6 100644 --- a/packages/dataset_config_dart/lib/src/models/task.freezed.dart +++ b/packages/dataset_config_dart/lib/src/models/task.freezed.dart @@ -281,8 +281,8 @@ return $default(_that.dataset,_that.files,_that.setup,_that.solver,_that.cleanup /// @nodoc @JsonSerializable() -class _Task implements Task { - const _Task({this.dataset, final Map? files, this.setup, this.solver, this.cleanup, this.scorer, this.metrics, this.model, this.config, @JsonKey(name: 'model_roles') final Map? modelRoles, this.sandbox, this.approval, this.epochs, @JsonKey(name: 'fail_on_error') this.failOnError, @JsonKey(name: 'continue_on_fail') this.continueOnFail, @JsonKey(name: 'message_limit') this.messageLimit, @JsonKey(name: 'token_limit') this.tokenLimit, @JsonKey(name: 'time_limit') this.timeLimit, @JsonKey(name: 'working_limit') this.workingLimit, @JsonKey(name: 'cost_limit') this.costLimit, @JsonKey(name: 'early_stopping') this.earlyStopping, @JsonKey(name: 'display_name') this.displayName, @JsonKey(name: 'func') this.func, @JsonKey(name: 'system_message') this.systemMessage, @JsonKey(name: 'sandbox_parameters') final Map? sandboxParameters, this.name, this.version = 0, final Map? metadata}): _files = files,_modelRoles = modelRoles,_sandboxParameters = sandboxParameters,_metadata = metadata; +class _Task extends Task { + const _Task({this.dataset, final Map? files, this.setup, this.solver, this.cleanup, this.scorer, this.metrics, this.model, this.config, @JsonKey(name: 'model_roles') final Map? modelRoles, this.sandbox, this.approval, this.epochs, @JsonKey(name: 'fail_on_error') this.failOnError, @JsonKey(name: 'continue_on_fail') this.continueOnFail, @JsonKey(name: 'message_limit') this.messageLimit, @JsonKey(name: 'token_limit') this.tokenLimit, @JsonKey(name: 'time_limit') this.timeLimit, @JsonKey(name: 'working_limit') this.workingLimit, @JsonKey(name: 'cost_limit') this.costLimit, @JsonKey(name: 'early_stopping') this.earlyStopping, @JsonKey(name: 'display_name') this.displayName, @JsonKey(name: 'func') this.func, @JsonKey(name: 'system_message') this.systemMessage, @JsonKey(name: 'sandbox_parameters') final Map? sandboxParameters, this.name, this.version = 0, final Map? metadata}): _files = files,_modelRoles = modelRoles,_sandboxParameters = sandboxParameters,_metadata = metadata,super._(); factory _Task.fromJson(Map json) => _$TaskFromJson(json); /// Dataset to evaluate. diff --git a/packages/dataset_config_dart/lib/src/models/variant.dart b/packages/dataset_config_dart/lib/src/models/variant.dart index 15a3bbb..db717af 100644 --- a/packages/dataset_config_dart/lib/src/models/variant.dart +++ b/packages/dataset_config_dart/lib/src/models/variant.dart @@ -53,6 +53,12 @@ sealed class Variant with _$Variant { @JsonKey(name: 'task_parameters') @Default({}) Map taskParameters, + + /// Optional metadata for the variant. + @JsonKey(name: 'metadata') @Default({}) Map metadata, + + /// Optional tags for the variant. + @JsonKey(name: 'tags') @Default([]) List tags, }) = _Variant; const Variant._(); diff --git a/packages/dataset_config_dart/lib/src/models/variant.freezed.dart b/packages/dataset_config_dart/lib/src/models/variant.freezed.dart index f322f5a..fcd2f80 100644 --- a/packages/dataset_config_dart/lib/src/models/variant.freezed.dart +++ b/packages/dataset_config_dart/lib/src/models/variant.freezed.dart @@ -21,7 +21,9 @@ mixin _$Variant { @JsonKey(name: 'mcp_servers') List> get mcpServers;/// Resolved paths to agent skill directories. /// Each directory must contain a `SKILL.md` file. @JsonKey(name: 'skills') List get skills;/// Optional parameters merged into the task config dict at runtime. -@JsonKey(name: 'task_parameters') Map get taskParameters; +@JsonKey(name: 'task_parameters') Map get taskParameters;/// Optional metadata for the variant. +@JsonKey(name: 'metadata') Map get metadata;/// Optional tags for the variant. +@JsonKey(name: 'tags') List get tags; /// Create a copy of Variant /// with the given fields replaced by the non-null parameter values. @JsonKey(includeFromJson: false, includeToJson: false) @@ -34,16 +36,16 @@ $VariantCopyWith get copyWith => _$VariantCopyWithImpl(this as @override bool operator ==(Object other) { - return identical(this, other) || (other.runtimeType == runtimeType&&other is Variant&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.files, files)&&const DeepCollectionEquality().equals(other.mcpServers, mcpServers)&&const DeepCollectionEquality().equals(other.skills, skills)&&const DeepCollectionEquality().equals(other.taskParameters, taskParameters)); + return identical(this, other) || (other.runtimeType == runtimeType&&other is Variant&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.files, files)&&const DeepCollectionEquality().equals(other.mcpServers, mcpServers)&&const DeepCollectionEquality().equals(other.skills, skills)&&const DeepCollectionEquality().equals(other.taskParameters, taskParameters)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&const DeepCollectionEquality().equals(other.tags, tags)); } @JsonKey(includeFromJson: false, includeToJson: false) @override -int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(files),const DeepCollectionEquality().hash(mcpServers),const DeepCollectionEquality().hash(skills),const DeepCollectionEquality().hash(taskParameters)); +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(files),const DeepCollectionEquality().hash(mcpServers),const DeepCollectionEquality().hash(skills),const DeepCollectionEquality().hash(taskParameters),const DeepCollectionEquality().hash(metadata),const DeepCollectionEquality().hash(tags)); @override String toString() { - return 'Variant(name: $name, files: $files, mcpServers: $mcpServers, skills: $skills, taskParameters: $taskParameters)'; + return 'Variant(name: $name, files: $files, mcpServers: $mcpServers, skills: $skills, taskParameters: $taskParameters, metadata: $metadata, tags: $tags)'; } @@ -54,7 +56,7 @@ abstract mixin class $VariantCopyWith<$Res> { factory $VariantCopyWith(Variant value, $Res Function(Variant) _then) = _$VariantCopyWithImpl; @useResult $Res call({ - String name,@JsonKey(name: 'files') List files,@JsonKey(name: 'mcp_servers') List> mcpServers,@JsonKey(name: 'skills') List skills,@JsonKey(name: 'task_parameters') Map taskParameters + String name,@JsonKey(name: 'files') List files,@JsonKey(name: 'mcp_servers') List> mcpServers,@JsonKey(name: 'skills') List skills,@JsonKey(name: 'task_parameters') Map taskParameters,@JsonKey(name: 'metadata') Map metadata,@JsonKey(name: 'tags') List tags }); @@ -71,14 +73,16 @@ class _$VariantCopyWithImpl<$Res> /// Create a copy of Variant /// with the given fields replaced by the non-null parameter values. -@pragma('vm:prefer-inline') @override $Res call({Object? name = null,Object? files = null,Object? mcpServers = null,Object? skills = null,Object? taskParameters = null,}) { +@pragma('vm:prefer-inline') @override $Res call({Object? name = null,Object? files = null,Object? mcpServers = null,Object? skills = null,Object? taskParameters = null,Object? metadata = null,Object? tags = null,}) { return _then(_self.copyWith( name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable as String,files: null == files ? _self.files : files // ignore: cast_nullable_to_non_nullable as List,mcpServers: null == mcpServers ? _self.mcpServers : mcpServers // ignore: cast_nullable_to_non_nullable as List>,skills: null == skills ? _self.skills : skills // ignore: cast_nullable_to_non_nullable as List,taskParameters: null == taskParameters ? _self.taskParameters : taskParameters // ignore: cast_nullable_to_non_nullable -as Map, +as Map,metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,tags: null == tags ? _self.tags : tags // ignore: cast_nullable_to_non_nullable +as List, )); } @@ -160,10 +164,10 @@ return $default(_that);case _: /// } /// ``` -@optionalTypeArgs TResult maybeWhen(TResult Function( String name, @JsonKey(name: 'files') List files, @JsonKey(name: 'mcp_servers') List> mcpServers, @JsonKey(name: 'skills') List skills, @JsonKey(name: 'task_parameters') Map taskParameters)? $default,{required TResult orElse(),}) {final _that = this; +@optionalTypeArgs TResult maybeWhen(TResult Function( String name, @JsonKey(name: 'files') List files, @JsonKey(name: 'mcp_servers') List> mcpServers, @JsonKey(name: 'skills') List skills, @JsonKey(name: 'task_parameters') Map taskParameters, @JsonKey(name: 'metadata') Map metadata, @JsonKey(name: 'tags') List tags)? $default,{required TResult orElse(),}) {final _that = this; switch (_that) { case _Variant() when $default != null: -return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskParameters);case _: +return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskParameters,_that.metadata,_that.tags);case _: return orElse(); } @@ -181,10 +185,10 @@ return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskP /// } /// ``` -@optionalTypeArgs TResult when(TResult Function( String name, @JsonKey(name: 'files') List files, @JsonKey(name: 'mcp_servers') List> mcpServers, @JsonKey(name: 'skills') List skills, @JsonKey(name: 'task_parameters') Map taskParameters) $default,) {final _that = this; +@optionalTypeArgs TResult when(TResult Function( String name, @JsonKey(name: 'files') List files, @JsonKey(name: 'mcp_servers') List> mcpServers, @JsonKey(name: 'skills') List skills, @JsonKey(name: 'task_parameters') Map taskParameters, @JsonKey(name: 'metadata') Map metadata, @JsonKey(name: 'tags') List tags) $default,) {final _that = this; switch (_that) { case _Variant(): -return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskParameters);} +return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskParameters,_that.metadata,_that.tags);} } /// A variant of `when` that fallback to returning `null` /// @@ -198,10 +202,10 @@ return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskP /// } /// ``` -@optionalTypeArgs TResult? whenOrNull(TResult? Function( String name, @JsonKey(name: 'files') List files, @JsonKey(name: 'mcp_servers') List> mcpServers, @JsonKey(name: 'skills') List skills, @JsonKey(name: 'task_parameters') Map taskParameters)? $default,) {final _that = this; +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String name, @JsonKey(name: 'files') List files, @JsonKey(name: 'mcp_servers') List> mcpServers, @JsonKey(name: 'skills') List skills, @JsonKey(name: 'task_parameters') Map taskParameters, @JsonKey(name: 'metadata') Map metadata, @JsonKey(name: 'tags') List tags)? $default,) {final _that = this; switch (_that) { case _Variant() when $default != null: -return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskParameters);case _: +return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskParameters,_that.metadata,_that.tags);case _: return null; } @@ -213,7 +217,7 @@ return $default(_that.name,_that.files,_that.mcpServers,_that.skills,_that.taskP @JsonSerializable() class _Variant extends Variant { - const _Variant({this.name = 'baseline', @JsonKey(name: 'files') final List files = const [], @JsonKey(name: 'mcp_servers') final List> mcpServers = const [], @JsonKey(name: 'skills') final List skills = const [], @JsonKey(name: 'task_parameters') final Map taskParameters = const {}}): _files = files,_mcpServers = mcpServers,_skills = skills,_taskParameters = taskParameters,super._(); + const _Variant({this.name = 'baseline', @JsonKey(name: 'files') final List files = const [], @JsonKey(name: 'mcp_servers') final List> mcpServers = const [], @JsonKey(name: 'skills') final List skills = const [], @JsonKey(name: 'task_parameters') final Map taskParameters = const {}, @JsonKey(name: 'metadata') final Map metadata = const {}, @JsonKey(name: 'tags') final List tags = const []}): _files = files,_mcpServers = mcpServers,_skills = skills,_taskParameters = taskParameters,_metadata = metadata,_tags = tags,super._(); factory _Variant.fromJson(Map json) => _$VariantFromJson(json); /// User-defined variant name from the job file. @@ -256,6 +260,24 @@ class _Variant extends Variant { return EqualUnmodifiableMapView(_taskParameters); } +/// Optional metadata for the variant. + final Map _metadata; +/// Optional metadata for the variant. +@override@JsonKey(name: 'metadata') Map get metadata { + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_metadata); +} + +/// Optional tags for the variant. + final List _tags; +/// Optional tags for the variant. +@override@JsonKey(name: 'tags') List get tags { + if (_tags is EqualUnmodifiableListView) return _tags; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_tags); +} + /// Create a copy of Variant /// with the given fields replaced by the non-null parameter values. @@ -270,16 +292,16 @@ Map toJson() { @override bool operator ==(Object other) { - return identical(this, other) || (other.runtimeType == runtimeType&&other is _Variant&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other._files, _files)&&const DeepCollectionEquality().equals(other._mcpServers, _mcpServers)&&const DeepCollectionEquality().equals(other._skills, _skills)&&const DeepCollectionEquality().equals(other._taskParameters, _taskParameters)); + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Variant&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other._files, _files)&&const DeepCollectionEquality().equals(other._mcpServers, _mcpServers)&&const DeepCollectionEquality().equals(other._skills, _skills)&&const DeepCollectionEquality().equals(other._taskParameters, _taskParameters)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&const DeepCollectionEquality().equals(other._tags, _tags)); } @JsonKey(includeFromJson: false, includeToJson: false) @override -int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(_files),const DeepCollectionEquality().hash(_mcpServers),const DeepCollectionEquality().hash(_skills),const DeepCollectionEquality().hash(_taskParameters)); +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(_files),const DeepCollectionEquality().hash(_mcpServers),const DeepCollectionEquality().hash(_skills),const DeepCollectionEquality().hash(_taskParameters),const DeepCollectionEquality().hash(_metadata),const DeepCollectionEquality().hash(_tags)); @override String toString() { - return 'Variant(name: $name, files: $files, mcpServers: $mcpServers, skills: $skills, taskParameters: $taskParameters)'; + return 'Variant(name: $name, files: $files, mcpServers: $mcpServers, skills: $skills, taskParameters: $taskParameters, metadata: $metadata, tags: $tags)'; } @@ -290,7 +312,7 @@ abstract mixin class _$VariantCopyWith<$Res> implements $VariantCopyWith<$Res> { factory _$VariantCopyWith(_Variant value, $Res Function(_Variant) _then) = __$VariantCopyWithImpl; @override @useResult $Res call({ - String name,@JsonKey(name: 'files') List files,@JsonKey(name: 'mcp_servers') List> mcpServers,@JsonKey(name: 'skills') List skills,@JsonKey(name: 'task_parameters') Map taskParameters + String name,@JsonKey(name: 'files') List files,@JsonKey(name: 'mcp_servers') List> mcpServers,@JsonKey(name: 'skills') List skills,@JsonKey(name: 'task_parameters') Map taskParameters,@JsonKey(name: 'metadata') Map metadata,@JsonKey(name: 'tags') List tags }); @@ -307,14 +329,16 @@ class __$VariantCopyWithImpl<$Res> /// Create a copy of Variant /// with the given fields replaced by the non-null parameter values. -@override @pragma('vm:prefer-inline') $Res call({Object? name = null,Object? files = null,Object? mcpServers = null,Object? skills = null,Object? taskParameters = null,}) { +@override @pragma('vm:prefer-inline') $Res call({Object? name = null,Object? files = null,Object? mcpServers = null,Object? skills = null,Object? taskParameters = null,Object? metadata = null,Object? tags = null,}) { return _then(_Variant( name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable as String,files: null == files ? _self._files : files // ignore: cast_nullable_to_non_nullable as List,mcpServers: null == mcpServers ? _self._mcpServers : mcpServers // ignore: cast_nullable_to_non_nullable as List>,skills: null == skills ? _self._skills : skills // ignore: cast_nullable_to_non_nullable as List,taskParameters: null == taskParameters ? _self._taskParameters : taskParameters // ignore: cast_nullable_to_non_nullable -as Map, +as Map,metadata: null == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,tags: null == tags ? _self._tags : tags // ignore: cast_nullable_to_non_nullable +as List, )); } diff --git a/packages/dataset_config_dart/lib/src/models/variant.g.dart b/packages/dataset_config_dart/lib/src/models/variant.g.dart index 35e3d0c..c9d5b96 100644 --- a/packages/dataset_config_dart/lib/src/models/variant.g.dart +++ b/packages/dataset_config_dart/lib/src/models/variant.g.dart @@ -22,6 +22,10 @@ _Variant _$VariantFromJson(Map json) => _Variant( (json['skills'] as List?)?.map((e) => e as String).toList() ?? const [], taskParameters: json['task_parameters'] as Map? ?? const {}, + metadata: json['metadata'] as Map? ?? const {}, + tags: + (json['tags'] as List?)?.map((e) => e as String).toList() ?? + const [], ); Map _$VariantToJson(_Variant instance) => { @@ -30,4 +34,6 @@ Map _$VariantToJson(_Variant instance) => { 'mcp_servers': instance.mcpServers, 'skills': instance.skills, 'task_parameters': instance.taskParameters, + 'metadata': instance.metadata, + 'tags': instance.tags, }; diff --git a/packages/dataset_config_dart/lib/src/resolvers/eval_set_resolver.dart b/packages/dataset_config_dart/lib/src/resolvers/eval_set_resolver.dart index 887a18e..10a4f7f 100644 --- a/packages/dataset_config_dart/lib/src/resolvers/eval_set_resolver.dart +++ b/packages/dataset_config_dart/lib/src/resolvers/eval_set_resolver.dart @@ -109,7 +109,22 @@ class EvalSetResolver { // Enrich each sample with task-level metadata final inspectSamples = []; for (final sample in tc.samples) { - final enriched = {...?sample.metadata}; + // Priority: Sample > Variant > Task + final enriched = { + ...?tc.metadata, + ...tc.variant.metadata, + ...?sample.metadata, + }; + + // Merge tags + final allTags = { + ..._parseTags(tc.metadata?['tags']), + ...tc.variant.tags, + ..._parseTags(sample.metadata?['tags']), + }; + if (allTags.isNotEmpty) { + enriched['tags'] = allTags.toList()..sort(); + } if (tc.saveExamples) { enriched['save_examples'] = true; @@ -178,13 +193,22 @@ class EvalSetResolver { if (tc.systemMessage != null) 'system_message': tc.systemMessage, if (tc.saveExamples) 'save_examples': true, if (tc.examplesDir != null) 'examples_dir': tc.examplesDir, - // Propagate image_prefix from sandbox for container image resolution if (sandboxCfg['image_prefix'] != null) 'image_prefix': sandboxCfg['image_prefix'], - // Merge any task-level metadata from YAML + // Priority: Variant > Task ...?tc.metadata, + ...tc.variant.metadata, }; + // Merge task-level tags + final allTaskTags = { + ..._parseTags(tc.metadata?['tags']), + ...tc.variant.tags, + }; + if (allTaskTags.isNotEmpty) { + metadata['tags'] = allTaskTags.toList()..sort(); + } + // Determine sandbox for this task Object? taskSandbox; if (tc.sandbox != null) { @@ -565,12 +589,18 @@ class EvalSetResolver { // Parse task_parameters final taskParameters = (vDef['task_parameters'] as Map?)?.cast() ?? {}; + // Parse metadata and tags + final metadata = (vDef['metadata'] as Map?)?.cast() ?? {}; + final tags = (vDef['tags'] as List?)?.cast() ?? const []; + return Variant( name: name, files: files, mcpServers: mcpServers, skills: skills, taskParameters: taskParameters, + metadata: metadata, + tags: tags, ); } @@ -601,6 +631,21 @@ class EvalSetResolver { static bool _isGlob(String pattern) => pattern.contains('*') || pattern.contains('?') || pattern.contains('['); + static List _parseTags(dynamic tags) { + if (tags == null) return const []; + if (tags is String) { + return tags + .split(',') + .map((t) => t.trim()) + .where((t) => t.isNotEmpty) + .toList(); + } + if (tags is List) { + return tags.map((t) => t.toString()).toList(); + } + return [tags.toString()]; + } + /// Expand a glob pattern relative to [baseDir], returning matching files. static List _expandGlobFiles(String baseDir, String pattern) { final glob = Glob(pattern); diff --git a/packages/dataset_config_python/src/dataset_config_python/hydrate.py b/packages/dataset_config_python/src/dataset_config_python/hydrate.py index 0ca148a..1fabc6e 100644 --- a/packages/dataset_config_python/src/dataset_config_python/hydrate.py +++ b/packages/dataset_config_python/src/dataset_config_python/hydrate.py @@ -233,12 +233,21 @@ def get_skill_tool(config: dict) -> Tool | None: """Create the skill tool if the variant has skills configured. Args: - config: Task manifest entry with 'variant' key. + config: Task manifest entry with 'variant' key or 'metadata' key. Returns: The skill Tool, or None if no skills are configured. """ - variant = config.get("variant", {}) + # Try metadata first (resolver structure) + variant = config.get("metadata", {}).get("variant_config") + if variant is None: + # Fallback to top-level variant field (legacy or direct call) + variant = config.get("variant", config) + + # If variant is just a name string, we can't extract skills from it. + if isinstance(variant, str): + return None + # Support both old "skill_paths" and new "skills" key skill_paths = variant.get("skills") or variant.get("skill_paths", []) if skill_paths: @@ -255,19 +264,10 @@ def build_task_metadata(config: dict) -> dict: """Build task metadata dictionary from manifest config. Args: - config: Task manifest entry with 'variant', 'save_examples', etc. + config: Task manifest entry with 'metadata' dictionary. Returns: Metadata dictionary for Task. """ - metadata: dict[str, Any] = {} - variant = config.get("variant", {}) - if variant: - metadata["variant_config"] = variant - - if config.get("save_examples") and config.get("examples_dir"): - metadata["save_examples"] = True - metadata["examples_dir"] = config["examples_dir"] - metadata["task_variant"] = config.get("task_name", "unknown") - - return metadata + # The resolver.py already builds and merges the full metadata dictionary. + return config.get("metadata", {}) diff --git a/packages/dataset_config_python/src/dataset_config_python/models/task.py b/packages/dataset_config_python/src/dataset_config_python/models/task.py index bfa0c4d..1f6da63 100644 --- a/packages/dataset_config_python/src/dataset_config_python/models/task.py +++ b/packages/dataset_config_python/src/dataset_config_python/models/task.py @@ -99,3 +99,38 @@ class Task(BaseModel): version: Any = 0 """Version of task.""" + + def get_arg(self, key: str, default: Any = None) -> Any: + """Get a job-level task argument. + + Args: + key: Argument key. + default: Default value if key is missing. + + Returns: + The argument value or default. + """ + return self.metadata.get("args", {}).get(key, default) if self.metadata else default + + def get_mcp(self) -> list[Any]: + """Hydrate and return MCP servers defined in the variant. + + Returns: + List of hydrated MCPServer objects. + """ + from dataset_config_python.hydrate import create_mcp_servers + + vcfg = self.metadata.get("variant_config", {}) if self.metadata else {} + mcp_configs = vcfg.get("mcp_servers", []) + return create_mcp_servers(mcp_configs) + + def get_skills(self) -> Any: + """Hydrate and return the skill tool defined in the variant. + + Returns: + The hydrated skill Tool object, or None if no skills are defined. + """ + from dataset_config_python.hydrate import get_skill_tool + + vcfg = self.metadata.get("variant_config", {}) if self.metadata else {} + return get_skill_tool(vcfg) diff --git a/packages/dataset_config_python/src/dataset_config_python/models/variant.py b/packages/dataset_config_python/src/dataset_config_python/models/variant.py index 81eb40c..3c5ee8e 100644 --- a/packages/dataset_config_python/src/dataset_config_python/models/variant.py +++ b/packages/dataset_config_python/src/dataset_config_python/models/variant.py @@ -37,3 +37,9 @@ class Variant(BaseModel): task_parameters: dict[str, Any] = Field(default_factory=dict) """Optional parameters merged into the task config dict at runtime.""" + + metadata: dict[str, Any] = Field(default_factory=dict) + """Optional metadata for the variant.""" + + tags: list[str] = Field(default_factory=list) + """Optional tags for the variant.""" diff --git a/packages/dataset_config_python/src/dataset_config_python/resolver.py b/packages/dataset_config_python/src/dataset_config_python/resolver.py index 1160782..73649ad 100644 --- a/packages/dataset_config_python/src/dataset_config_python/resolver.py +++ b/packages/dataset_config_python/src/dataset_config_python/resolver.py @@ -38,6 +38,17 @@ def _is_glob(pattern: str) -> bool: return "*" in pattern or "?" in pattern or "[" in pattern +def _parse_tags(tags: Any) -> list[str]: + """Parse tags into a list of strings, handling lists or comma-separated strings.""" + if not tags: + return [] + if isinstance(tags, str): + return [t.strip() for t in tags.split(",") if t.strip()] + if isinstance(tags, list): + return [str(t) for t in tags] + return [str(tags)] + + def resolve( dataset_path: str, job_names: list[str], @@ -164,7 +175,22 @@ def _build_eval_set( # Enrich each sample with task-level metadata inspect_samples: list[Sample] = [] for sample in tc.samples: - enriched: dict[str, Any] = {**(sample.metadata or {})} + # Priority: Sample > Variant > Task + enriched: dict[str, Any] = dict(tc.metadata or {}) + enriched.update(tc.variant.metadata or {}) + enriched.update(sample.metadata or {}) + + # Handle tags merge + all_tags: set[str] = set() + if tc.metadata and "tags" in tc.metadata: + all_tags.update(_parse_tags(tc.metadata["tags"])) + if tc.variant.tags: + all_tags.update(_parse_tags(tc.variant.tags)) + if sample.metadata and "tags" in sample.metadata: + all_tags.update(_parse_tags(sample.metadata["tags"])) + + if all_tags: + enriched["tags"] = sorted(list(all_tags)) if tc.save_examples: enriched["save_examples"] = True @@ -232,8 +258,22 @@ def _build_eval_set( # Propagate image_prefix from job for container image resolution if (job.sandbox or {}).get("image_prefix"): task_metadata["image_prefix"] = job.sandbox["image_prefix"] + + # Priority: Variant > Task if tc.metadata: task_metadata.update(tc.metadata) + if tc.variant.metadata: + task_metadata.update(tc.variant.metadata) + + # Handle tags merge + all_task_tags: set[str] = set() + if tc.metadata and "tags" in tc.metadata: + all_task_tags.update(_parse_tags(tc.metadata["tags"])) + if tc.variant.tags: + all_task_tags.update(_parse_tags(tc.variant.tags)) + + if all_task_tags: + task_metadata["tags"] = sorted(list(all_task_tags)) # Determine sandbox for this task task_sandbox = None @@ -547,8 +587,10 @@ def _resolve_variant( for raw in raw_mcp: mcp_servers.append(McpServerConfig.from_yaml(raw)) - # Task parameters + # Resolve task_parameters, metadata, and tags task_parameters: dict[str, Any] = vdef.get("task_parameters") or {} + metadata: dict[str, Any] = vdef.get("metadata") or {} + tags: list[str] = vdef.get("tags") or [] return Variant( name=name, @@ -556,6 +598,8 @@ def _resolve_variant( mcp_servers=mcp_servers, skills=skill_paths, task_parameters=task_parameters, + metadata=metadata, + tags=tags, )