diff --git a/.github/workflows/cli_tests.yml b/.github/workflows/cli_tests.yml new file mode 100644 index 0000000..9c8a472 --- /dev/null +++ b/.github/workflows/cli_tests.yml @@ -0,0 +1,41 @@ +name: CLI Tests + +on: + pull_request: + paths: + - 'packages/eval_cli/**' + - 'packages/eval_config/**' + - '.github/workflows/cli_tests.yml' + push: + branches: + - main + paths: + - 'packages/eval_cli/**' + - 'packages/eval_config/**' + - '.github/workflows/cli_tests.yml' + +jobs: + cli-tests: + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install Flutter + run: | + git clone https://github.com/flutter/flutter.git --depth 1 -b stable $HOME/flutter + echo "$HOME/flutter/bin" >> $GITHUB_PATH + echo "$HOME/.pub-cache/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: flutter pub get + + - name: Analyze + working-directory: packages/eval_cli + run: dart analyze --fatal-infos + + - name: Run tests + working-directory: packages/eval_cli + run: flutter test diff --git a/.github/workflows/config_tests.yml b/.github/workflows/config_tests.yml new file mode 100644 index 0000000..b69d494 --- /dev/null +++ b/.github/workflows/config_tests.yml @@ -0,0 +1,39 @@ +name: Config Tests + +on: + pull_request: + paths: + - 'packages/eval_config/**' + - '.github/workflows/config_tests.yml' + push: + branches: + - main + paths: + - 'packages/eval_config/**' + - '.github/workflows/config_tests.yml' + +jobs: + config-tests: + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install Flutter + run: | + git clone https://github.com/flutter/flutter.git --depth 1 -b stable $HOME/flutter + echo "$HOME/flutter/bin" >> $GITHUB_PATH + echo "$HOME/.pub-cache/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: flutter pub get + + - name: Analyze + working-directory: packages/eval_config + run: dart analyze --fatal-infos + + - name: 
Run tests + working-directory: packages/eval_config + run: dart test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..286021d --- /dev/null +++ b/.gitignore @@ -0,0 +1,234 @@ +# Added by human +data/ +logs/ +_uploaded_logs/ +*.log +htmlcov/ +*/htmlcov/ +**/htmlcov/ +*/.coverage/ +**/.coverage/ +.coverage +coverage +.dart_tool/ +.devals-tool/ +/docs/_build +/docs/dart_docs +logs/ + + +## +# Generated by Flutter, Python, Firebase... who knows what else. +## + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + +# C extensions +*.so + +# Firebase service account keys (NEVER COMMIT THESE!) +*firebase*key*.json +firebase-adminsdk-*.json +serviceAccountKey.json + +# Distribution / packaging +.Python +build/ +dist/ +downloads/ +eggs/ +.eggs/ +pkgs/runner/lib/ +pkgs/runner/lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.# Sandbox - track structure, ignore generated files +sandbox/**/.dart_tool/ +sandbox/**/build/ +sandbox/**/.flutter-plugins +sandbox/**/.flutter-plugins-dependencies +sandbox/**/pubspec.lock +sandbox/**/.packages +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# not pinning sub-dependencies is a problem, you can uncomment the next line. +# Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# For more info, see: https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file +# poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm.lock + +# PEP 582; used by e.g. 
pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +# Virtual environment should be in pkgs/runner/ subdirectory +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pkgs/runner/.venv +pkgs/runner/env/ +pkgs/runner/venv/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +.DS_Store +/**/.DS_Store + +# intellij +.idea + +# Miscellaneous +*.class +*.log +*.pyc +*.swp +.DS_Store +.atom/ +.build/ +.buildlog/ +.history +.svn/ +.swiftpm/ +migrate_working_dir/ + +# IntelliJ related +*.iml +*.ipr +*.iws +.idea/ + +# The .vscode folder contains launch configuration and tasks you configure in +# VS Code which you may wish to be included in version control, so this line +# is commented out by default. +#.vscode/ + +# Flutter/Dart/Pub related +**/ios/Flutter/.last_build_id +.dart_tool/ +.flutter-plugins-dependencies +**/.flutter-plugins-dependencies +.pub-cache/ +**/.pub-cache/ +.pub/ +**/.pub/ +/build/ +/coverage/ + +# Symbolication related +app.*.symbols + +# Obfuscation related +app.*.map.json + +# Android Studio will place build artifacts here +/android/app/debug +/android/app/profile +/android/app/release diff --git a/packages/eval_cli/.dart_tool/pub/workspace_ref.json b/packages/eval_cli/.dart_tool/pub/workspace_ref.json new file mode 100644 index 0000000..1cd13ef --- /dev/null +++ b/packages/eval_cli/.dart_tool/pub/workspace_ref.json @@ -0,0 +1,3 @@ +{ + "workspaceRoot": "../../../.." +} diff --git a/packages/eval_cli/README.md b/packages/eval_cli/README.md new file mode 100644 index 0000000..c3c2919 --- /dev/null +++ b/packages/eval_cli/README.md @@ -0,0 +1,5 @@ +# Deval CLI + +An opinionated CLI that manages dash-evals evaluation tasks and jobs. 
Requires the [Dart SDK](https://dart.dev/get-dart). + +📖 **[Full documentation](../../docs/cli.md)** — setup, commands, and usage. \ No newline at end of file diff --git a/packages/eval_cli/analysis_options.yaml b/packages/eval_cli/analysis_options.yaml new file mode 100644 index 0000000..df4015c --- /dev/null +++ b/packages/eval_cli/analysis_options.yaml @@ -0,0 +1,9 @@ +include: package:lints/recommended.yaml + +linter: + rules: + - prefer_single_quotes + - directives_ordering + +formatter: + trailing_commas: preserve diff --git a/packages/eval_cli/bin/devals.dart b/packages/eval_cli/bin/devals.dart new file mode 100644 index 0000000..c323d96 --- /dev/null +++ b/packages/eval_cli/bin/devals.dart @@ -0,0 +1,29 @@ +/// Entry point for the deval CLI. +library; + +import 'dart:io'; + +import 'package:args/command_runner.dart'; +import 'package:devals/devals.dart'; +import 'package:howdy/howdy.dart'; + +Future main(List args) async { + final runner = CommandRunner('devals', 'Manage dash-evals projects') + ..addCommand(InitCommand()) + ..addCommand(DoctorCommand()) + ..addCommand(CreateCommand()) + ..addCommand(PublishCommand()) + ..addCommand(RunCommand()) + ..addCommand(ViewCommand()); + + try { + final exitCode = await runner.run(args) ?? 0; + exit(exitCode); + } on CliException catch (e) { + Text.error('${e.message}\n'); + exit(e.exitCode); + } on Exception catch (e) { + Text.error('Error: $e\n'); + exit(1); + } +} diff --git a/packages/eval_cli/dart_test.yaml b/packages/eval_cli/dart_test.yaml new file mode 100644 index 0000000..01a9aab --- /dev/null +++ b/packages/eval_cli/dart_test.yaml @@ -0,0 +1,9 @@ +# Test configuration for the eval_cli package. +# Run tests serially to avoid race conditions with Directory.current +# mutations in filesystem_utils_test.dart and dataset_reader_test.dart. 
+concurrency: 1 + +tags: + e2e: + # E2E tests spawn subprocesses and may be slower + timeout: 120s diff --git a/packages/eval_cli/example/.gitignore b/packages/eval_cli/example/.gitignore new file mode 100644 index 0000000..3820a95 --- /dev/null +++ b/packages/eval_cli/example/.gitignore @@ -0,0 +1,45 @@ +# Miscellaneous +*.class +*.log +*.pyc +*.swp +.DS_Store +.atom/ +.build/ +.buildlog/ +.history +.svn/ +.swiftpm/ +migrate_working_dir/ + +# IntelliJ related +*.iml +*.ipr +*.iws +.idea/ + +# The .vscode folder contains launch configuration and tasks you configure in +# VS Code which you may wish to be included in version control, so this line +# is commented out by default. +#.vscode/ + +# Flutter/Dart/Pub related +**/doc/api/ +**/ios/Flutter/.last_build_id +.dart_tool/ +.flutter-plugins-dependencies +.pub-cache/ +.pub/ +/build/ +/coverage/ + +# Symbolication related +app.*.symbols + +# Obfuscation related +app.*.map.json + +# Android Studio will place build artifacts here +/android/app/debug +/android/app/profile +/android/app/release diff --git a/packages/eval_cli/example/.metadata b/packages/eval_cli/example/.metadata new file mode 100644 index 0000000..a1fd05e --- /dev/null +++ b/packages/eval_cli/example/.metadata @@ -0,0 +1,45 @@ +# This file tracks properties of this Flutter project. +# Used by Flutter tool to assess capabilities and perform upgrades etc. +# +# This file should be version controlled and should not be manually edited. 
+ +version: + revision: "af179acb1353167ce34e324b7d06e6c0091a4edf" + channel: "beta" + +project_type: app + +# Tracks metadata for the flutter migrate command +migration: + platforms: + - platform: root + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + - platform: android + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + - platform: ios + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + - platform: linux + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + - platform: macos + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + - platform: web + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + - platform: windows + create_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + base_revision: af179acb1353167ce34e324b7d06e6c0091a4edf + + # User provided section + + # List of Local paths (relative to this file) that should be + # ignored by the migrate tool. + # + # Files that are not part of the templates will be ignored by default. + unmanaged_files: + - 'lib/main.dart' + - 'ios/Runner.xcodeproj/project.pbxproj' diff --git a/packages/eval_cli/example/README.md b/packages/eval_cli/example/README.md new file mode 100644 index 0000000..83f7e49 --- /dev/null +++ b/packages/eval_cli/example/README.md @@ -0,0 +1,17 @@ +# example + +A new Flutter project. + +## Getting Started + +This project is a starting point for a Flutter application. 
+ +A few resources to get you started if this is your first Flutter project: + +- [Learn Flutter](https://docs.flutter.dev/get-started/learn-flutter) +- [Write your first Flutter app](https://docs.flutter.dev/get-started/codelab) +- [Flutter learning resources](https://docs.flutter.dev/reference/learning-resources) + +For help getting started with Flutter development, view the +[online documentation](https://docs.flutter.dev/), which offers tutorials, +samples, guidance on mobile development, and a full API reference. diff --git a/packages/eval_cli/example/analysis_options.yaml b/packages/eval_cli/example/analysis_options.yaml new file mode 100644 index 0000000..0d29021 --- /dev/null +++ b/packages/eval_cli/example/analysis_options.yaml @@ -0,0 +1,28 @@ +# This file configures the analyzer, which statically analyzes Dart code to +# check for errors, warnings, and lints. +# +# The issues identified by the analyzer are surfaced in the UI of Dart-enabled +# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be +# invoked from the command line by running `flutter analyze`. + +# The following line activates a set of recommended lints for Flutter apps, +# packages, and plugins designed to encourage good coding practices. +include: package:flutter_lints/flutter.yaml + +linter: + # The lint rules applied to this project can be customized in the + # section below to disable rules from the `package:flutter_lints/flutter.yaml` + # included above or to enable additional rules. A list of all available lints + # and their documentation is published at https://dart.dev/lints. + # + # Instead of disabling a lint rule for the entire project in the + # section below, it can also be suppressed for a single line of code + # or a specific dart file by using the `// ignore: name_of_lint` and + # `// ignore_for_file: name_of_lint` syntax on the line or in the file + # producing the lint. 
+ rules: + # avoid_print: false # Uncomment to disable the `avoid_print` rule + # prefer_single_quotes: true # Uncomment to enable the `prefer_single_quotes` rule + +# Additional information about this file can be found at +# https://dart.dev/guides/language/analysis-options diff --git a/packages/eval_cli/example/devals.yaml b/packages/eval_cli/example/devals.yaml new file mode 100644 index 0000000..28e5241 --- /dev/null +++ b/packages/eval_cli/example/devals.yaml @@ -0,0 +1,3 @@ +# Marks this directory as a project that contains dash evals. +# Created by `devals init`. +dataset: ./evals diff --git a/packages/eval_cli/example/evals/jobs/local_dev.yaml b/packages/eval_cli/example/evals/jobs/local_dev.yaml new file mode 100644 index 0000000..1af8005 --- /dev/null +++ b/packages/eval_cli/example/evals/jobs/local_dev.yaml @@ -0,0 +1,86 @@ +# ============================================================================= +# Job Configuration: local_dev +# ============================================================================= +# A job defines what subset of your dataset to run and how to run it. +# Jobs are the primary way to control evaluation runs. +# +# To run this job: +# devals run local_dev + + +# ============================================================================= +# RUNTIME SETTINGS (Optional) +# ============================================================================= +# !!!Important!!! +# These override built-in defaults. If you're just getting started, +# I recommend you ignore these for now. +# Uncomment and modify as needed. + +# Directory for evaluation logs (relative to dataset root) +# A timestamped subdirectory is created automatically for each run. 
+# logs_dir: ../logs + +# Sandbox environment: "local", "docker", or "podman" +# - local: Run directly on host (fastest, no isolation) +# - docker: Run in Docker containers (recommended for code execution) +# - podman: Run in Podman containers (rootless alternative to Docker) +# sandbox_type: local + +# Maximum concurrent API connections to model providers. +# Higher = faster but may hit rate limits with a large dataset +# max_connections: 10 + +# Maximum retry attempts for failed API calls. +# Helps handle transient errors. +# max_retries: 3 + +# ============================================================================= +# MODELS +# ============================================================================= +# Which models to evaluate. Format: "provider/model-name" +# If omitted, falls back to DEFAULT_MODELS from the Python registries. +models: + - google/gemini-2.0-flash + +# ============================================================================= +# VARIANTS (Optional) +# ============================================================================= +# Which configuration variants to test. +# Variants control access to tools and context. +# Each variant is a map of feature flags. An empty map {} is the baseline. +# If omitted, only the baseline (no features) is used. +# +# Example: +# variants: +# baseline: {} # no extra features +# context_only: { context_files: [../../context/flutter.md] } +# mcp_only: { mcp_servers: [dart] } + +# ============================================================================= +# TASKS +# ============================================================================= +# Which tasks to run and how. Uses paths for discovery and inline for overrides. +# If omitted, runs ALL discovered tasks. 
+# +# Task discovery via glob patterns (relative to dataset root): +# tasks: +# paths: [tasks/*] +# +# Per-task overrides: +# tasks: +# inline: +# task_id: +# # (use allowed_variants in task.yaml to whitelist variants) +# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude) +# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include) +# system_message: | # Override system prompt for this task +# Custom instructions... +# +# Simple format (run all samples with job-level settings): +# tasks: +# inline: +# task_id: {} +# +tasks: + inline: + get_started: {} diff --git a/packages/eval_cli/example/evals/tasks/get_started/task.yaml b/packages/eval_cli/example/evals/tasks/get_started/task.yaml new file mode 100644 index 0000000..d1bb9e4 --- /dev/null +++ b/packages/eval_cli/example/evals/tasks/get_started/task.yaml @@ -0,0 +1,26 @@ +# ============================================================================= +# Starter Task +# ============================================================================= +# This task points at your project root as its workspace and runs a simple +# codebase analysis evaluation. + +func: analyze_codebase + +# Workspace: points to the project root containing pubspec.yaml +workspace: + path: ../../ + +samples: + inline: + - id: get_started + difficulty: easy + tags: [] + # Input: The prompt given to the model + input: | + Explore this codebase and suggest one improvement + to the code quality, readability, or architecture. + # Target: Expected output or grading criteria + target: | + The suggestion should be specific, actionable, and reference + actual code in the project. It should explain why the change + improves the codebase. 
diff --git a/packages/eval_cli/example/lib/main.dart b/packages/eval_cli/example/lib/main.dart new file mode 100644 index 0000000..244a702 --- /dev/null +++ b/packages/eval_cli/example/lib/main.dart @@ -0,0 +1,122 @@ +import 'package:flutter/material.dart'; + +void main() { + runApp(const MyApp()); +} + +class MyApp extends StatelessWidget { + const MyApp({super.key}); + + // This widget is the root of your application. + @override + Widget build(BuildContext context) { + return MaterialApp( + title: 'Flutter Demo', + theme: ThemeData( + // This is the theme of your application. + // + // TRY THIS: Try running your application with "flutter run". You'll see + // the application has a purple toolbar. Then, without quitting the app, + // try changing the seedColor in the colorScheme below to Colors.green + // and then invoke "hot reload" (save your changes or press the "hot + // reload" button in a Flutter-supported IDE, or press "r" if you used + // the command line to start the app). + // + // Notice that the counter didn't reset back to zero; the application + // state is not lost during the reload. To reset the state, use hot + // restart instead. + // + // This works for code too, not just values: Most code changes can be + // tested with just a hot reload. + colorScheme: .fromSeed(seedColor: Colors.deepPurple), + ), + home: const MyHomePage(title: 'Flutter Demo Home Page'), + ); + } +} + +class MyHomePage extends StatefulWidget { + const MyHomePage({super.key, required this.title}); + + // This widget is the home page of your application. It is stateful, meaning + // that it has a State object (defined below) that contains fields that affect + // how it looks. + + // This class is the configuration for the state. It holds the values (in this + // case the title) provided by the parent (in this case the App widget) and + // used by the build method of the State. Fields in a Widget subclass are + // always marked "final". 
+ + final String title; + + @override + State createState() => _MyHomePageState(); +} + +class _MyHomePageState extends State { + int _counter = 0; + + void _incrementCounter() { + setState(() { + // This call to setState tells the Flutter framework that something has + // changed in this State, which causes it to rerun the build method below + // so that the display can reflect the updated values. If we changed + // _counter without calling setState(), then the build method would not be + // called again, and so nothing would appear to happen. + _counter++; + }); + } + + @override + Widget build(BuildContext context) { + // This method is rerun every time setState is called, for instance as done + // by the _incrementCounter method above. + // + // The Flutter framework has been optimized to make rerunning build methods + // fast, so that you can just rebuild anything that needs updating rather + // than having to individually change instances of widgets. + return Scaffold( + appBar: AppBar( + // TRY THIS: Try changing the color here to a specific color (to + // Colors.amber, perhaps?) and trigger a hot reload to see the AppBar + // change color while the other colors stay the same. + backgroundColor: Theme.of(context).colorScheme.inversePrimary, + // Here we take the value from the MyHomePage object that was created by + // the App.build method, and use it to set our appbar title. + title: Text(widget.title), + ), + body: Center( + // Center is a layout widget. It takes a single child and positions it + // in the middle of the parent. + child: Column( + // Column is also a layout widget. It takes a list of children and + // arranges them vertically. By default, it sizes itself to fit its + // children horizontally, and tries to be as tall as its parent. + // + // Column has various properties to control how it sizes itself and + // how it positions its children. 
Here we use mainAxisAlignment to + // center the children vertically; the main axis here is the vertical + // axis because Columns are vertical (the cross axis would be + // horizontal). + // + // TRY THIS: Invoke "debug painting" (choose the "Toggle Debug Paint" + // action in the IDE, or press "p" in the console), to see the + // wireframe for each widget. + mainAxisAlignment: .center, + children: [ + const Text('You have pushed the button this many times:'), + Text( + '$_counter', + style: Theme.of(context).textTheme.headlineMedium, + ), + ], + ), + ), + floatingActionButton: FloatingActionButton( + onPressed: _incrementCounter, + tooltip: 'Increment', + child: const Icon(Icons.add), + ), + ); + } +} diff --git a/packages/eval_cli/example/logs/2026-02-13_17-39-54/.eval-set-id b/packages/eval_cli/example/logs/2026-02-13_17-39-54/.eval-set-id new file mode 100644 index 0000000..c7d5688 --- /dev/null +++ b/packages/eval_cli/example/logs/2026-02-13_17-39-54/.eval-set-id @@ -0,0 +1 @@ +DUVSLcpnZ6bJdWRi2dqGg7 \ No newline at end of file diff --git a/packages/eval_cli/example/logs/2026-02-13_17-39-54/2026-02-13T17-39-55+00-00_get-started-baseline_H4rxfkjPK5ug2Bcmo9VodZ.json b/packages/eval_cli/example/logs/2026-02-13_17-39-54/2026-02-13T17-39-55+00-00_get-started-baseline_H4rxfkjPK5ug2Bcmo9VodZ.json new file mode 100644 index 0000000..eaa4e04 --- /dev/null +++ b/packages/eval_cli/example/logs/2026-02-13_17-39-54/2026-02-13T17-39-55+00-00_get-started-baseline_H4rxfkjPK5ug2Bcmo9VodZ.json @@ -0,0 +1,6862 @@ +{ + "version": 2, + "status": "success", + "eval": { + "eval_set_id": "DUVSLcpnZ6bJdWRi2dqGg7", + "eval_id": "YJZKbC9omerSZ2dv9CZEH2", + "run_id": "XX3Pv6Bug8YpTZTJLDZqie", + "created": "2026-02-13T17:39:55+00:00", + "task": "get_started:baseline", + "task_id": "H4rxfkjPK5ug2Bcmo9VodZ", + "task_version": 0, + "task_file": "/Users/ewindmill/development/dash_evals/pkgs/eval_runner/src/eval_runner/runner/tasks/analyze_codebase.py", + "task_display_name": 
"get_started:baseline", + "task_registry_name": "analyze_codebase", + "task_attribs": {}, + "task_args": { + "task_config": { + "id": "get_started", + "task": "analyze_codebase", + "samples": [ + { + "id": "get_started", + "input": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n", + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals", + "tests": null, + "difficulty": "easy", + "tags": [], + "metadata": {}, + "workspace_git": null, + "workspace_git_ref": null + } + ], + "variant": { + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + }, + "sandbox_type": "local", + "system_message": null + } + }, + "task_args_passed": { + "task_config": { + "id": "get_started", + "task": "analyze_codebase", + "samples": [ + { + "id": "get_started", + "input": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. 
It should explain why the change\nimproves the codebase.\n", + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals", + "tests": null, + "difficulty": "easy", + "tags": [], + "metadata": {}, + "workspace_git": null, + "workspace_git_ref": null + } + ], + "variant": { + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + }, + "sandbox_type": "local", + "system_message": null + } + }, + "dataset": { + "name": "get_started_baseline", + "samples": 1, + "sample_ids": [ + "get_started" + ], + "shuffled": false + }, + "sandbox": { + "type": "local" + }, + "model": "google/gemini-2.0-flash", + "model_generate_config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "model_args": {}, + "config": { + "epochs": 1, + "epochs_reducer": [ + "mean" + ], + "fail_on_error": 0.05, + "continue_on_fail": false, + "retry_on_error": 3, + "message_limit": 50, + "token_limit": 200000, + "time_limit": 180, + "max_tasks": 10, + "max_sandboxes": 32, + "sandbox_cleanup": true, + "log_samples": true, + "log_realtime": true, + "log_images": true, + "score_display": true + }, + "revision": { + "type": "git", + "origin": "https://github.com/flutter/dash_evals.git", + "commit": "5092907", + "dirty": true + }, + "packages": { + "inspect_ai": "0.3.160" + }, + "metadata": { + "variant_config": { + "label": "baseline", + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + } + }, + "scorers": [ + { + "name": "model_graded_fact", + "options": {}, + "metrics": [ + { + "name": "inspect_ai/accuracy", + "options": {} + }, + { + "name": "inspect_ai/stderr", + "options": {} + } + ], + "metadata": {} + } + ] + }, + "plan": { + "name": "plan", + "steps": [ + { + "solver": "setup_workspace", + "params": {}, + "params_passed": {} + }, + { + "solver": "_add_workspace_system_message", + "params": { + "template": "You are an expert code reviewer analyzing a codebase.\n\nYour task is 
to:\n\n1. Explore the codebase at {workspace} using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n" + }, + "params_passed": { + "template": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at {workspace} using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n" + } + }, + { + "solver": "react", + "params": { + "name": "code_analyzer", + "description": "Expert code reviewer who explores and analyzes codebases.", + "prompt": [ + null, + "\nYou are part of a multi-agent system designed to make agent coordination and execution easy. Agents uses two primary abstraction: **Agents** and **Handoffs**. An agent encompasses instructions and tools and can hand off a conversation to another agent when appropriate. Handoffs are achieved by calling a handoff function,generally named `transfer_to_`. Transfers between agents are handled seamlessly in the background; do not mention or draw attention to these transfers in your conversation with the user.\n", + "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. 
You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n", + "\nWhen you have completed the task and have an answer, call the {submit}() tool to report it.\n" + ], + "tools": [ + { + "type": "tool", + "name": "bash", + "params": { + "timeout": 120 + } + } + ], + "model": null, + "attempts": 1, + "submit": null, + "on_continue": null, + "retry_refusals": null, + "compaction": null, + "truncation": "disabled" + }, + "params_passed": { + "name": "code_analyzer", + "description": "Expert code reviewer who explores and analyzes codebases.", + "tools": [ + { + "type": "tool", + "name": "bash", + "params": { + "timeout": 120 + } + } + ] + } + } + ], + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + } + }, + "results": { + "total_samples": 1, + "completed_samples": 1, + "scores": [ + { + "name": "model_graded_fact", + "scorer": "model_graded_fact", + "scored_samples": 1, + "unscored_samples": 0, + "params": {}, + "metrics": { + "accuracy": { + "name": "accuracy", + "value": 1.0, + "params": {} + }, + "stderr": { + "name": "stderr", + "value": 0.0, + "params": {} + } + } + } + ] + }, + "stats": { + "started_at": "2026-02-13T17:39:55+00:00", + "completed_at": "2026-02-13T17:40:58+00:00", + "model_usage": { + "google/gemini-2.0-flash": { + "input_tokens": 11033, + "output_tokens": 835, + "total_tokens": 11868, + "reasoning_tokens": 0 + } + } + }, + "invalidated": false, + "samples": [ + { + "id": "get_started", + "epoch": 1, + "input": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. 
It should explain why the change\nimproves the codebase.\n", + "messages": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) 
to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + }, + { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "TQprdtjEjmzJsuL3QR22ed", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run 
and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. 
An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "role": "tool", + "tool_call_id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash" + }, + { + "id": "ahZPXipD5neR29en3zRtUf", + "content": [ + { + "type": "text", + "text": "This file configures a job named `local_dev`. 
It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "8PhDW7rvHM4JkMit9nee6a", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n", + "role": "tool", + "tool_call_id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash" + }, + { + "id": "W6P45d4wxabT8JGajuSEpL", + "content": [ + { + "type": "text", + "text": "And then the `get_started` task directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "KBpTgoAkxQDLZLkreVi62z", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 
857 Feb 13 09:39 task.yaml\n", + "role": "tool", + "tool_call_id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash" + }, + { + "id": "6HbpKpsZvH63bUtdAfVCm3", + "content": [ + { + "type": "text", + "text": "Finally, the contents of `task.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "2EQtjKBudJPfGdxBaCduci", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Starter Task\n# =============================================================================\n# This task points at your project root as its workspace and runs a simple\n# codebase analysis evaluation.\n\nfunc: analyze_codebase\n\n# Workspace: points to the project root containing pubspec.yaml\nworkspace:\n path: ../../\n\nsamples:\n - id: get_started\n difficulty: easy\n tags: []\n # Input: The prompt given to the model\n input: |\n Explore this codebase and suggest one improvement\n to the code quality, readability, or architecture.\n # Target: Expected output or grading criteria\n target: |\n The suggestion should be specific, actionable, and reference\n actual code in the project. It should explain why the change\n improves the codebase.\n", + "role": "tool", + "tool_call_id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash" + }, + { + "id": "RZXDGe7zL3uhsSSp8sVD2n", + "content": [ + { + "type": "text", + "text": "The `task.yaml` file defines the `get_started` task. 
It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n" + }, + { + "type": "text", + "text": "The `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors." + } + ], + "source": "generate", + "role": "assistant", + "model": "gemini-2.0-flash" + } + ], + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "RZXDGe7zL3uhsSSp8sVD2n", + "content": [ + { + "type": "text", + "text": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. 
If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n" + }, + { + "type": "text", + "text": "The `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors." + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "submit_UzSGKfgg6TdCSXHUPiiK2f", + "function": "submit", + "arguments": { + "answer": "The `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors." + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. 
This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "usage": { + "input_tokens": 2359, + "output_tokens": 241, + "total_tokens": 2600, + "reasoning_tokens": 0 + }, + "time": 2.359958540997468 + }, + "scores": { + "model_graded_fact": { + "value": "C", + "answer": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. 
It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "explanation": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n", + "metadata": { + "grading": [ + { + "id": "XafA2ear8uf24woz47FXR5", + "content": "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n\n************\n[Expert]: The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n\n************\n[Submission]: The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. 
This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", + "role": "user" + }, + { + "id": "fWg3jrQEJzwVXS2CRhzbwH", + "content": [ + { + "type": "text", + "text": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. 
The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n" + } + ], + "source": "generate", + "role": "assistant", + "model": "gemini-2.0-flash" + } + ] + }, + "history": [] + } + }, + "metadata": { + "difficulty": "easy", + "tags": [], + "workspace": "/var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals", + "workspace_template": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals", + "setup_error": "flutter pub get failed: Expected to find project root in current working directory.\n" + }, + "store": {}, + "events": [ + { + "uuid": "byiFxK6uBNaoUQZtn5ms2Z", + "span_id": "85d8da5884834cc2ab4f200e55a8ee5a", + "timestamp": "2026-02-13T17:39:56.095002+00:00", + "working_start": 580834.240523333, + "event": "span_begin", + "id": "85d8da5884834cc2ab4f200e55a8ee5a", + "type": "init", + "name": "init" + }, + { + "uuid": "oRj24GsSVfoU4MYpKnVQLd", + "span_id": "85d8da5884834cc2ab4f200e55a8ee5a", + "timestamp": "2026-02-13T17:39:56.096219+00:00", + "working_start": 580834.241738833, + "event": "sample_init", + "sample": { + "input": "attachment://9db899dd5425b74fa4eb825cb7100dfe", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. 
It should explain why the change\nimproves the codebase.\n", + "id": "get_started", + "metadata": { + "difficulty": "easy", + "tags": [], + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals" + } + }, + "state": { + "messages": [ + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "attachment://9db899dd5425b74fa4eb825cb7100dfe", + "source": "input", + "role": "user" + } + ], + "tools": [], + "tool_choice": null, + "store": {}, + "output": { + "model": "google/gemini-2.0-flash", + "choices": [], + "completion": "" + }, + "completed": false, + "metadata": { + "difficulty": "easy", + "tags": [], + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals" + } + } + }, + { + "uuid": "kQmxtVwwwXL9Z78ZVDXBAS", + "span_id": "85d8da5884834cc2ab4f200e55a8ee5a", + "timestamp": "2026-02-13T17:39:57.029989+00:00", + "working_start": 580835.175459166, + "event": "span_end", + "id": "85d8da5884834cc2ab4f200e55a8ee5a" + }, + { + "uuid": "cPGSCKyBvppnEu38WhiKiu", + "span_id": "2e768b3896c0454695912d0f6ecda7e9", + "timestamp": "2026-02-13T17:39:57.033844+00:00", + "working_start": 0.0014658750733360648, + "event": "span_begin", + "id": "2e768b3896c0454695912d0f6ecda7e9", + "type": "solvers", + "name": "solvers" + }, + { + "uuid": "hLgSva9uuXAvqCjs7Loq3K", + "span_id": "1e729eedd7bb49aaadb5a72dfc81f801", + "timestamp": "2026-02-13T17:39:57.035369+00:00", + "working_start": 0.0029908750439062715, + "event": "span_begin", + "id": "1e729eedd7bb49aaadb5a72dfc81f801", + "parent_id": "2e768b3896c0454695912d0f6ecda7e9", + "type": "solver", + "name": "setup_workspace" + }, + { + "uuid": "FDRXEG3XhEwepwykXUuDaK", + "span_id": "1e729eedd7bb49aaadb5a72dfc81f801", + "timestamp": "2026-02-13T17:39:57.040134+00:00", + "working_start": 0.5839843751164153, + "event": "sandbox", + "action": "exec", + "cmd": "flutter pub get", + "options": { + "cwd": "/var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "result": 
1, + "output": "Expected to find project root in current working directory.\n\n\n ", + "completed": "2026-02-13T17:39:57.616413+00:00" + }, + { + "uuid": "YAyTQAC4NZiH3rsguHiYRp", + "span_id": "1e729eedd7bb49aaadb5a72dfc81f801", + "timestamp": "2026-02-13T17:39:57.618741+00:00", + "working_start": 0.5862727090716362, + "event": "state", + "changes": [ + { + "op": "add", + "path": "/metadata/workspace_template", + "value": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals" + }, + { + "op": "add", + "path": "/metadata/setup_error", + "value": "flutter pub get failed: Expected to find project root in current working directory.\n" + }, + { + "op": "replace", + "path": "/metadata/workspace", + "value": "/var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals", + "replaced": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals" + } + ] + }, + { + "uuid": "h6FiBjCRsT8Ag2rPNWfzix", + "span_id": "1e729eedd7bb49aaadb5a72dfc81f801", + "timestamp": "2026-02-13T17:39:57.619620+00:00", + "working_start": 0.5871493341401219, + "event": "span_end", + "id": "1e729eedd7bb49aaadb5a72dfc81f801" + }, + { + "uuid": "Dkp88RpLGRZQ6NA8QnsjxK", + "span_id": "c72d150dc23b4478bf1cc801d919e21a", + "timestamp": "2026-02-13T17:39:57.620412+00:00", + "working_start": 0.58794179209508, + "event": "span_begin", + "id": "c72d150dc23b4478bf1cc801d919e21a", + "parent_id": "2e768b3896c0454695912d0f6ecda7e9", + "type": "solver", + "name": "_add_workspace_system_message" + }, + { + "uuid": "QdtuuktV3XAUL8R4kLbJmW", + "span_id": "c72d150dc23b4478bf1cc801d919e21a", + "timestamp": "2026-02-13T17:39:57.621497+00:00", + "working_start": 0.5890261670574546, + "event": "state", + "changes": [ + { + "op": "remove", + "path": "/messages/0/source" + }, + { + "op": "replace", + "path": "/messages/0/role", + "value": "system", + "replaced": "user" + }, + { + "op": "replace", + "path": "/messages/0/id", + "value": "dgiCsCRsNgzMKagz4PEePk", + "replaced": 
"YKoBXPM2D3LAYfpRtJ8rSG" + }, + { + "op": "replace", + "path": "/messages/0/content", + "value": "attachment://b38086a4c737ca5ce168983fe7d100a3", + "replaced": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n" + }, + { + "op": "add", + "path": "/messages/1", + "value": { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "attachment://9db899dd5425b74fa4eb825cb7100dfe", + "source": "input", + "role": "user" + } + } + ] + }, + { + "uuid": "bBcjJLqGijR7MmMMZGxJ4C", + "span_id": "c72d150dc23b4478bf1cc801d919e21a", + "timestamp": "2026-02-13T17:39:57.622658+00:00", + "working_start": 0.5901881670579314, + "event": "span_end", + "id": "c72d150dc23b4478bf1cc801d919e21a" + }, + { + "uuid": "btjnjYttxmmKEBSWxFpW6p", + "span_id": "f9fd97e908c24545a6197437d66953af", + "timestamp": "2026-02-13T17:39:57.623438+00:00", + "working_start": 0.5909668750828132, + "event": "span_begin", + "id": "f9fd97e908c24545a6197437d66953af", + "parent_id": "2e768b3896c0454695912d0f6ecda7e9", + "type": "solver", + "name": "react" + }, + { + "uuid": "nJCxAuB5SieXdvtLhD7rDW", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:39:57.624098+00:00", + "working_start": 0.5916270840680227, + "event": "span_begin", + "id": "3be0985210724574b90ce9f916060031", + "parent_id": "f9fd97e908c24545a6197437d66953af", + "type": "agent", + "name": "react" + }, + { + "uuid": "8EyRYSE4yeEvocPX4fLAvd", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:39:57.628007+00:00", + "working_start": 0.5954973760526627, + "pending": true, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. 
If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "google/gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "dXkQhSRwyp49QVMnevrVHv", + "content": "", + "source": "generate", + "role": "assistant", + "model": "google/gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "" + }, + "cache": "write" + }, + { + "uuid": "7TfAJ6wopjBzWeDdXwNtDP", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:39:57.625359+00:00", + "working_start": 0.5928878751583397, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. 
Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." + } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + 
"content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n", + "usage": { + "input_tokens": 376, + "output_tokens": 38, + "total_tokens": 414, + "reasoning_tokens": 0 + }, + "time": 1.1703125830972567 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "Vs5weewkZgco7bkbbf4sMH" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": 
"attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:03 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/75", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=1125", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://9fd93716298d2fa1b9cd7a1cfd07f1dd", + "X-Google-GFE-Request-Trace": "acnuqa7:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/75,acnuqa7:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://8f96980ac42fc857ff3ea8542b167baa", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + 
"finishReason": "STOP", + "avgLogprobs": -0.23214397932353772, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "8mGPacrEMpeL-sAPpJCH4A0", + "usageMetadata": { + "candidatesTokenCount": 38, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 38 + } + ], + "promptTokenCount": 376, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 376 + } + ], + "totalTokenCount": 414 + } + }, + "time": 1.1703125830972567 + }, + "completed": "2026-02-13T17:40:03.897356+00:00", + "working_time": 1.1703125830972567 + }, + { + "uuid": "VNZw8NzN5LJgFQX32fKid4", + "span_id": "6567f98fb6074bce8d3d179905cea5da", + "timestamp": "2026-02-13T17:40:03.898529+00:00", + "working_start": 1.7649136672262102, + "event": "span_begin", + "id": "6567f98fb6074bce8d3d179905cea5da", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" + }, + { + "uuid": "YnstKDZtJMAYejF8WExgJU", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:03.897808+00:00", + "working_start": 1.7641930002719164, + "event": "tool", + "type": "function", + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "events": [], + "completed": "2026-02-13T17:40:03.958236+00:00", + "working_time": 0.060252416940100494, + 
"message_id": "D6EvgQPgAoveZwXndCQX7i" + }, + { + "uuid": "X8zEYPuAVPRve9qMgREVvG", + "span_id": "6567f98fb6074bce8d3d179905cea5da", + "timestamp": "2026-02-13T17:40:03.900894+00:00", + "working_start": 1.8203975422075018, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'ls -la'", + "options": { + "timeout": 120 + }, + "result": 0, + "output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n ", + "completed": "2026-02-13T17:40:03.954135+00:00" + }, + { + "uuid": "WBkbVcHubi2GAkGCgsxquS", + "span_id": "6567f98fb6074bce8d3d179905cea5da", + "timestamp": "2026-02-13T17:40:03.956218+00:00", + "working_start": 1.8224308341741562, + "event": "span_end", + "id": "6567f98fb6074bce8d3d179905cea5da" + }, + { + "uuid": "T8nPuXsKGvHEEXmoFhs5VL", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:03.959977+00:00", + "working_start": 1.826187459169887, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. 
Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n", + "usage": { + "input_tokens": 498, + "output_tokens": 67, + "total_tokens": 565, + "reasoning_tokens": 0 + }, + "time": 1.083856499986723 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "oMN9mkGPaHQpBTBVjwhMnC" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + 
"functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:05 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/43", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=1022", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://44b10d1a8860bb5794ac2c3f9632d1ec", + "X-Google-GFE-Request-Trace": "acnuqa12:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/43,acnuqa12:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://1155e3202cf7af1f32a6579b896d0644", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -0.16592368794910944, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "9GGPae-pB4eWjMcP94O-gAc", + "usageMetadata": { + "candidatesTokenCount": 67, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 67 + } + ], + "promptTokenCount": 498, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 498 + } + ], + "totalTokenCount": 565 + } + }, + "time": 1.083856499986723 + }, + "completed": "2026-02-13T17:40:05.092097+00:00", + "working_time": 1.083856499986723 + }, + { + "uuid": "bXrzauQMkwnWv4XEnkRmeU", + "span_id": "9e62f3fbe71f4050a9095ddad802523b", + "timestamp": "2026-02-13T17:40:05.093167+00:00", + "working_start": 2.9115754161030054, + "event": "span_begin", + "id": "9e62f3fbe71f4050a9095ddad802523b", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" + }, + { + "uuid": "Y5KzDkX9Zt7qKK9oBqNHEf", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:05.092516+00:00", + "working_start": 2.910924666095525, + "event": "tool", + "type": "function", + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la 
/var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "events": [], + "completed": "2026-02-13T17:40:05.148329+00:00", + "working_time": 0.05569483297682554, + "message_id": "UQEKF93mLKeBVCkUrZEbXH" + }, + { + "uuid": "nbCd83ogoQ7H4mppNkMkUR", + "span_id": "9e62f3fbe71f4050a9095ddad802523b", + "timestamp": "2026-02-13T17:40:05.095472+00:00", + "working_start": 2.9612215410452336, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals'", + "options": { + "timeout": 120 + }, + "result": 0, + "output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n ", + "completed": "2026-02-13T17:40:05.142874+00:00" + }, + { + "uuid": "L93eqmo7tuSBfojXMSEdAW", + "span_id": "9e62f3fbe71f4050a9095ddad802523b", + "timestamp": "2026-02-13T17:40:05.146753+00:00", + "working_start": 2.9650477070827037, + "event": "span_end", + "id": "9e62f3fbe71f4050a9095ddad802523b" + }, + { + "uuid": "eGPEbLxQwWPaMnKi55CPAc", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:05.150275+00:00", + "working_start": 2.9685653740307316, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. 
You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": 
"gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n", + "usage": { + "input_tokens": 706, + "output_tokens": 78, + "total_tokens": 784, + "reasoning_tokens": 0 + }, + "time": 1.0334521669428796 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f" + } + } + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "K4evTi79aGKDLjDFvifrA2" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + 
"description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:06 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/40", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=984", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://d7bb9364bae96e30d48d7a9d78f3c032", + "X-Google-GFE-Request-Trace": "acnuqa6:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/40,acnuqa6:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://681981393fbf48f54f0313e8d08829a2", + "X-Google-Service": 
"attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. I will explore the `jobs` directory first.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -0.04929968332632994, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "9WGPaZuTErOljMcP06KOuAE", + "usageMetadata": { + "candidatesTokenCount": 78, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 78 + } + ], + "promptTokenCount": 706, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 706 + } + ], + "totalTokenCount": 784 + } + }, + "time": 1.0334521669428796 + }, + "completed": "2026-02-13T17:40:06.232879+00:00", + "working_time": 1.0334521669428796 + }, + { + "uuid": "RSQH9QZ5uzjwnUMysCSN72", + "span_id": "acc2227fdd144d24887418febc3b1d55", + "timestamp": "2026-02-13T17:40:06.233878+00:00", + "working_start": 4.003529831999913, + "event": "span_begin", + "id": "acc2227fdd144d24887418febc3b1d55", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" 
+ }, + { + "uuid": "C3Ew5Qomj9gQ3MR5zVDVcX", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:06.233271+00:00", + "working_start": 4.002922791056335, + "event": "tool", + "type": "function", + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "events": [], + "completed": "2026-02-13T17:40:06.298206+00:00", + "working_time": 0.06476525004621596, + "message_id": "QomCdPn3yYCyCWK8bLHveJ" + }, + { + "uuid": "2FKjqrYFAAW26qvPNG7aEm", + "span_id": "acc2227fdd144d24887418febc3b1d55", + "timestamp": "2026-02-13T17:40:06.235963+00:00", + "working_start": 4.059052207041532, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs'", + "options": { + "timeout": 120 + }, + "result": 0, + "output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n ", + "completed": "2026-02-13T17:40:06.289488+00:00" + }, + { + "uuid": "UYCoenrmRDSgSj8z4PGq6d", + "span_id": "acc2227fdd144d24887418febc3b1d55", + "timestamp": "2026-02-13T17:40:06.293159+00:00", + "working_start": 4.062653332133777, + "event": "span_end", + "id": "acc2227fdd144d24887418febc3b1d55" + }, + { + 
"uuid": "VE4UuCXytPbL4FHGSA2423", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:06.310889+00:00", + "working_start": 4.078274290193804, + "pending": true, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) 
to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "google/gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "5pHnRGyUkxWGQB4wkpS2a6", + "content": "", + "source": "generate", + "role": "assistant", + "model": "google/gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "" + }, + "cache": "write" + }, + { + "uuid": "izKctKzhrJmF95JsLnDVKz", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:12.631537+00:00", + "working_start": 4.872115834830327, + "pending": true, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. 
Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "google/gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "oFoGW7zLRBBKfnoT5VtqkE", + "content": "", + "source": "generate", + "role": "assistant", + "model": "google/gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "" + }, + "cache": "write" + }, + { + "uuid": "EBgyqyH3VHQ7kcvij8PY96", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:06.304033+00:00", + "working_start": 4.073526291060261, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. 
Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "Now I'll examine the contents of `local_dev.yaml`.\n", + "usage": { + "input_tokens": 906, + "output_tokens": 72, + "total_tokens": 978, + "reasoning_tokens": 0 + }, + "time": 1.7595584170194343 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": 
"attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. I will explore the `jobs` directory first.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ce6545038816debba39039dfc30cba45" + } + } + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "5ttdhRvAgWPQNp3w5EqqrN" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": 
"OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:24 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/37", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=1709", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://ac3f04405496a443a23045d82059b1a2", + "X-Google-GFE-Request-Trace": "acnuqa13:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/37,acnuqa13:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://49d7fb39ae6b2bce756c2729d42a2a81", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": 
"chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -0.06912359926435682, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "BmKPaYrBNYz1jMcP46ymwA8", + "usageMetadata": { + "candidatesTokenCount": 72, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 72 + } + ], + "promptTokenCount": 906, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 906 + } + ], + "totalTokenCount": 978 + } + }, + "time": 1.7595584170194343 + }, + "completed": "2026-02-13T17:40:24.544382+00:00", + "working_time": 1.7595584170194343 + }, + { + "uuid": "iAh6Wwnjts2eucCD8cLE4b", + "span_id": "6ff631dfe42942ee96c5f741d01eb7d8", + "timestamp": "2026-02-13T17:40:24.547211+00:00", + "working_start": 5.839098248165101, + "event": "span_begin", + "id": "6ff631dfe42942ee96c5f741d01eb7d8", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" + }, + { + "uuid": "DV3UZqEQxPEj8SJ2wncw25", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:24.545010+00:00", + "working_start": 5.8368978321086615, + "event": "tool", + "type": "function", + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + 
"function": "bash", + "arguments": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. 
Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "events": [], + "completed": "2026-02-13T17:40:24.589221+00:00", + "working_time": 0.04388674991751462, + "message_id": "TQprdtjEjmzJsuL3QR22ed" + }, + { + "uuid": "kVWYXxokSAyP2X9adsuXKp", + "span_id": "6ff631dfe42942ee96c5f741d01eb7d8", + "timestamp": "2026-02-13T17:40:24.549627+00:00", + "working_start": 5.877185082063079, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml'", + "options": { + "timeout": 120 + }, + 
"result": 0, + "output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\nOutput truncated (61 additional lines)", + "completed": "2026-02-13T17:40:24.585560+00:00" + }, + { + "uuid": "gzbJq3AzzjWAcMM2qAXwSG", + "span_id": "6ff631dfe42942ee96c5f741d01eb7d8", + "timestamp": "2026-02-13T17:40:24.587660+00:00", + "working_start": 5.879223082098179, + "event": "span_end", + "id": "6ff631dfe42942ee96c5f741d01eb7d8" + }, + { + "uuid": "AGADzPg9ohmhmXAirVEdRo", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:24.591161+00:00", + "working_start": 5.882723915041424, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. 
Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 
13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + }, + { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "TQprdtjEjmzJsuL3QR22ed", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run 
and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. 
An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "role": "tool", + "tool_call_id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "ahZPXipD5neR29en3zRtUf", + "content": [ + { + "type": "text", + "text": "This file configures a job named `local_dev`. It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "This file configures a job named `local_dev`. 
It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n", + "usage": { + "input_tokens": 1680, + "output_tokens": 105, + "total_tokens": 1785, + "reasoning_tokens": 0 + }, + "time": 7.498002166044898 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ce6545038816debba39039dfc30cba45" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://4542a679980a33ce0d83be7803b2d16a" + } + } + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "i3L84FfpKkkH2i4uahMrQY" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + 
"required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:32 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/61", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=7444", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://ba3d6153baab84a581d4cef34d3e064a", + "X-Google-GFE-Request-Trace": "acnuqa12:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/61,acnuqa12:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://188e8e605c689d55de36fa0a2ac177d4", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + 
"text": "attachment://75001449357c4856199d7a69bf21cef9" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -1.104267302013579, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "CGKPaeHULYeYjMcP-5XakAE", + "usageMetadata": { + "candidatesTokenCount": 105, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 105 + } + ], + "promptTokenCount": 1680, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 1680 + } + ], + "totalTokenCount": 1785 + } + }, + "time": 7.498002166044898 + }, + "completed": "2026-02-13T17:40:32.137195+00:00", + "working_time": 7.498002166044898 + }, + { + "uuid": "DTNiw6ZAChw7hFxC6zJCy8", + "span_id": "ec15bb1222ea4f47a9a93d39d6919eb8", + "timestamp": "2026-02-13T17:40:32.138097+00:00", + "working_start": 13.382188705145381, + "event": "span_begin", + "id": "ec15bb1222ea4f47a9a93d39d6919eb8", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" + }, + { + "uuid": "X6v8AzGP6ZhF8cqewhkLLm", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:32.137584+00:00", + "working_start": 13.381675497163087, + "event": "tool", + "type": "function", + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": 
"```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n", + "events": [], + "completed": "2026-02-13T17:40:32.188993+00:00", + "working_time": 0.051264875037610534, + "message_id": "8PhDW7rvHM4JkMit9nee6a" + }, + { + "uuid": "W6Qh7MquXbURdveE8cyTsK", + "span_id": "ec15bb1222ea4f47a9a93d39d6919eb8", + "timestamp": "2026-02-13T17:40:32.140216+00:00", + "working_start": 13.42927778919693, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks'", + "options": { + "timeout": 120 + }, + "result": 0, + "output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n ", + "completed": "2026-02-13T17:40:32.185262+00:00" + }, + { + "uuid": "BHTbEUXwdCFcmq4X3Mmwmx", + "span_id": "ec15bb1222ea4f47a9a93d39d6919eb8", + "timestamp": "2026-02-13T17:40:32.187246+00:00", + "working_start": 13.431195289245807, + "event": "span_end", + "id": "ec15bb1222ea4f47a9a93d39d6919eb8" + }, + { + "uuid": "eBqDfP67f4xgKaxhMBXHMk", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:32.191136+00:00", + "working_start": 13.43508566415403, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. 
You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": 
"/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + }, + { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "TQprdtjEjmzJsuL3QR22ed", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run 
and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. 
An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "role": "tool", + "tool_call_id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash" + }, + { + "id": "ahZPXipD5neR29en3zRtUf", + "content": [ + { + "type": "text", + "text": "This file configures a job named `local_dev`. 
It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "8PhDW7rvHM4JkMit9nee6a", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n", + "role": "tool", + "tool_call_id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." 
+ } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "W6P45d4wxabT8JGajuSEpL", + "content": [ + { + "type": "text", + "text": "And then the `get_started` task directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "And then the `get_started` task directory.\n", + "usage": { + "input_tokens": 1902, + "output_tokens": 68, + "total_tokens": 1970, + "reasoning_tokens": 0 + }, + "time": 8.804282999946736 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": 
"attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. I will explore the `jobs` directory first.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ce6545038816debba39039dfc30cba45" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://4542a679980a33ce0d83be7803b2d16a" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://75001449357c4856199d7a69bf21cef9" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://5a5632feb6066dd225b3c124a1bac129" + } + } + } + ], + "role": "user" + } + ], + 
"generation_config": { + "httpOptions": { + "headers": { + "x-irid": "AUa4QafKTNMLYA2BtEWwQs" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:41 GMT", + 
"Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/63", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=8748", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://83217d10977474b4655b92e39aebaee5", + "X-Google-GFE-Request-Trace": "acnuqa6:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/63,acnuqa6:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://04b84a47f04af4777b81695e7d416a2b", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "And then the `get_started` task directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://1cc47e8e9b60458fc2fb2c07cbd56e28" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -3.2610606025247013, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "EGKPacn2FafRjMcP846UiA4", + "usageMetadata": { + "candidatesTokenCount": 68, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 68 + } 
+ ], + "promptTokenCount": 1902, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 1902 + } + ], + "totalTokenCount": 1970 + } + }, + "time": 8.804282999946736 + }, + "completed": "2026-02-13T17:40:41.044725+00:00", + "working_time": 8.804282999946736 + }, + { + "uuid": "5JwQzjoStc6pNmtuXUcAuk", + "span_id": "13b6efdb83654dc0a2b552d043a08e8a", + "timestamp": "2026-02-13T17:40:41.045709+00:00", + "working_start": 22.240810122224502, + "event": "span_begin", + "id": "13b6efdb83654dc0a2b552d043a08e8a", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" + }, + { + "uuid": "c9xeaZaYTeJmYTK7fMYxso", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:41.045111+00:00", + "working_start": 22.240212247124873, + "event": "tool", + "type": "function", + "id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash", + "arguments": { + "cmd": "attachment://1cc47e8e9b60458fc2fb2c07cbd56e28" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 857 Feb 13 09:39 task.yaml\n", + "events": [], + "completed": "2026-02-13T17:40:41.095378+00:00", + "working_time": 0.050144499998547136, + "message_id": "KBpTgoAkxQDLZLkreVi62z" + }, + { + "uuid": "hoG2sSSDJUtSAZJZKJQpPh", + "span_id": "13b6efdb83654dc0a2b552d043a08e8a", + "timestamp": "2026-02-13T17:40:41.047892+00:00", + "working_start": 22.287074206164107, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started'", + "options": { + "timeout": 120 + }, + "result": 0, + 
"output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 857 Feb 13 09:39 task.yaml\n ", + "completed": "2026-02-13T17:40:41.092015+00:00" + }, + { + "uuid": "NMHHgFeqRqLQmvWgwvYNuJ", + "span_id": "13b6efdb83654dc0a2b552d043a08e8a", + "timestamp": "2026-02-13T17:40:41.094089+00:00", + "working_start": 22.28906958119478, + "event": "span_end", + "id": "13b6efdb83654dc0a2b552d043a08e8a" + }, + { + "uuid": "UB6MiAjhGH829gn4wiLnXQ", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:41.097137+00:00", + "working_start": 22.2921159141697, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) 
to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + }, + { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "TQprdtjEjmzJsuL3QR22ed", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run 
and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. 
An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "role": "tool", + "tool_call_id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash" + }, + { + "id": "ahZPXipD5neR29en3zRtUf", + "content": [ + { + "type": "text", + "text": "This file configures a job named `local_dev`. 
It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "8PhDW7rvHM4JkMit9nee6a", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n", + "role": "tool", + "tool_call_id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash" + }, + { + "id": "W6P45d4wxabT8JGajuSEpL", + "content": [ + { + "type": "text", + "text": "And then the `get_started` task directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "KBpTgoAkxQDLZLkreVi62z", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 
857 Feb 13 09:39 task.yaml\n", + "role": "tool", + "tool_call_id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." + } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "6HbpKpsZvH63bUtdAfVCm3", + "content": [ + { + "type": "text", + "text": "Finally, the contents of `task.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "Finally, the contents of `task.yaml`.\n", + "usage": { + "input_tokens": 2088, + "output_tokens": 69, + "total_tokens": 2157, + "reasoning_tokens": 0 + }, + "time": 8.898628458031453 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": 
"attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ce6545038816debba39039dfc30cba45" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://4542a679980a33ce0d83be7803b2d16a" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://75001449357c4856199d7a69bf21cef9" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://5a5632feb6066dd225b3c124a1bac129" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "And then the `get_started` task directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://1cc47e8e9b60458fc2fb2c07cbd56e28" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://421ab3fe5c6eb371ed45728b326879fa" + } + } + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "RorUzMiMzWmE2Pm2QswFMJ" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + 
"threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:50 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/74", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=8845", + 
"X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://0f8cf5de29178ac4e22880548e4e0884", + "X-Google-GFE-Request-Trace": "acnuqa11:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/74,acnuqa11:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://8afbf58217687de2c30df222c5d49f30", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "Finally, the contents of `task.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://c73294582d1bab6be6e6f43e528c5fd4" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -2.802779487941576, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "GWKPaaTvD_L1jMcPjv6omA4", + "usageMetadata": { + "candidatesTokenCount": 69, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 69 + } + ], + "promptTokenCount": 2088, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2088 + } + ], + "totalTokenCount": 2157 + } + }, + "time": 8.898628458031453 + }, + "completed": "2026-02-13T17:40:50.039679+00:00", + "working_time": 
8.898628458031453 + }, + { + "uuid": "3SSwmrX84Y5JHpAc24BZTU", + "span_id": "e0c156cef83d48348f50e089c6210fa3", + "timestamp": "2026-02-13T17:40:50.040287+00:00", + "working_start": 31.19182803912554, + "event": "span_begin", + "id": "e0c156cef83d48348f50e089c6210fa3", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "bash" + }, + { + "uuid": "WNGBn9iCEiKv2z2oNhmAwN", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:50.039951+00:00", + "working_start": 31.191492539132014, + "event": "tool", + "type": "function", + "id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash", + "arguments": { + "cmd": "attachment://c73294582d1bab6be6e6f43e528c5fd4" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml\n```\n" + }, + "result": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Starter Task\n# =============================================================================\n# This task points at your project root as its workspace and runs a simple\n# codebase analysis evaluation.\n\nfunc: analyze_codebase\n\n# Workspace: points to the project root containing pubspec.yaml\nworkspace:\n path: ../../\n\nsamples:\n - id: get_started\n difficulty: easy\n tags: []\n # Input: The prompt given to the model\n input: |\n Explore this codebase and suggest one improvement\n to the code quality, readability, or architecture.\n # Target: Expected output or grading criteria\n target: |\n The suggestion should be specific, actionable, and reference\n actual code in the project. 
It should explain why the change\n improves the codebase.\n", + "events": [], + "completed": "2026-02-13T17:40:50.080108+00:00", + "working_time": 0.04008537500758469, + "message_id": "2EQtjKBudJPfGdxBaCduci" + }, + { + "uuid": "cDq8Y6dEnskSaaTM3qKiyk", + "span_id": "e0c156cef83d48348f50e089c6210fa3", + "timestamp": "2026-02-13T17:40:50.041843+00:00", + "working_start": 31.229223997099325, + "event": "sandbox", + "action": "exec", + "cmd": "bash --login -c 'cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml'", + "options": { + "timeout": 120 + }, + "result": 0, + "output": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n\n# =============================================================================\n# Starter Task\n# =============================================================================\n# This task points at your project root as its workspace and runs a simple\n# codebase analysis evaluation.\n\nfunc: analyze_codebase\n\n# Workspace: points to the project root containing pubspec.yaml\nworkspace:\n path: ../../\n\nsamples:\n - id: get_started\n difficulty: easy\n tags: []\n # Input: The prompt given to the model\n\nOutput truncated (9 additional lines)", + "completed": "2026-02-13T17:40:50.077712+00:00" + }, + { + "uuid": "WdroZYLvFbgNFPr5Y5mERF", + "span_id": "e0c156cef83d48348f50e089c6210fa3", + "timestamp": "2026-02-13T17:40:50.079069+00:00", + "working_start": 31.23053895519115, + "event": "span_end", + "id": "e0c156cef83d48348f50e089c6210fa3" + }, + { + "uuid": "9U7JJ6LqWpu6DPB4MnoFsZ", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:50.081638+00:00", + "working_start": 31.233109122142196, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "iNKorPWWBrvjkRqDKyJPYF", + "content": "\nYou are a helpful assistant attempting to submit the best possible answer. 
You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "role": "system" + }, + { + "id": "dgiCsCRsNgzMKagz4PEePk", + "content": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "role": "system" + }, + { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "source": "input", + "role": "user" + }, + { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": 
"gemini-2.0-flash" + }, + { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + }, + { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + }, + { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + }, + { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "TQprdtjEjmzJsuL3QR22ed", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run 
and how to run it.\n# Jobs are the primary way to control evaluation runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. 
An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "role": "tool", + "tool_call_id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash" + }, + { + "id": "ahZPXipD5neR29en3zRtUf", + "content": [ + { + "type": "text", + "text": "This file configures a job named `local_dev`. 
It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "8PhDW7rvHM4JkMit9nee6a", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n", + "role": "tool", + "tool_call_id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash" + }, + { + "id": "W6P45d4wxabT8JGajuSEpL", + "content": [ + { + "type": "text", + "text": "And then the `get_started` task directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "KBpTgoAkxQDLZLkreVi62z", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 
857 Feb 13 09:39 task.yaml\n", + "role": "tool", + "tool_call_id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash" + }, + { + "id": "6HbpKpsZvH63bUtdAfVCm3", + "content": [ + { + "type": "text", + "text": "Finally, the contents of `task.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash", + "arguments": { + "cmd": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + { + "id": "2EQtjKBudJPfGdxBaCduci", + "content": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Starter Task\n# =============================================================================\n# This task points at your project root as its workspace and runs a simple\n# codebase analysis evaluation.\n\nfunc: analyze_codebase\n\n# Workspace: points to the project root containing pubspec.yaml\nworkspace:\n path: ../../\n\nsamples:\n - id: get_started\n difficulty: easy\n tags: []\n # Input: The prompt given to the model\n input: |\n Explore this codebase and suggest one improvement\n to the code quality, readability, or architecture.\n # Target: Expected output or grading criteria\n target: |\n The suggestion should be specific, actionable, and reference\n actual code in the project. 
It should explain why the change\n improves the codebase.\n", + "role": "tool", + "tool_call_id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash" + } + ], + "tools": [ + { + "name": "bash", + "description": "Use this function to execute bash commands.", + "parameters": { + "type": "object", + "properties": { + "cmd": { + "type": "string", + "description": "The bash command to execute." + } + }, + "required": [ + "cmd" + ], + "additionalProperties": false + } + }, + { + "name": "submit", + "description": "Submit an answer for evaluation.", + "parameters": { + "type": "object", + "properties": { + "answer": { + "type": "string", + "description": "Submitted answer" + } + }, + "required": [ + "answer" + ], + "additionalProperties": false + } + } + ], + "tool_choice": "auto", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "RZXDGe7zL3uhsSSp8sVD2n", + "content": [ + { + "type": "text", + "text": "attachment://5684fb11be22e745b968e13f4f60486c" + }, + { + "type": "text", + "text": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "submit_UzSGKfgg6TdCSXHUPiiK2f", + "function": "submit", + "arguments": { + "answer": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. 
This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "usage": { + "input_tokens": 2359, + "output_tokens": 241, + "total_tokens": 2600, + "reasoning_tokens": 0 + }, + "time": 2.359958540997468 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://9db899dd5425b74fa4eb825cb7100dfe" + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://a0225335de8f8644abb148db16ff949e" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "It seems the current directory is empty. I should explore the actual workspace directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. 
I will explore the `jobs` directory first.\n" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://ce6545038816debba39039dfc30cba45" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://4542a679980a33ce0d83be7803b2d16a" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "attachment://75001449357c4856199d7a69bf21cef9" + }, + { + "functionCall": { + "args": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://5a5632feb6066dd225b3c124a1bac129" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "And then the `get_started` task directory.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://1cc47e8e9b60458fc2fb2c07cbd56e28" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + "name": "bash", + "response": { + "content": "attachment://421ab3fe5c6eb371ed45728b326879fa" + } + } + } + ], + "role": "user" + }, + { + "parts": [ + { + "text": "Finally, the contents of `task.yaml`.\n" + }, + { + "functionCall": { + "args": { + "cmd": "attachment://c73294582d1bab6be6e6f43e528c5fd4" + }, + "name": "bash" + } + } + ], + "role": "model" + }, + { + "parts": [ + { + "functionResponse": { + 
"name": "bash", + "response": { + "content": "attachment://ce9860604d560b4328509bddf9a9fd4b" + } + } + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "4ZJkCiTtheFgdam75uGXJK" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "description": "Use this function to execute bash commands.", + "name": "bash", + "parameters": { + "nullable": false, + "properties": { + "cmd": { + "description": "The bash command to execute.", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "cmd" + ], + "type": "OBJECT" + } + }, + { + "description": "Submit an answer for evaluation.", + "name": "submit", + "parameters": { + "nullable": false, + "properties": { + "answer": { + "description": "Submitted answer", + "nullable": false, + "type": "STRING" + } + }, + "required": [ + "answer" + ], + "type": "OBJECT" + } + } + ] + } + ], + "tool_config": { + "functionCallingConfig": { + "mode": "AUTO" + } + }, + "system_instruction": [ + { + "text": "attachment://96d2add6be1fbfa37bad6c101bbdcabb" + }, + { + "text": "attachment://b38086a4c737ca5ce168983fe7d100a3" + }, + { + "text": "attachment://ca12b4d4123df0f43f6669e0f6179470" + } + ] + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": 
"attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": "Fri, 13 Feb 2026 17:40:52 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/44", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=2302", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://287939dfc084b27ecbbf3235eb41a52a", + "X-Google-GFE-Request-Trace": "acnuqa2:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/44,acnuqa2:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://94b3547aef94c0eb7493e1db0f584356", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "attachment://5684fb11be22e745b968e13f4f60486c" + }, + { + "functionCall": { + "args": { + "answer": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + }, + "name": "submit" + } + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -0.24704007983702347, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + 
"responseId": "ImKPaabUDp6ljMcPkdiKqAw", + "usageMetadata": { + "candidatesTokenCount": 241, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 241 + } + ], + "promptTokenCount": 2359, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2359 + } + ], + "totalTokenCount": 2600 + } + }, + "time": 2.359958540997468 + }, + "completed": "2026-02-13T17:40:52.486468+00:00", + "working_time": 2.359958540997468 + }, + { + "uuid": "oXx7xWN5BBVGvE3Hg6Hzav", + "span_id": "0e94ad9f66d348e78191d5908be95d72", + "timestamp": "2026-02-13T17:40:52.487622+00:00", + "working_start": 33.59463787206914, + "event": "span_begin", + "id": "0e94ad9f66d348e78191d5908be95d72", + "parent_id": "3be0985210724574b90ce9f916060031", + "type": "tool", + "name": "submit" + }, + { + "uuid": "cZGPUdtPrnJJuRVUoEVtKM", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:52.486897+00:00", + "working_start": 33.59391349705402, + "event": "tool", + "type": "function", + "id": "submit_UzSGKfgg6TdCSXHUPiiK2f", + "function": "submit", + "arguments": { + "answer": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + }, + "result": "The `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. 
It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "events": [], + "completed": "2026-02-13T17:40:52.491257+00:00", + "working_time": 0.00436, + "message_id": "8xAjT3Pkk7JKVSnd5KV3fR" + }, + { + "uuid": "CK5YuKyheQNo7pCANTMjjr", + "span_id": "0e94ad9f66d348e78191d5908be95d72", + "timestamp": "2026-02-13T17:40:52.489486+00:00", + "working_start": 33.596504164044745, + "event": "span_end", + "id": "0e94ad9f66d348e78191d5908be95d72" + }, + { + "uuid": "azno7exfsppaEGoJKc7Vsm", + "span_id": "3be0985210724574b90ce9f916060031", + "timestamp": "2026-02-13T17:40:52.492624+00:00", + "working_start": 33.59964208002202, + "event": "span_end", + "id": "3be0985210724574b90ce9f916060031" + }, + { + "uuid": "bQFrePeVEGxFuLGJVSrPtK", + "span_id": "f9fd97e908c24545a6197437d66953af", + "timestamp": "2026-02-13T17:40:52.493905+00:00", + "working_start": 33.600920830038376, + "event": "state", + "changes": [ + { + "op": "replace", + "path": "/messages/0/id", + "value": "iNKorPWWBrvjkRqDKyJPYF", + "replaced": "dgiCsCRsNgzMKagz4PEePk" + }, + { + "op": "replace", + "path": "/messages/0/content", + "value": "attachment://96d2add6be1fbfa37bad6c101bbdcabb", + "replaced": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) 
to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n" + }, + { + "op": "remove", + "path": "/messages/1/source" + }, + { + "op": "replace", + "path": "/messages/1/role", + "value": "system", + "replaced": "user" + }, + { + "op": "replace", + "path": "/messages/1/id", + "value": "dgiCsCRsNgzMKagz4PEePk", + "replaced": "YKoBXPM2D3LAYfpRtJ8rSG" + }, + { + "op": "replace", + "path": "/messages/1/content", + "value": "attachment://b38086a4c737ca5ce168983fe7d100a3", + "replaced": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n" + }, + { + "op": "add", + "path": "/messages/2", + "value": { + "id": "YKoBXPM2D3LAYfpRtJ8rSG", + "content": "attachment://9db899dd5425b74fa4eb825cb7100dfe", + "source": "input", + "role": "user" + } + }, + { + "op": "add", + "path": "/messages/3", + "value": { + "id": "ePaRxtW6BAFFKX3wgPvuxQ", + "content": [ + { + "type": "text", + "text": "attachment://9a04b99c768a997e406d36798ffe7341" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash", + "arguments": { + "cmd": "ls -la" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/4", + "value": { + "id": "D6EvgQPgAoveZwXndCQX7i", + "content": "attachment://a0225335de8f8644abb148db16ff949e", + "role": "tool", + "tool_call_id": "bash_96CGHnyaWeCRTS8X8BFMvb", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/5", + "value": { + "id": "X9YEAnfcJpZPfKYouGPfia", + "content": [ + { + "type": "text", + "text": "It seems the current directory is empty. 
I should explore the actual workspace directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals\n```\n" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/6", + "value": { + "id": "UQEKF93mLKeBVCkUrZEbXH", + "content": "attachment://ea5b36fd20a1905467d53eca2cf1773f", + "role": "tool", + "tool_call_id": "bash_9FNtsrZyifmy6dUNvsWNcp", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/7", + "value": { + "id": "XmsmV3dHrAsZTJptzwsL6r", + "content": [ + { + "type": "text", + "text": "Okay, there are two subdirectories: `jobs` and `tasks`. I will explore the `jobs` directory first.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "attachment://5282a84f99e713059d20891c8a9356e5" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/8", + "value": { + "id": "QomCdPn3yYCyCWK8bLHveJ", + "content": "attachment://ce6545038816debba39039dfc30cba45", + "role": "tool", + "tool_call_id": "bash_De6QRUaDbUBkQBxhwj9LQi", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/9", + "value": { + "id": "QPveCFnzTZtWYnJSu8tAHf", + "content": [ + { + "type": "text", + "text": "Now I'll examine the contents of `local_dev.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", 
+ "tool_calls": [ + { + "id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash", + "arguments": { + "cmd": "attachment://fa914ac907fba627d8f195e9dd810f20" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "attachment://894d16b0dfc3330e2a72ff18bfcbc1f0" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/10", + "value": { + "id": "TQprdtjEjmzJsuL3QR22ed", + "content": "attachment://4542a679980a33ce0d83be7803b2d16a", + "role": "tool", + "tool_call_id": "bash_4t5raTAnKAddFKQUM5TX6R", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/11", + "value": { + "id": "ahZPXipD5neR29en3zRtUf", + "content": [ + { + "type": "text", + "text": "attachment://75001449357c4856199d7a69bf21cef9" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash", + "arguments": { + "cmd": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "attachment://fa15a6d48178bdf2e3bbe95c76a67e8c" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/12", + "value": { + "id": "8PhDW7rvHM4JkMit9nee6a", + "content": "attachment://5a5632feb6066dd225b3c124a1bac129", + "role": "tool", + "tool_call_id": "bash_cSXL3sj6i58uXqmDdKxi7g", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/13", + "value": { + "id": "W6P45d4wxabT8JGajuSEpL", + "content": [ + { + "type": "text", + "text": "And then the `get_started` task directory.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash", + "arguments": { + "cmd": "attachment://1cc47e8e9b60458fc2fb2c07cbd56e28" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": 
"attachment://04f5b45f7eb27f50ba42ff7155669327" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/14", + "value": { + "id": "KBpTgoAkxQDLZLkreVi62z", + "content": "attachment://421ab3fe5c6eb371ed45728b326879fa", + "role": "tool", + "tool_call_id": "bash_Bu6HNdHns7UuJjifvkHvwv", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/15", + "value": { + "id": "6HbpKpsZvH63bUtdAfVCm3", + "content": [ + { + "type": "text", + "text": "Finally, the contents of `task.yaml`.\n" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash", + "arguments": { + "cmd": "attachment://c73294582d1bab6be6e6f43e528c5fd4" + }, + "view": { + "title": "bash", + "format": "markdown", + "content": "attachment://85d9f779f2a331fd6accc8b92f1f546d" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/messages/16", + "value": { + "id": "2EQtjKBudJPfGdxBaCduci", + "content": "attachment://ce9860604d560b4328509bddf9a9fd4b", + "role": "tool", + "tool_call_id": "bash_LV4nES3T7jPrzgtTEg3a42", + "function": "bash" + } + }, + { + "op": "add", + "path": "/messages/17", + "value": { + "id": "RZXDGe7zL3uhsSSp8sVD2n", + "content": [ + { + "type": "text", + "text": "attachment://5684fb11be22e745b968e13f4f60486c" + }, + { + "type": "text", + "text": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [], + "model": "gemini-2.0-flash" + } + }, + { + "op": "add", + "path": "/output/time", + "value": 2.359958540997468 + }, + { + "op": "add", + "path": "/output/usage", + "value": { + "input_tokens": 2359, + "output_tokens": 241, + "total_tokens": 2600, + "reasoning_tokens": 0 + } + }, + { + "op": "replace", + "path": "/output/completion", + "value": "attachment://a705534cbb1833e3d70b86bab4bbc113", + "replaced": "" + }, + { + "op": 
"replace", + "path": "/output/model", + "value": "gemini-2.0-flash", + "replaced": "google/gemini-2.0-flash" + }, + { + "op": "add", + "path": "/output/choices/0", + "value": { + "message": { + "id": "RZXDGe7zL3uhsSSp8sVD2n", + "content": [ + { + "type": "text", + "text": "attachment://5684fb11be22e745b968e13f4f60486c" + }, + { + "type": "text", + "text": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + } + ], + "source": "generate", + "role": "assistant", + "tool_calls": [ + { + "id": "submit_UzSGKfgg6TdCSXHUPiiK2f", + "function": "submit", + "arguments": { + "answer": "attachment://a90c31e08ffc1d69baf4216e4c8876d2" + }, + "type": "function" + } + ], + "model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + } + ] + }, + { + "uuid": "Q9V987ttMKmwFMJnz7adQh", + "span_id": "f9fd97e908c24545a6197437d66953af", + "timestamp": "2026-02-13T17:40:52.495214+00:00", + "working_start": 33.60223128902726, + "event": "span_end", + "id": "f9fd97e908c24545a6197437d66953af" + }, + { + "uuid": "4fo5kYvfAmuiVcLYWixCUX", + "span_id": "2e768b3896c0454695912d0f6ecda7e9", + "timestamp": "2026-02-13T17:40:52.495987+00:00", + "working_start": 33.60300374706276, + "event": "span_end", + "id": "2e768b3896c0454695912d0f6ecda7e9" + }, + { + "uuid": "EYmysc3i4KYYho2nYuqVhA", + "span_id": "5b96cdbb2fc245d9b3825969cb4b54a3", + "timestamp": "2026-02-13T17:40:52.497175+00:00", + "working_start": 33.604191664024256, + "event": "span_begin", + "id": "5b96cdbb2fc245d9b3825969cb4b54a3", + "type": "scorers", + "name": "scorers" + }, + { + "uuid": "8STrnRBqMVswWL4VBypvLF", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": "2026-02-13T17:40:52.498097+00:00", + "working_start": 33.60511387209408, + "event": "span_begin", + "id": "65c3ff87fd274709a47d37a6c4117850", + "parent_id": "5b96cdbb2fc245d9b3825969cb4b54a3", + "type": "scorer", + "name": "model_graded_fact" + }, + { + "uuid": "TGwHsFcmCehvsEAtwgF8Vf", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": 
"2026-02-13T17:40:52.499588+00:00", + "working_start": 33.606561871943995, + "pending": true, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "XafA2ear8uf24woz47FXR5", + "content": "attachment://33e2eafc11865d4c1f435f1acd5730ef", + "role": "user" + } + ], + "tools": [], + "tool_choice": "none", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "google/gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "PVpK7bfZa93RsvmFC6guqq", + "content": "", + "source": "generate", + "role": "assistant", + "model": "google/gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "" + }, + "cache": "write" + }, + { + "uuid": "bujU9wuX8MXxxrTnc2zNXQ", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": "2026-02-13T17:40:52.538854+00:00", + "working_start": 33.64582945499569, + "event": "logger", + "message": { + "level": "info", + "message": "AFC is enabled with max remote calls: 10.", + "created": 1771004452537.8462, + "filename": "models.py", + "module": "models", + "lineno": 7024 + } + }, + { + "uuid": "ASDv9HsGw5yzYVUXt362oj", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": "2026-02-13T17:40:52.498976+00:00", + "working_start": 33.60599191405345, + "event": "model", + "model": "google/gemini-2.0-flash", + "input": [ + { + "id": "XafA2ear8uf24woz47FXR5", + "content": "attachment://33e2eafc11865d4c1f435f1acd5730ef", + "role": "user" + } + ], + "tools": [], + "tool_choice": "none", + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "output": { + "model": "gemini-2.0-flash", + "choices": [ + { + "message": { + "id": "fWg3jrQEJzwVXS2CRhzbwH", + "content": [ + { + "type": "text", + "text": "attachment://23fa16a7cf2ba1e9f5b3d14be45f5793" + } + ], + "source": "generate", + "role": "assistant", + 
"model": "gemini-2.0-flash" + }, + "stop_reason": "stop" + } + ], + "completion": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n", + "usage": { + "input_tokens": 518, + "output_tokens": 97, + "total_tokens": 615, + "reasoning_tokens": 0 + }, + "time": 1.3219824170228094 + }, + "cache": "write", + "call": { + "request": { + "contents": [ + { + "parts": [ + { + "text": "attachment://33e2eafc11865d4c1f435f1acd5730ef" + } + ], + "role": "user" + } + ], + "generation_config": { + "httpOptions": { + "headers": { + "x-irid": "V6z3quSaHhASuocGYebpDY" + } + } + }, + "safety_settings": [ + { + "category": "HARM_CATEGORY_CIVIC_INTEGRITY", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + } + ], + "tools": null, + "tool_config": null, + "system_instruction": null + }, + "response": { + "sdkHttpResponse": { + "headers": { + "x-google-esf-cloud-client-params": "attachment://a248653186e1cbc62ada82759278d140", + "X-Google-Session-Info": "GgQYECgLIAE6IxIhZ2VuZXJhdGl2ZWxhbmd1YWdlLmdvb2dsZWFwaXMuY29t", + "Content-Type": "application/json; charset=UTF-8", + "X-Google-Security-Signals": "attachment://2adbe2b9c3b7a76ea83ee906051e404e", + "Vary": "Origin, X-Origin, Referer", + "Content-Encoding": "gzip", + "Date": 
"Fri, 13 Feb 2026 17:40:58 GMT", + "Server": "scaffolding on HTTPServer2", + "X-Google-Netmon-Label": "/bns/is/borg/is/bns/genai-api/prod.genai-api/19", + "X-XSS-Protection": "0", + "X-Frame-Options": "SAMEORIGIN", + "X-Content-Type-Options": "nosniff", + "Server-Timing": "gfet4t7; dur=1206", + "X-Google-GFE-Service-Trace": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-Backends": "attachment://150f53d128f6a1ac794f1962a3f5dab2", + "X-Google-GFE-Request-Trace": "acnuqa1:443,/bns/is/borg/is/bns/genai-api/prod.genai-api/19,acnuqa1:443", + "X-Google-DOS-Service-Trace": "main:genai-api-api-prod,main:GLOBAL_all_non_cloud", + "X-Google-GFE-Handshake-Trace": "attachment://eba0cf1b09ef7148ec57a39e354d19ac", + "X-Google-Service": "attachment://149a1eba121c602744db9459206a58eb", + "X-Google-GFE-Response-Code-Details-Trace": "response_code_set_by_backend", + "X-Google-GFE-Response-Body-Transformations": "chunked", + "X-Google-Shellfish-Status": "CA0gBEBG", + "X-Google-GFE-Version": "2.967.1", + "Alt-Svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "Transfer-Encoding": "chunked" + } + }, + "candidates": [ + { + "content": { + "parts": [ + { + "text": "attachment://23fa16a7cf2ba1e9f5b3d14be45f5793" + } + ], + "role": "model" + }, + "finishReason": "STOP", + "avgLogprobs": -0.4390586774373792, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "modelVersion": "gemini-2.0-flash", + "responseId": "KWKPafz1E4z1jMcP26ymwA8", + "usageMetadata": { + "candidatesTokenCount": 97, + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 97 + } + ], + "promptTokenCount": 518, + "promptTokensDetails": [ + { + "modality": "TEXT", + 
"tokenCount": 518 + } + ], + "totalTokenCount": 615 + }, + "automaticFunctionCallingHistory": [] + }, + "time": 1.3219824170228094 + }, + "completed": "2026-02-13T17:40:58.480617+00:00", + "working_time": 1.3219824170228094 + }, + { + "uuid": "Rse9m5XoXL9MfEuxgrphWV", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": "2026-02-13T17:40:57.154527+00:00", + "working_start": 34.47563011088005, + "event": "logger", + "message": { + "level": "info", + "message": "AFC is enabled with max remote calls: 10.", + "created": 1771004457153.7659, + "filename": "models.py", + "module": "models", + "lineno": 7024 + } + }, + { + "uuid": "c82pxeihS4Vd6zsbiryc3T", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": "2026-02-13T17:40:58.480938+00:00", + "working_start": 34.9284029151313, + "event": "score", + "score": { + "value": "C", + "answer": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. 
It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "explanation": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n", + "metadata": { + "grading": [ + { + "id": "XafA2ear8uf24woz47FXR5", + "content": "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n\n************\n[Expert]: The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n\n************\n[Submission]: The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. 
This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", + "role": "user" + }, + { + "id": "fWg3jrQEJzwVXS2CRhzbwH", + "content": [ + { + "type": "text", + "text": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. 
The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n" + } + ], + "source": "generate", + "role": "assistant", + "model": "gemini-2.0-flash" + } + ] + }, + "history": [] + }, + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n", + "intermediate": false + }, + { + "uuid": "bPxeyiARHLnRutWFED4x29", + "span_id": "65c3ff87fd274709a47d37a6c4117850", + "timestamp": "2026-02-13T17:40:58.482351+00:00", + "working_start": 34.929816081072204, + "event": "span_end", + "id": "65c3ff87fd274709a47d37a6c4117850" + }, + { + "uuid": "8rAmdn95kUoVQx7FUyTzJ2", + "span_id": "5b96cdbb2fc245d9b3825969cb4b54a3", + "timestamp": "2026-02-13T17:40:58.483144+00:00", + "working_start": 34.93060933111701, + "event": "span_end", + "id": "5b96cdbb2fc245d9b3825969cb4b54a3" + } + ], + "model_usage": { + "google/gemini-2.0-flash": { + "input_tokens": 11033, + "output_tokens": 835, + "total_tokens": 11868, + "reasoning_tokens": 0 + } + }, + "started_at": "2026-02-13T17:39:57.032382+00:00", + "completed_at": "2026-02-13T17:40:58.485530+00:00", + "total_time": 61.453, + "working_time": 34.933, + "uuid": "KVPcmmfxyfz5JSQHkRyW7r", + "error_retries": [], + "attachments": { + "9db899dd5425b74fa4eb825cb7100dfe": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "96d2add6be1fbfa37bad6c101bbdcabb": "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. 
Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n\n\nWhen you have completed the task and have an answer, call the submit() tool to report it.\n", + "b38086a4c737ca5ce168983fe7d100a3": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n", + "ca12b4d4123df0f43f6669e0f6179470": "\n## Function Calling\n- Do not generate code. Always generate the function call json\nWhen calling functions, output the function name exactly as defined. Do not prepend 'default_api.' 
or any other namespace to the function name\n", + "a248653186e1cbc62ada82759278d140": "backend_service_name: \"generativelanguage.googleapis.com\" backend_fully_qualified_method: \"google.ai.generativelanguage.v1beta.GenerativeService.GenerateContent\"", + "2adbe2b9c3b7a76ea83ee906051e404e": "FRAMEWORK=ONE_PLATFORM,ENV=borg,ENV_DEBUG=borg_user:genai-api;borg_job:prod.genai-api, FRAMEWORK=HTTPSERVER2,BUILD=GOOGLE3,BUILD_DEBUG=cl:856584283,ENV=borg,ENV_DEBUG=borg_user:genai-api;borg_job:prod.genai-api", + "149a1eba121c602744db9459206a58eb": "genai-api-api-prod/gfespec_googleapis-generativelanguage_generativelanguage-url-map-global_generativelanguage-genai-api-api-prod", + "9fd93716298d2fa1b9cd7a1cfd07f1dd": "unix:/tmp/esfbackend.1771001564.287530.116772,/bns/is/borg/is/bns/genai-api/prod.genai-api/75,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/42", + "8f96980ac42fc857ff3ea8542b167baa": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/42,Mentat oracle: [2002:a05:7300:50cb:b0:2b8:61e9:ae87]:9801", + "9a04b99c768a997e406d36798ffe7341": "Okay, I will explore the codebase and suggest one improvement.\n\nFirst, I'll start by listing the files in the directory to understand the project structure.\n", + "a0225335de8f8644abb148db16ff949e": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwx------@ 2 ewindmill primarygroup 64 Feb 13 09:39 .\ndrwx------@ 770 ewindmill primarygroup 24640 Feb 13 09:40 ..\n", + "44b10d1a8860bb5794ac2c3f9632d1ec": "unix:/tmp/esfbackend.1770937456.105470.1016311,/bns/is/borg/is/bns/genai-api/prod.genai-api/43,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/6", + "1155e3202cf7af1f32a6579b896d0644": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/6,Mentat oracle: [2002:a05:7300:e508:b0:2b6:fe43:7b42]:9801", + "ea5b36fd20a1905467d53eca2cf1773f": 
"/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 .\ndrwx------@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 jobs\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 tasks\n", + "d7bb9364bae96e30d48d7a9d78f3c032": "unix:/tmp/esfbackend.1770840599.720071.233811,/bns/is/borg/is/bns/genai-api/prod.genai-api/40,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/50", + "681981393fbf48f54f0313e8d08829a2": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/50,Mentat oracle: [2002:a05:7022:11e:b0:97:eb7e:caf7]:9801", + "ce6545038816debba39039dfc30cba45": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 3087 Feb 13 09:39 local_dev.yaml\n", + "ac3f04405496a443a23045d82059b1a2": "unix:/tmp/esfbackend.1770839288.589896.628323,/bns/is/borg/is/bns/genai-api/prod.genai-api/37,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/46", + "49d7fb39ae6b2bce756c2729d42a2a81": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/46,Mentat oracle: [2002:a05:7300:e508:b0:2ba:94ab:276f]:9801", + "fa914ac907fba627d8f195e9dd810f20": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml", + "4542a679980a33ce0d83be7803b2d16a": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# =============================================================================\n# Job Configuration: local_dev\n# =============================================================================\n# A job defines what subset of your dataset to run and how to run it.\n# Jobs are the primary way to control evaluation 
runs.\n#\n# To run this job:\n# devals run local_dev\n\n\n# =============================================================================\n# RUNTIME SETTINGS (Optional)\n# =============================================================================\n# !!!Important!!!\n# These override built-in defaults. If you're just getting started,\n# I recommend you ignore these for now.\n# Uncomment and modify as needed.\n\n# Directory for evaluation logs (relative to dataset root)\n# A timestamped subdirectory is created automatically for each run.\n# logs_dir: ../logs\n\n# Sandbox environment: \"local\", \"docker\", or \"podman\"\n# - local: Run directly on host (fastest, no isolation)\n# - docker: Run in Docker containers (recommended for code execution)\n# - podman: Run in Podman containers (rootless alternative to Docker)\n# sandbox_type: local\n\n# Maximum concurrent API connections to model providers.\n# Higher = faster but may hit rate limits with a large dataset\n# max_connections: 10\n\n# Maximum retry attempts for failed API calls.\n# Helps handle transient errors.\n# max_retries: 3\n\n# =============================================================================\n# MODELS\n# =============================================================================\n# Which models to evaluate. Format: \"provider/model-name\"\n# If omitted, falls back to DEFAULT_MODELS from the Python registries.\nmodels:\n - google/gemini-2.0-flash\n\n# =============================================================================\n# VARIANTS (Optional)\n# =============================================================================\n# Which configuration variants to test.\n# Variants control access to tools and context.\n# Each variant is a map of feature flags. 
An empty map {} is the baseline.\n# If omitted, only the baseline (no features) is used.\n#\n# Example:\n# variants:\n# - {} # baseline\n# - context_files: [../../context/flutter.md]\n# - mcp_servers: [dart]\n\n# =============================================================================\n# TASKS\n# =============================================================================\n# Which tasks to run. Keys must match directory names in tasks/.\n# If omitted, runs ALL discovered tasks.\n#\n# Each task can have optional configuration:\n# task_id:\n# variants: [subset] # Override job-level variants for this task\n# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude)\n# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include)\n# system_message: | # Override system prompt for this task\n# Custom instructions...\n#\n# Simple format (run all samples with job-level settings):\n# task_id: {}\n#\ntasks:\n get_started: {}\n", + "ba3d6153baab84a581d4cef34d3e064a": "unix:/tmp/esfbackend.1770840850.795227.1477371,/bns/is/borg/is/bns/genai-api/prod.genai-api/61,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/29", + "188e8e605c689d55de36fa0a2ac177d4": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/29,Mentat oracle: [2002:a05:7300:3c8a:b0:2ba:8101:53d6]:9801", + "75001449357c4856199d7a69bf21cef9": "This file configures a job named `local_dev`. 
It specifies the model to use (`google/gemini-2.0-flash`) and the task to run (`get_started`).\n\nNow I'll check the `tasks` directory.\n", + "5a5632feb6066dd225b3c124a1bac129": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 0\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 4 ewindmill primarygroup 128 Feb 13 09:39 ..\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 get_started\n", + "83217d10977474b4655b92e39aebaee5": "unix:/tmp/esfbackend.1770840545.575655.329587,/bns/is/borg/is/bns/genai-api/prod.genai-api/63,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/32", + "04b84a47f04af4777b81695e7d416a2b": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/32,Mentat oracle: [2002:a05:7301:23aa:b0:2b6:fb8a:f197]:9801", + "1cc47e8e9b60458fc2fb2c07cbd56e28": "ls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started", + "421ab3fe5c6eb371ed45728b326879fa": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\ntotal 8\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 .\ndrwxr-xr-x@ 3 ewindmill primarygroup 96 Feb 13 09:39 ..\n-rw-r--r--@ 1 ewindmill primarygroup 857 Feb 13 09:39 task.yaml\n", + "0f8cf5de29178ac4e22880548e4e0884": "unix:/tmp/esfbackend.1770936291.584434.671502,/bns/is/borg/is/bns/genai-api/prod.genai-api/74,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/14", + "8afbf58217687de2c30df222c5d49f30": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/14,Mentat oracle: [2002:a05:7302:512:b0:129:427a:524a]:9801", + "c73294582d1bab6be6e6f43e528c5fd4": "cat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml", + "ce9860604d560b4328509bddf9a9fd4b": "/Users/ewindmill/.bash_profile: line 1: n#: command not found\n\n# 
=============================================================================\n# Starter Task\n# =============================================================================\n# This task points at your project root as its workspace and runs a simple\n# codebase analysis evaluation.\n\nfunc: analyze_codebase\n\n# Workspace: points to the project root containing pubspec.yaml\nworkspace:\n path: ../../\n\nsamples:\n - id: get_started\n difficulty: easy\n tags: []\n # Input: The prompt given to the model\n input: |\n Explore this codebase and suggest one improvement\n to the code quality, readability, or architecture.\n # Target: Expected output or grading criteria\n target: |\n The suggestion should be specific, actionable, and reference\n actual code in the project. It should explain why the change\n improves the codebase.\n", + "287939dfc084b27ecbbf3235eb41a52a": "unix:/tmp/esfbackend.1770975685.603810.175121,/bns/is/borg/is/bns/genai-api/prod.genai-api/44,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/18", + "94b3547aef94c0eb7493e1db0f584356": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/18,Mentat oracle: [2002:a05:7022:69c:b0:97:aa34:96d7]:9801", + "5684fb11be22e745b968e13f4f60486c": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. 
If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n", + "a90c31e08ffc1d69baf4216e4c8876d2": "The `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "33e2eafc11865d4c1f435f1acd5730ef": "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n\n************\n[Expert]: The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n\n************\n[Submission]: The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. 
This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. 
Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", + "150f53d128f6a1ac794f1962a3f5dab2": "unix:/tmp/esfbackend.1770839288.588867.628315,/bns/is/borg/is/bns/genai-api/prod.genai-api/19,/bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/19", + "eba0cf1b09ef7148ec57a39e354d19ac": "GFE: /bns/ncsfoa/borg/ncsfoa/bns/blue-layer1-gfe-prod-edge/prod.blue-layer1-gfe.nuq04s43/19,Mentat oracle: [2002:a05:7302:24:b0:12c:176b:5d35]:9801", + "23fa16a7cf2ba1e9f5b3d14be45f5793": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n", + "5282a84f99e713059d20891c8a9356e5": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs\n```\n", + "894d16b0dfc3330e2a72ff18bfcbc1f0": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/jobs/local_dev.yaml\n```\n", + "fa15a6d48178bdf2e3bbe95c76a67e8c": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks\n```\n", + "04f5b45f7eb27f50ba42ff7155669327": "```bash\nls -la /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started\n```\n", + "85d9f779f2a331fd6accc8b92f1f546d": "```bash\ncat /var/folders/dh/b1cxzcgd5s92q6m483sj2pz000xjqm/T/eval_workspace__anfzy6z/evals/tasks/get_started/task.yaml\n```\n", + "a705534cbb1833e3d70b86bab4bbc113": "The `task.yaml` file defines the `get_started` task. 
It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors." + } + } + ], + "reductions": [ + { + "scorer": "model_graded_fact", + "samples": [ + { + "value": 1.0, + "answer": "The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. 
This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.", + "explanation": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n", + "metadata": { + "grading": [ + { + "id": "XafA2ear8uf24woz47FXR5", + "content": "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n\n************\n[Expert]: The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n\n************\n[Submission]: The `task.yaml` file defines the `get_started` task. It uses the `analyze_codebase` function and points to the project root as its workspace. The input to the model is \"Explore this codebase and suggest one improvement to the code quality, readability, or architecture.\" and provides the target criteria for a good answer.\n\nImprovement suggestion:\n\nThe `task.yaml` file currently defines the workspace path as `\"../../\"`. This path is relative, and while it might work in the current setup, it's not very robust. 
If the location of the `task.yaml` file changes relative to the project root, this path will break.\n\nA better approach would be to use an absolute path or a configuration option to specify the project root. This would make the task definition more portable and less prone to errors caused by changes in the directory structure.\n\n\n\nThe `task.yaml` file in the `get_started` task uses a relative path `../../` for the workspace. It would be more robust to use an absolute path or a configuration variable to specify the workspace path, making the task definition more portable and less prone to errors.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", + "role": "user" + }, + { + "id": "fWg3jrQEJzwVXS2CRhzbwH", + "content": [ + { + "type": "text", + "text": "The expert answer states that the submission should be specific, actionable, and reference actual code in the project, while explaining why the change improves the codebase.\n\nThe submission refers to the `task.yaml` file and the workspace path defined within, which is `../../`. 
The submission also suggests using an absolute path or a configuration option instead of the relative path, and explains that this improves robustness and portability.\n\nThe submission satisfies the expert answer's criteria.\n\nGRADE: C\n" + } + ], + "source": "generate", + "role": "assistant", + "model": "gemini-2.0-flash" + } + ] + }, + "history": [], + "sample_id": "get_started" + } + ] + } + ] +} \ No newline at end of file diff --git a/packages/eval_cli/example/logs/2026-02-13_17-39-54/eval-set.json b/packages/eval_cli/example/logs/2026-02-13_17-39-54/eval-set.json new file mode 100644 index 0000000..67904ef --- /dev/null +++ b/packages/eval_cli/example/logs/2026-02-13_17-39-54/eval-set.json @@ -0,0 +1,40 @@ +{ + "eval_set_id": "DUVSLcpnZ6bJdWRi2dqGg7", + "tasks": [ + { + "name": "get_started:baseline", + "task_id": "H4rxfkjPK5ug2Bcmo9VodZ", + "task_file": "/Users/ewindmill/development/dash_evals/pkgs/eval_runner/src/eval_runner/runner/tasks/analyze_codebase.py", + "task_args": { + "task_config": { + "id": "get_started", + "task": "analyze_codebase", + "samples": [ + { + "id": "get_started", + "input": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. 
It should explain why the change\nimproves the codebase.\n", + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals", + "tests": null, + "difficulty": "easy", + "tags": [], + "metadata": {}, + "workspace_git": null, + "workspace_git_ref": null + } + ], + "variant": { + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + }, + "sandbox_type": "local", + "system_message": null + } + }, + "model": "google/gemini-2.0-flash", + "model_args": {}, + "sequence": 0 + } + ] +} \ No newline at end of file diff --git a/packages/eval_cli/example/logs/2026-02-13_17-39-54/logs.json b/packages/eval_cli/example/logs/2026-02-13_17-39-54/logs.json new file mode 100644 index 0000000..fd2cf4d --- /dev/null +++ b/packages/eval_cli/example/logs/2026-02-13_17-39-54/logs.json @@ -0,0 +1,257 @@ +{ + "2026-02-13T17-39-55+00-00_get-started-baseline_H4rxfkjPK5ug2Bcmo9VodZ.json": { + "version": 2, + "status": "success", + "eval": { + "eval_set_id": "DUVSLcpnZ6bJdWRi2dqGg7", + "eval_id": "YJZKbC9omerSZ2dv9CZEH2", + "run_id": "XX3Pv6Bug8YpTZTJLDZqie", + "created": "2026-02-13T17:39:55+00:00", + "task": "get_started:baseline", + "task_id": "H4rxfkjPK5ug2Bcmo9VodZ", + "task_version": 0, + "task_file": "/Users/ewindmill/development/dash_evals/pkgs/eval_runner/src/eval_runner/runner/tasks/analyze_codebase.py", + "task_display_name": "get_started:baseline", + "task_registry_name": "analyze_codebase", + "task_attribs": {}, + "task_args": { + "task_config": { + "id": "get_started", + "task": "analyze_codebase", + "samples": [ + { + "id": "get_started", + "input": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. 
It should explain why the change\nimproves the codebase.\n", + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals", + "tests": null, + "difficulty": "easy", + "tags": [], + "metadata": {}, + "workspace_git": null, + "workspace_git_ref": null + } + ], + "variant": { + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + }, + "sandbox_type": "local", + "system_message": null + } + }, + "task_args_passed": { + "task_config": { + "id": "get_started", + "task": "analyze_codebase", + "samples": [ + { + "id": "get_started", + "input": "Explore this codebase and suggest one improvement\nto the code quality, readability, or architecture.\n", + "target": "The suggestion should be specific, actionable, and reference\nactual code in the project. It should explain why the change\nimproves the codebase.\n", + "workspace": "/Users/ewindmill/development/dash_evals/pkgs/eval_cli/example/evals", + "tests": null, + "difficulty": "easy", + "tags": [], + "metadata": {}, + "workspace_git": null, + "workspace_git_ref": null + } + ], + "variant": { + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + }, + "sandbox_type": "local", + "system_message": null + } + }, + "solver_args_passed": {}, + "dataset": { + "name": "get_started_baseline", + "samples": 1, + "sample_ids": [ + "get_started" + ], + "shuffled": false + }, + "sandbox": { + "type": "local" + }, + "model": "google/gemini-2.0-flash", + "model_generate_config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + }, + "model_args": {}, + "config": { + "epochs": 1, + "epochs_reducer": [ + "mean" + ], + "fail_on_error": 0.05, + "continue_on_fail": false, + "retry_on_error": 3, + "message_limit": 50, + "token_limit": 200000, + "time_limit": 180, + "max_tasks": 10, + "max_sandboxes": 32, + "sandbox_cleanup": true, + "log_samples": true, + "log_realtime": true, + "log_images": true, + "score_display": true + }, + 
"revision": { + "type": "git", + "origin": "https://github.com/flutter/dash_evals.git", + "commit": "5092907", + "dirty": true + }, + "packages": { + "inspect_ai": "0.3.160" + }, + "metadata": { + "variant_config": { + "label": "baseline", + "context_files": [], + "mcp_servers": [], + "skill_paths": [] + } + }, + "scorers": [ + { + "name": "model_graded_fact", + "options": {}, + "metrics": [ + { + "name": "inspect_ai/accuracy", + "options": {} + }, + { + "name": "inspect_ai/stderr", + "options": {} + } + ], + "metadata": {} + } + ] + }, + "plan": { + "name": "plan", + "steps": [ + { + "solver": "setup_workspace", + "params": {}, + "params_passed": {} + }, + { + "solver": "_add_workspace_system_message", + "params": { + "template": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at {workspace} using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n" + }, + "params_passed": { + "template": "You are an expert code reviewer analyzing a codebase.\n\nYour task is to:\n\n1. Explore the codebase at {workspace} using the available tools\n2. Understand the project structure, dependencies, and architecture\n3. Answer the user's question based on what you find in the code\n\nImportant guidelines:\n- Use bash commands (cat, find, grep, ls, head, tail, etc.) 
to browse files\n- Do NOT edit or modify any files\n- Base your answer on actual code you find, not assumptions\n- Reference specific files and line numbers when relevant\n- When done, call submit() with your complete answer\n" + } + }, + { + "solver": "react", + "params": { + "name": "code_analyzer", + "description": "Expert code reviewer who explores and analyzes codebases.", + "prompt": [ + null, + "\nYou are part of a multi-agent system designed to make agent coordination and execution easy. Agents uses two primary abstraction: **Agents** and **Handoffs**. An agent encompasses instructions and tools and can hand off a conversation to another agent when appropriate. Handoffs are achieved by calling a handoff function,generally named `transfer_to_`. Transfers between agents are handled seamlessly in the background; do not mention or draw attention to these transfers in your conversation with the user.\n", + "\nYou are a helpful assistant attempting to submit the best possible answer. You have several tools available to help with finding the answer. You will see the result of tool calls right after sending the message. If you need to perform multiple actions, you can always send more messages with additional tool calls. 
Do some reasoning before your actions, describing what tool calls you are going to use and how they fit into your plan.\n", + "\nWhen you have completed the task and have an answer, call the {submit}() tool to report it.\n" + ], + "tools": [ + { + "type": "tool", + "name": "bash", + "params": { + "timeout": 120 + } + } + ], + "model": null, + "attempts": 1, + "submit": null, + "on_continue": null, + "retry_refusals": null, + "compaction": null, + "truncation": "disabled" + }, + "params_passed": { + "name": "code_analyzer", + "description": "Expert code reviewer who explores and analyzes codebases.", + "tools": [ + { + "type": "tool", + "name": "bash", + "params": { + "timeout": 120 + } + } + ] + } + } + ], + "config": { + "max_connections": 10, + "cache_prompt": true, + "cache": { + "expiry": "2W", + "per_epoch": true, + "scopes": {} + } + } + }, + "results": { + "total_samples": 1, + "completed_samples": 1, + "scores": [ + { + "name": "model_graded_fact", + "scorer": "model_graded_fact", + "scored_samples": 1, + "unscored_samples": 0, + "params": {}, + "metrics": { + "accuracy": { + "name": "accuracy", + "value": 1.0, + "params": {} + }, + "stderr": { + "name": "stderr", + "value": 0.0, + "params": {} + } + } + } + ] + }, + "stats": { + "started_at": "2026-02-13T17:39:55+00:00", + "completed_at": "2026-02-13T17:40:58+00:00", + "model_usage": { + "google/gemini-2.0-flash": { + "input_tokens": 11033, + "output_tokens": 835, + "total_tokens": 11868, + "reasoning_tokens": 0 + } + } + }, + "invalidated": false + } +} \ No newline at end of file diff --git a/packages/eval_cli/example/pubspec.lock b/packages/eval_cli/example/pubspec.lock new file mode 100644 index 0000000..5a4eaa7 --- /dev/null +++ b/packages/eval_cli/example/pubspec.lock @@ -0,0 +1,213 @@ +# Generated by pub +# See https://dart.dev/tools/pub/glossary#lockfile +packages: + async: + dependency: transitive + description: + name: async + sha256: 
"758e6d74e971c3e5aceb4110bfd6698efc7f501675bcfe0c775459a8140750eb" + url: "https://pub.dev" + source: hosted + version: "2.13.0" + boolean_selector: + dependency: transitive + description: + name: boolean_selector + sha256: "8aab1771e1243a5063b8b0ff68042d67334e3feab9e95b9490f9a6ebf73b42ea" + url: "https://pub.dev" + source: hosted + version: "2.1.2" + characters: + dependency: transitive + description: + name: characters + sha256: faf38497bda5ead2a8c7615f4f7939df04333478bf32e4173fcb06d428b5716b + url: "https://pub.dev" + source: hosted + version: "1.4.1" + clock: + dependency: transitive + description: + name: clock + sha256: fddb70d9b5277016c77a80201021d40a2247104d9f4aa7bab7157b7e3f05b84b + url: "https://pub.dev" + source: hosted + version: "1.1.2" + collection: + dependency: transitive + description: + name: collection + sha256: "2f5709ae4d3d59dd8f7cd309b4e023046b57d8a6c82130785d2b0e5868084e76" + url: "https://pub.dev" + source: hosted + version: "1.19.1" + cupertino_icons: + dependency: "direct main" + description: + name: cupertino_icons + sha256: ba631d1c7f7bef6b729a622b7b752645a2d076dba9976925b8f25725a30e1ee6 + url: "https://pub.dev" + source: hosted + version: "1.0.8" + fake_async: + dependency: transitive + description: + name: fake_async + sha256: "5368f224a74523e8d2e7399ea1638b37aecfca824a3cc4dfdf77bf1fa905ac44" + url: "https://pub.dev" + source: hosted + version: "1.3.3" + flutter: + dependency: "direct main" + description: flutter + source: sdk + version: "0.0.0" + flutter_lints: + dependency: "direct dev" + description: + name: flutter_lints + sha256: "3105dc8492f6183fb076ccf1f351ac3d60564bff92e20bfc4af9cc1651f4e7e1" + url: "https://pub.dev" + source: hosted + version: "6.0.0" + flutter_test: + dependency: "direct dev" + description: flutter + source: sdk + version: "0.0.0" + leak_tracker: + dependency: transitive + description: + name: leak_tracker + sha256: "33e2e26bdd85a0112ec15400c8cbffea70d0f9c3407491f672a2fad47915e2de" + url: "https://pub.dev" + 
source: hosted + version: "11.0.2" + leak_tracker_flutter_testing: + dependency: transitive + description: + name: leak_tracker_flutter_testing + sha256: "1dbc140bb5a23c75ea9c4811222756104fbcd1a27173f0c34ca01e16bea473c1" + url: "https://pub.dev" + source: hosted + version: "3.0.10" + leak_tracker_testing: + dependency: transitive + description: + name: leak_tracker_testing + sha256: "8d5a2d49f4a66b49744b23b018848400d23e54caf9463f4eb20df3eb8acb2eb1" + url: "https://pub.dev" + source: hosted + version: "3.0.2" + lints: + dependency: transitive + description: + name: lints + sha256: "12f842a479589fea194fe5c5a3095abc7be0c1f2ddfa9a0e76aed1dbd26a87df" + url: "https://pub.dev" + source: hosted + version: "6.1.0" + matcher: + dependency: transitive + description: + name: matcher + sha256: "12956d0ad8390bbcc63ca2e1469c0619946ccb52809807067a7020d57e647aa6" + url: "https://pub.dev" + source: hosted + version: "0.12.18" + material_color_utilities: + dependency: transitive + description: + name: material_color_utilities + sha256: "9c337007e82b1889149c82ed242ed1cb24a66044e30979c44912381e9be4c48b" + url: "https://pub.dev" + source: hosted + version: "0.13.0" + meta: + dependency: transitive + description: + name: meta + sha256: "1741988757a65eb6b36abe716829688cf01910bbf91c34354ff7ec1c3de2b349" + url: "https://pub.dev" + source: hosted + version: "1.18.0" + path: + dependency: transitive + description: + name: path + sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5" + url: "https://pub.dev" + source: hosted + version: "1.9.1" + sky_engine: + dependency: transitive + description: flutter + source: sdk + version: "0.0.0" + source_span: + dependency: transitive + description: + name: source_span + sha256: "56a02f1f4cd1a2d96303c0144c93bd6d909eea6bee6bf5a0e0b685edbd4c47ab" + url: "https://pub.dev" + source: hosted + version: "1.10.2" + stack_trace: + dependency: transitive + description: + name: stack_trace + sha256: 
"8b27215b45d22309b5cddda1aa2b19bdfec9df0e765f2de506401c071d38d1b1" + url: "https://pub.dev" + source: hosted + version: "1.12.1" + stream_channel: + dependency: transitive + description: + name: stream_channel + sha256: "969e04c80b8bcdf826f8f16579c7b14d780458bd97f56d107d3950fdbeef059d" + url: "https://pub.dev" + source: hosted + version: "2.1.4" + string_scanner: + dependency: transitive + description: + name: string_scanner + sha256: "921cd31725b72fe181906c6a94d987c78e3b98c2e205b397ea399d4054872b43" + url: "https://pub.dev" + source: hosted + version: "1.4.1" + term_glyph: + dependency: transitive + description: + name: term_glyph + sha256: "7f554798625ea768a7518313e58f83891c7f5024f88e46e7182a4558850a4b8e" + url: "https://pub.dev" + source: hosted + version: "1.2.2" + test_api: + dependency: transitive + description: + name: test_api + sha256: "93167629bfc610f71560ab9312acdda4959de4df6fac7492c89ff0d3886f6636" + url: "https://pub.dev" + source: hosted + version: "0.7.9" + vector_math: + dependency: transitive + description: + name: vector_math + sha256: d530bd74fea330e6e364cda7a85019c434070188383e1cd8d9777ee586914c5b + url: "https://pub.dev" + source: hosted + version: "2.2.0" + vm_service: + dependency: transitive + description: + name: vm_service + sha256: "45caa6c5917fa127b5dbcfbd1fa60b14e583afdc08bfc96dda38886ca252eb60" + url: "https://pub.dev" + source: hosted + version: "15.0.2" +sdks: + dart: ">=3.11.0-296.5.beta <4.0.0" + flutter: ">=3.18.0-18.0.pre.54" diff --git a/packages/eval_cli/example/pubspec.yaml b/packages/eval_cli/example/pubspec.yaml new file mode 100644 index 0000000..afcbbf1 --- /dev/null +++ b/packages/eval_cli/example/pubspec.yaml @@ -0,0 +1,89 @@ +name: example +description: "A new Flutter project." +# The following line prevents the package from being accidentally published to +# pub.dev using `flutter pub publish`. This is preferred for private packages. 
+publish_to: 'none' # Remove this line if you wish to publish to pub.dev + +# The following defines the version and build number for your application. +# A version number is three numbers separated by dots, like 1.2.43 +# followed by an optional build number separated by a +. +# Both the version and the builder number may be overridden in flutter +# build by specifying --build-name and --build-number, respectively. +# In Android, build-name is used as versionName while build-number used as versionCode. +# Read more about Android versioning at https://developer.android.com/studio/publish/versioning +# In iOS, build-name is used as CFBundleShortVersionString while build-number is used as CFBundleVersion. +# Read more about iOS versioning at +# https://developer.apple.com/library/archive/documentation/General/Reference/InfoPlistKeyReference/Articles/CoreFoundationKeys.html +# In Windows, build-name is used as the major, minor, and patch parts +# of the product and file versions while build-number is used as the build suffix. +version: 1.0.0+1 + +environment: + sdk: ^3.11.0-296.5.beta + +# Dependencies specify other packages that your package needs in order to work. +# To automatically upgrade your package dependencies to the latest versions +# consider running `flutter pub upgrade --major-versions`. Alternatively, +# dependencies can be manually updated by changing the version numbers below to +# the latest version available on pub.dev. To see which dependencies have newer +# versions available, run `flutter pub outdated`. +dependencies: + flutter: + sdk: flutter + + # The following adds the Cupertino Icons font to your application. + # Use with the CupertinoIcons class for iOS style icons. + cupertino_icons: ^1.0.8 + +dev_dependencies: + flutter_test: + sdk: flutter + + # The "flutter_lints" package below contains a set of recommended lints to + # encourage good coding practices. 
The lint set provided by the package is + # activated in the `analysis_options.yaml` file located at the root of your + # package. See that file for information about deactivating specific lint + # rules and activating additional ones. + flutter_lints: ^6.0.0 + +# For information on the generic Dart part of this file, see the +# following page: https://dart.dev/tools/pub/pubspec + +# The following section is specific to Flutter packages. +flutter: + + # The following line ensures that the Material Icons font is + # included with your application, so that you can use the icons in + # the material Icons class. + uses-material-design: true + + # To add assets to your application, add an assets section, like this: + # assets: + # - images/a_dot_burr.jpeg + # - images/a_dot_ham.jpeg + + # An image asset can refer to one or more resolution-specific "variants", see + # https://flutter.dev/to/resolution-aware-images + + # For details regarding adding assets from package dependencies, see + # https://flutter.dev/to/asset-from-package + + # To add custom fonts to your application, add a fonts section here, + # in this "flutter" section. Each entry in this list should have a + # "family" key with the font family name, and a "fonts" key with a + # list giving the asset and other descriptors for the font. For + # example: + # fonts: + # - family: Schyler + # fonts: + # - asset: fonts/Schyler-Regular.ttf + # - asset: fonts/Schyler-Italic.ttf + # style: italic + # - family: Trajan Pro + # fonts: + # - asset: fonts/TrajanPro.ttf + # - asset: fonts/TrajanPro_Bold.ttf + # weight: 700 + # + # For details regarding fonts from package dependencies, + # see https://flutter.dev/to/font-from-package diff --git a/packages/eval_cli/example/test/widget_test.dart b/packages/eval_cli/example/test/widget_test.dart new file mode 100644 index 0000000..092d222 --- /dev/null +++ b/packages/eval_cli/example/test/widget_test.dart @@ -0,0 +1,30 @@ +// This is a basic Flutter widget test. 
+// +// To perform an interaction with a widget in your test, use the WidgetTester +// utility in the flutter_test package. For example, you can send tap and scroll +// gestures. You can also use WidgetTester to find child widgets in the widget +// tree, read text, and verify that the values of widget properties are correct. + +import 'package:flutter/material.dart'; +import 'package:flutter_test/flutter_test.dart'; + +import 'package:example/main.dart'; + +void main() { + testWidgets('Counter increments smoke test', (WidgetTester tester) async { + // Build our app and trigger a frame. + await tester.pumpWidget(const MyApp()); + + // Verify that our counter starts at 0. + expect(find.text('0'), findsOneWidget); + expect(find.text('1'), findsNothing); + + // Tap the '+' icon and trigger a frame. + await tester.tap(find.byIcon(Icons.add)); + await tester.pump(); + + // Verify that our counter has incremented. + expect(find.text('0'), findsNothing); + expect(find.text('1'), findsOneWidget); + }); +} diff --git a/packages/eval_cli/example/web/favicon.png b/packages/eval_cli/example/web/favicon.png new file mode 100644 index 0000000..8aaa46a Binary files /dev/null and b/packages/eval_cli/example/web/favicon.png differ diff --git a/packages/eval_cli/example/web/icons/Icon-192.png b/packages/eval_cli/example/web/icons/Icon-192.png new file mode 100644 index 0000000..b749bfe Binary files /dev/null and b/packages/eval_cli/example/web/icons/Icon-192.png differ diff --git a/packages/eval_cli/example/web/icons/Icon-512.png b/packages/eval_cli/example/web/icons/Icon-512.png new file mode 100644 index 0000000..88cfd48 Binary files /dev/null and b/packages/eval_cli/example/web/icons/Icon-512.png differ diff --git a/packages/eval_cli/example/web/icons/Icon-maskable-192.png b/packages/eval_cli/example/web/icons/Icon-maskable-192.png new file mode 100644 index 0000000..eb9b4d7 Binary files /dev/null and b/packages/eval_cli/example/web/icons/Icon-maskable-192.png differ diff --git 
a/packages/eval_cli/example/web/icons/Icon-maskable-512.png b/packages/eval_cli/example/web/icons/Icon-maskable-512.png new file mode 100644 index 0000000..d69c566 Binary files /dev/null and b/packages/eval_cli/example/web/icons/Icon-maskable-512.png differ diff --git a/packages/eval_cli/example/web/index.html b/packages/eval_cli/example/web/index.html new file mode 100644 index 0000000..badaed3 --- /dev/null +++ b/packages/eval_cli/example/web/index.html @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + example + + + + + + + diff --git a/packages/eval_cli/example/web/manifest.json b/packages/eval_cli/example/web/manifest.json new file mode 100644 index 0000000..096edf8 --- /dev/null +++ b/packages/eval_cli/example/web/manifest.json @@ -0,0 +1,35 @@ +{ + "name": "example", + "short_name": "example", + "start_url": ".", + "display": "standalone", + "background_color": "#0175C2", + "theme_color": "#0175C2", + "description": "A new Flutter project.", + "orientation": "portrait-primary", + "prefer_related_applications": false, + "icons": [ + { + "src": "icons/Icon-192.png", + "sizes": "192x192", + "type": "image/png" + }, + { + "src": "icons/Icon-512.png", + "sizes": "512x512", + "type": "image/png" + }, + { + "src": "icons/Icon-maskable-192.png", + "sizes": "192x192", + "type": "image/png", + "purpose": "maskable" + }, + { + "src": "icons/Icon-maskable-512.png", + "sizes": "512x512", + "type": "image/png", + "purpose": "maskable" + } + ] +} diff --git a/packages/eval_cli/lib/.DS_Store b/packages/eval_cli/lib/.DS_Store new file mode 100644 index 0000000..9a874b5 Binary files /dev/null and b/packages/eval_cli/lib/.DS_Store differ diff --git a/packages/eval_cli/lib/devals.dart b/packages/eval_cli/lib/devals.dart new file mode 100644 index 0000000..bfd6857 --- /dev/null +++ b/packages/eval_cli/lib/devals.dart @@ -0,0 +1,10 @@ +/// CLI for managing dash-evals. 
+/// +/// Provides commands for: +/// - Creating samples and jobs +/// - Running evaluations +/// - Viewing results +library; + +export 'src/cli_exception.dart'; +export 'src/commands/commands.dart'; diff --git a/packages/eval_cli/lib/src/cli_exception.dart b/packages/eval_cli/lib/src/cli_exception.dart new file mode 100644 index 0000000..5125ed8 --- /dev/null +++ b/packages/eval_cli/lib/src/cli_exception.dart @@ -0,0 +1,13 @@ +/// Exception thrown when a CLI command fails with a specific exit code. +/// +/// Throw this from anywhere in the CLI codebase when an error occurs. +/// The top-level main function catches these and exits with the specified code. +class CliException implements Exception { + final String message; + final int exitCode; + + CliException(this.message, {this.exitCode = 1}); + + @override + String toString() => message; +} diff --git a/packages/eval_cli/lib/src/commands/commands.dart b/packages/eval_cli/lib/src/commands/commands.dart new file mode 100644 index 0000000..6db2e89 --- /dev/null +++ b/packages/eval_cli/lib/src/commands/commands.dart @@ -0,0 +1,10 @@ +export 'create_command.dart'; +export 'create_job_command.dart'; +export 'create_pipeline_command.dart'; +export 'create_sample_command.dart'; +export 'create_task_command.dart'; +export 'doctor_command.dart'; +export 'init_command.dart'; +export 'publish_command.dart'; +export 'run_command.dart'; +export 'view_command.dart'; diff --git a/packages/eval_cli/lib/src/commands/create_command.dart b/packages/eval_cli/lib/src/commands/create_command.dart new file mode 100644 index 0000000..85eee7a --- /dev/null +++ b/packages/eval_cli/lib/src/commands/create_command.dart @@ -0,0 +1,22 @@ +import 'package:args/command_runner.dart'; + +import 'create_job_command.dart'; +import 'create_pipeline_command.dart'; +import 'create_sample_command.dart'; +import 'create_task_command.dart'; + +/// Parent command for create subcommands. 
+class CreateCommand extends Command { + CreateCommand() { + addSubcommand(CreateSampleCommand()); + addSubcommand(CreateJobCommand()); + addSubcommand(CreateTaskCommand()); + addSubcommand(CreatePipelineCommand()); + } + + @override + String get name => 'create'; + + @override + String get description => 'Create samples, jobs, and tasks for the dataset.'; +} diff --git a/packages/eval_cli/lib/src/commands/create_job_command.dart b/packages/eval_cli/lib/src/commands/create_job_command.dart new file mode 100644 index 0000000..19a1f0a --- /dev/null +++ b/packages/eval_cli/lib/src/commands/create_job_command.dart @@ -0,0 +1,119 @@ +import 'package:args/command_runner.dart'; +import 'package:devals/src/dataset/dataset_reader.dart'; +import 'package:devals/src/dataset/eval_writer.dart'; +import 'package:devals/src/dataset/file_templates/job_template.dart'; +import 'package:eval_config/eval_config.dart'; +import 'package:howdy/howdy.dart'; + +/// Interactive command to create a new job file. +class CreateJobCommand extends Command { + @override + String get name => 'job'; + + @override + String get description => 'Create a new job file interactively.'; + + @override + Future run() async { + terminal.scrollClear(); + terminal.writeln(); + + // Get available options from the generated registries and filesystem + final models = List.of(kDefaultModels); + final variants = datasetReader.getVariants(); + final tasks = datasetReader.getTasks(); + + final results = Form.send( + title: 'Create a new job', + children: [ + Note( + next: true, + nextLabel: 'Get started', + children: [ + Text( + 'A ${"job".bold} is a runtime definition for dash-evals. It defines which tasks to run, which models to run against, and more. This flow generates a new job.yaml file, which is run with `devals run .' + .wordWrap(60), + ), + ], + ), + Page( + children: [ + Prompt( + 'Job name', + help: 'devals run ', + key: 'job', + validator: (value) => + value.isEmpty ? 
'Job name cannot be empty' : null, + ), + Multiselect( + 'Select tasks', + help: 'Choose which tasks to run', + options: tasks.map((t) => Option(label: t, value: t)).toList(), + validator: (List? selection) { + if (selection == null || selection.isEmpty) { + return 'You must select at least one Task.'; + } + return null; + }, + key: 'tasks', + ), + ], + ), + Page( + children: [ + Multiselect( + 'Select models', + help: 'Tasks will run against each of these', + options: models.map((m) => Option(label: m, value: m)).toList(), + key: 'models', + defaultValue: models + .where((String name) => name.contains('gemini')) + .toList(), + ), + Multiselect( + 'Select variants', + help: 'Tasks will run once for each variant', + options: variants.map((m) => Option(label: m, value: m)).toList(), + defaultValue: ['baseline'], + key: 'variants', + ), + ], + ), + ], + ); + + final jobName = results['job'] as String; + final selectedModels = results['models'] as List; + final selectedVariants = results['variants'] as List; + final selectedTasks = results['tasks'] as List; + + final success = await SpinnerTask.send( + 'Creating $jobName.yaml file', + task: () async { + // Build job YAML + final jobContent = jobTemplate( + name: jobName, + models: selectedModels, + variants: selectedVariants, + tasks: selectedTasks, + ); + + // Write job file using the writer utility + generator.writeJobFile(jobName, jobContent); + return true; + }, + ); + + if (success) { + Text.success('Created: jobs/$jobName.yaml'); + Text.body('Run with: devals run $jobName'); + return 0; + } else { + // Not sure if this is useful. + // If not success, the Spinner task threw an error, + // and this already exited. 
+ Text.error('Create job failed'); + return 1; + } + } +} diff --git a/packages/eval_cli/lib/src/commands/create_pipeline_command.dart b/packages/eval_cli/lib/src/commands/create_pipeline_command.dart new file mode 100644 index 0000000..ba5632f --- /dev/null +++ b/packages/eval_cli/lib/src/commands/create_pipeline_command.dart @@ -0,0 +1,256 @@ +import 'dart:io'; + +import 'package:args/command_runner.dart'; +import 'package:devals/src/cli_exception.dart'; +import 'package:devals/src/dataset/eval_writer.dart'; +import 'package:devals/src/dataset/file_templates/job_template.dart'; +import 'package:devals/src/dataset/file_templates/task_template.dart'; +import 'package:devals/src/dataset/filesystem_utils.dart'; +import 'package:eval_config/eval_config.dart'; +import 'package:howdy/howdy.dart'; + +/// Interactive guide to create a task and job in one go. +class CreatePipelineCommand extends Command { + @override + String get name => 'pipeline'; + + @override + String get description => 'Interactive guide to set up an end-to-end eval.'; + + @override + Future run() async { + terminal.scrollClear(); + terminal.writeln(); + + final datasetDirPath = findDatasetDirectory(); + final tasksDirPath = findTasksDir(datasetDirPath); + + final availableFuncs = datasetReader.getTaskFuncs(); + if (availableFuncs.isEmpty) { + throw CliException( + 'No task functions registered.\n' + 'Run sync_registries.py to regenerate the task registry.', + ); + } + + final availableVariants = datasetReader.getVariants(); + final models = List.of(kDefaultModels); + if (models.isEmpty) { + throw CliException( + 'No models configured.', + ); + } + + // ========================================================================= + // Form 1: Task setup + // ========================================================================= + final taskResults = Form.send( + title: 'Create an eval pipeline', + children: [ + Note( + next: true, + nextLabel: 'Get started', + children: [ + Text( + 'This command walks 
you through setting up an end-to-end eval.', + ), + Text('Running evals requires two components:', newline: false), + Text( + ' • A Task — input prompts paired with expected outputs.', + newline: false, + ), + Text(' • A Job — which models, tasks, and variants to evaluate.'), + ], + ), + Page( + children: [ + Prompt( + 'Task ID', + help: + 'Unique identifier (snake_case, e.g., fix_shopping_cart_bug)', + key: 'taskId', + validator: (value) { + if (value.isEmpty) { + return 'Task ID cannot be empty.'; + } + if (Directory('$tasksDirPath/$value').existsSync()) { + return 'Task "$value" already exists. Try another name.'; + } + return null; + }, + ), + Select( + 'Task function', + help: 'Defines how the sample is run and scored.', + options: availableFuncs + .map((f) => Option(label: f.name, value: f.name)) + .toList(), + key: 'taskFunc', + ), + ], + ), + Page( + children: [ + if (availableVariants.isNotEmpty) + Multiselect( + 'Variants', + help: 'Which variants to run for this task. Optional.', + options: availableVariants + .map((v) => Option(label: v, value: v)) + .toList(), + key: 'variants', + ), + Select( + 'Workspace type', + help: + 'Does your eval run against code? How is the code provided?', + options: [ + Option( + label: 'path', + value: WorkspaceType.path, + help: 'Enter a path ref to the codebase.', + ), + Option( + label: 'git', + value: WorkspaceType.git, + help: 'Enter a public git url with the codebase.', + ), + Option( + label: 'create', + value: WorkspaceType.create, + help: 'Run a command to generate a new codebase.', + ), + ], + key: 'workspaceType', + ), + ], + ), + ], + ); + final taskId = taskResults['taskId'] as String; + final taskFunc = taskResults['taskFunc'] as String; + final selectedVariants = availableVariants.isNotEmpty + ? taskResults['variants'] as List + : []; + final workspaceType = taskResults['workspaceType'] as WorkspaceType; + + // Workspace value depends on the selected type — collected standalone + final String? 
workspaceValue = switch (workspaceType) { + WorkspaceType.path => Prompt.send( + 'Relative path', + help: + 'Relative path to the project directory from the sample.yaml.\n' + 'Example: ../../my_app', + ), + WorkspaceType.git => Prompt.send( + 'Git URL', + help: + 'Public repository URL.\n' + 'Example: https://github.com/user/repo', + ), + WorkspaceType.create => Prompt.send( + 'Creation command', + help: + 'Command to run from the sample directory.\n' + 'Use "project" as the output name.\n' + 'Example: flutter create project --empty', + defaultValue: 'flutter create project --empty', + ), + _ => null, + }; + + await SpinnerTask.send( + 'Creating task "$taskId"', + task: () async { + try { + await createTaskResources( + taskId, + tasksDirPath: tasksDirPath, + workspaceKey: workspaceType, + templatePackage: null, + workspaceValue: workspaceValue, + ); + + final yaml = taskTemplate( + taskFunc: taskFunc, + workspaceType: workspaceType, + templatePackage: null, + workspaceValue: workspaceValue, + variants: selectedVariants, + ); + + generator.writeTaskFile(taskId, yaml: yaml); + } catch (e) { + throw CliException('Failed to create task: $e'); + } + }, + ); + + Text.success('Created task: ${generator.taskYamlFilePath(taskId)}'); + + // ========================================================================= + // Form 2: Job setup + // ========================================================================= + final defaultModel = models.contains('google/gemini-2.5-flash') + ? 'google/gemini-2.5-flash' + : models.first; + + final jobResults = Form.send( + title: 'Step 2: Create a Job', + children: [ + Page( + children: [ + Prompt( + 'Job name', + help: 'Used to run evals via: devals run ', + defaultValue: '${taskId}_job', + key: 'jobName', + validator: (value) => + value.isEmpty ? 'Job name cannot be empty.' : null, + ), + Multiselect( + 'Models', + help: + 'Choose which models to evaluate. 
You need API keys for each provider.', + options: models.map((m) => Option(label: m, value: m)).toList(), + defaultValue: [defaultModel], + key: 'models', + ), + ], + ), + ], + ); + + final jobName = jobResults['jobName'] as String; + final selectedModels = jobResults['models'] as List; + + await SpinnerTask.send( + 'Creating job "$jobName"', + task: () async { + final jobContent = jobTemplate( + name: jobName, + models: selectedModels, + variants: selectedVariants, + tasks: [taskId], + ); + generator.writeJobFile(jobName, jobContent); + }, + ); + + Text.success('Created job: jobs/$jobName.yaml'); + + // ========================================================================= + // Done! + // ========================================================================= + terminal.writeln(); + Text.body('🎉 You\'re all set!'); + Text.body(''); + Text.body('Next steps:'); + Text.body(' 1. Edit the task at: ${generator.taskYamlFilePath(taskId)}'); + Text.body(' - Add your input prompt and expected target'); + Text.body(' 2. Run your evaluation:'); + Text.body(' dart run bin/devals.dart run $jobName'); + + return 0; + } +} diff --git a/packages/eval_cli/lib/src/commands/create_sample_command.dart b/packages/eval_cli/lib/src/commands/create_sample_command.dart new file mode 100644 index 0000000..e98797f --- /dev/null +++ b/packages/eval_cli/lib/src/commands/create_sample_command.dart @@ -0,0 +1,105 @@ +import 'package:args/command_runner.dart'; +import 'package:devals/src/cli_exception.dart'; +import 'package:devals/src/dataset/dataset_reader.dart'; +import 'package:devals/src/dataset/eval_writer.dart'; +import 'package:devals/src/dataset/file_templates/sample_template.dart'; +import 'package:howdy/howdy.dart'; + +/// Interactive command to add a new sample to an existing task file. 
+class CreateSampleCommand extends Command { + @override + String get name => 'sample'; + + @override + String get description => 'Add a new sample to an existing task file.'; + + @override + Future run() async { + terminal.scrollClear(); + terminal.writeln(); + terminal.maxWidth = 60; + + final existingTasks = datasetReader.getTasks(); + if (existingTasks.isEmpty) { + Text.error( + 'No tasks found. Run "devals create task" first to create a task.', + ); + return 1; + } + + final results = Form.send( + children: [ + Note( + children: [ + Text( + 'This command will insert a new sample into a task with a placeholder input and output. ' + "You'll still need to ${'write'.italic} that sample within the task file.", + ), + ], + ), + Page( + children: [ + Select( + 'Select a task', + help: 'The sample will be appended to this task file.', + options: existingTasks + .map((t) => Option(label: t, value: t)) + .toList(), + key: 'task', + ), + Prompt( + 'Sample ID', + help: 'A unique ID for this sample (snake_case).', + key: 'id', + validator: (value) => + value.isEmpty ? 'Sample ID cannot be empty.' 
: null, + ), + Select( + 'Difficulty', + help: 'Used for filtering and reporting.', + options: [ + Option(label: 'easy', value: 'easy'), + Option(label: 'medium', value: 'medium'), + Option(label: 'hard', value: 'hard'), + ], + defaultValue: 'medium', + key: 'difficulty', + ), + ], + ), + ], + title: 'Add a sample to a task', + ); + + final taskName = results['task'] as String; + final sampleId = results['id'] as String; + final difficulty = results['difficulty'] as String; + + final success = await SpinnerTask.send( + 'Adding sample to $taskName', + task: () async { + try { + final sampleContent = sampleTemplate( + id: sampleId, + difficulty: difficulty, + ); + generator.appendToTaskFile(taskName, content: sampleContent); + return true; + } catch (e) { + throw CliException('Failed to add sample: $e'); + } + }, + ); + + if (success) { + Text.success('Added "$sampleId" to task "$taskName"'); + Text.body( + 'Open ${generator.taskYamlFilePath(taskName)} and edit the INPUT and TARGET.', + ); + return 0; + } else { + Text.error('Failed to add sample.'); + return 1; + } + } +} diff --git a/packages/eval_cli/lib/src/commands/create_task_command.dart b/packages/eval_cli/lib/src/commands/create_task_command.dart new file mode 100644 index 0000000..c15dc8c --- /dev/null +++ b/packages/eval_cli/lib/src/commands/create_task_command.dart @@ -0,0 +1,191 @@ +import 'package:args/command_runner.dart'; +import 'package:devals/src/cli_exception.dart'; +import 'package:devals/src/dataset/eval_writer.dart'; +import 'package:devals/src/dataset/file_templates/task_template.dart'; +import 'package:devals/src/dataset/filesystem_utils.dart'; +import 'package:howdy/howdy.dart'; + +/// Interactive command to create a new task file in tasks/{name}/task.yaml. 
+class CreateTaskCommand extends Command { + @override + String get name => 'task'; + + @override + String get description => 'Create a new task file in tasks/.'; + + @override + Future run() async { + terminal.scrollClear(); + terminal.writeln(); + + final existingTasks = datasetReader.getExistingTaskNames(); + final availableFuncs = datasetReader.getTaskFuncs(); + final availableVariants = datasetReader.getVariants(); + + // Form 1: core task info + final results = Form.send( + children: [ + Note( + next: true, + nextLabel: 'Get started', + children: [ + Text( + 'A task defines a group of samples that share a scoring ' + 'function and run configuration.', + ), + ], + ), + Page( + children: [ + Prompt( + 'Task name', + help: 'Unique name (snake_case). Creates tasks//task.yaml.', + key: 'name', + validator: (value) { + if (value.isEmpty) { + return 'Task name cannot be empty.'; + } + if (existingTasks.contains(value)) { + return 'Task "$value" already exists.'; + } + return null; + }, + ), + Select( + 'Task function', + help: 'Defines how the sample is run and scored.', + options: availableFuncs + .map( + (f) => Option( + label: f.name, + value: f.name, + ), + ) + .toList(), + key: 'func', + ), + ], + ), + Page( + children: [ + if (availableVariants.isNotEmpty) + Multiselect( + 'Variants', + help: 'Which variants to run for this task. 
Optional.', + options: availableVariants + .map((v) => Option(label: v, value: v)) + .toList(), + key: 'variants', + ), + Select( + 'Workspace type', + help: 'How the task\'s sandbox workspace is provided.', + options: [ + Option( + label: 'path', + value: WorkspaceType.path, + textStyle: const TextStyle(), + ), + Option( + label: 'git', + value: WorkspaceType.git, + textStyle: const TextStyle(), + ), + Option( + label: 'create', + value: WorkspaceType.create, + textStyle: const TextStyle(), + ), + ], + key: 'workspaceType', + ), + ], + ), + ], + title: 'Create a new task', + ); + + final taskName = results['name'] as String; + final taskFunc = results['func'] as String; + final selectedVariants = availableVariants.isNotEmpty + ? results['variants'] as List + : []; + final workspaceType = results['workspaceType'] as WorkspaceType; + + // Workspace value depends on type — prompt standalone after the form + final String? workspaceValue = switch (workspaceType) { + WorkspaceType.path => Prompt.send( + 'Relative path', + help: + 'Relative path to the project directory from the sample.yaml.\n' + 'Example: ../../my_app', + ), + WorkspaceType.git => Prompt.send( + 'Git URL', + help: + 'Public repository URL.\n' + 'Example: https://github.com/user/repo', + ), + WorkspaceType.create => Prompt.send( + 'Creation command', + help: + 'Command to run from the sample directory.\n' + 'Use "project" as the output name.\n' + 'Example: flutter create project --empty', + defaultValue: 'flutter create project --empty', + ), + _ => null, + }; + + // Optional system message + final systemMessage = Prompt.send( + 'System message (optional)', + help: + 'Custom system prompt. 
Leave blank to skip.\n' + 'Example: "You are an expert Flutter developer."', + defaultValue: '', + ); + + final tasksDir = findTasksDir(datasetReader.datasetDirPath); + + final success = await SpinnerTask.send( + 'Creating task "$taskName"', + task: () async { + try { + await createTaskResources( + taskName, + tasksDirPath: tasksDir, + workspaceKey: workspaceType, + templatePackage: null, + workspaceValue: workspaceValue, + ); + + final yaml = taskTemplate( + taskFunc: taskFunc, + workspaceType: workspaceType, + templatePackage: null, + workspaceValue: workspaceValue, + variants: selectedVariants, + systemMessage: systemMessage.isNotEmpty ? systemMessage : null, + ); + + generator.writeTaskFile(taskName, yaml: yaml); + return true; + } catch (e) { + throw CliException('Failed to create task: $e'); + } + }, + ); + + if (success) { + Text.success('Created: ${generator.taskYamlFilePath(taskName)}'); + Text.body( + 'Edit the task file to set your sample INPUT and TARGET.', + ); + return 0; + } else { + Text.error('Create task failed.'); + return 1; + } + } +} diff --git a/packages/eval_cli/lib/src/commands/doctor_command.dart b/packages/eval_cli/lib/src/commands/doctor_command.dart new file mode 100644 index 0000000..a286637 --- /dev/null +++ b/packages/eval_cli/lib/src/commands/doctor_command.dart @@ -0,0 +1,389 @@ +import 'dart:io'; + +import 'package:args/command_runner.dart'; +import 'package:devals/src/config/env.dart'; +import 'package:devals/src/config/expand_home_dir.dart'; +import 'package:howdy/howdy.dart'; + +/// The result status of a single doctor check. +enum CheckStatus { ok, warning, error } + +/// The result of a single prerequisite check. +class CheckResult { + const CheckResult({ + required this.status, + this.version, + this.message, + this.fix, + }); + + final CheckStatus status; + final String? version; + final String? message; + final String? fix; +} + +/// A single prerequisite check to run. 
+class DoctorCheck { + const DoctorCheck({ + required this.name, + required this.component, + required this.check, + this.isRequired = false, + }); + + final String name; + final String component; + final Future Function() check; + final bool isRequired; +} + +/// Typedef for a function that runs a process, enabling test injection. +typedef ProcessRunner = + Future Function( + String executable, + List arguments, + ); + +/// Command that checks whether prerequisites are installed. +/// +/// Similar to `flutter doctor`, this verifies the tools needed +/// for the CLI, eval_runner, and eval_explorer. +class DoctorCommand extends Command { + DoctorCommand({ProcessRunner? processRunner}) + : _runProcess = processRunner ?? Process.run; + + final ProcessRunner _runProcess; + + @override + String get name => 'doctor'; + + @override + String get description => + 'Check that all prerequisites are installed for ' + 'the CLI, eval_runner, and eval_explorer.'; + + @override + Future run() async { + terminal.scrollClear(); + terminal.writeln(); + + final checks = buildChecks(processRunner: _runProcess); + + Text.body('devals doctor'); + Text.body('Checking prerequisites...\n'); + + final results = <(DoctorCheck, CheckResult)>[]; + for (final check in checks) { + final result = await check.check(); + results.add((check, result)); + _printResult(check, result); + } + + terminal.writeln(); + + // Collect issues. 
+ final issues = results.where((r) => r.$2.status != CheckStatus.ok).toList(); + + if (issues.isEmpty) { + Text.success('No issues found!\n'); + return 0; + } + + Text.warning('Issues found:\n'); + for (final (check, result) in issues) { + final (icon, style) = switch (result.status) { + CheckStatus.error => ( + '${Icon.error} ', + Theme.current.focused.errorMessage, + ), + CheckStatus.warning => ( + '${Icon.warning} ', + Theme.current.focused.warningMessage, + ), + _ => ('', const TextStyle()), + }; + terminal.writeln(' ${'$icon${check.name}'.style(style)}'); + if (result.message != null) { + terminal.writeln(' ${result.message}'); + } + if (result.fix != null) { + terminal.writeln(' Fix: ${result.fix}'); + } + } + + final hasErrors = issues.any((r) => r.$2.status == CheckStatus.error); + return hasErrors ? 1 : 0; + } + + void _printResult(DoctorCheck check, CheckResult result) { + final (icon, style) = switch (result.status) { + CheckStatus.ok => (Icon.check, Theme.current.focused.successMessage), + CheckStatus.warning => ( + Icon.warning, + Theme.current.focused.warningMessage, + ), + CheckStatus.error => (Icon.error, Theme.current.focused.errorMessage), + }; + final versionSuffix = result.version != null ? ' (${result.version})' : ''; + final messageSuffix = result.message != null ? ' — ${result.message}' : ''; + terminal.writeln( + ' ${'$icon ${check.name}$versionSuffix$messageSuffix'.style(style)}', + ); + } +} + +// --------------------------------------------------------------------------- +// Check definitions +// --------------------------------------------------------------------------- + +/// Builds the list of all doctor checks. +/// +/// [processRunner] is injectable for testing. +List buildChecks({ProcessRunner? processRunner}) { + final run = processRunner ?? 
Process.run; + return [ + DoctorCheck( + name: 'Dart SDK', + component: 'CLI, eval_explorer', + isRequired: true, + check: () => _checkDart(run), + ), + DoctorCheck( + name: 'Python', + component: 'eval_runner', + isRequired: true, + check: () => _checkPython(run), + ), + DoctorCheck( + name: 'eval_runner installed', + component: 'eval_runner', + isRequired: true, + check: () => _checkEvalRunner(run), + ), + DoctorCheck( + name: 'Podman', + component: 'eval_runner', + check: () => _checkPodman(run), + ), + DoctorCheck( + name: 'Flutter SDK', + component: 'eval_explorer', + isRequired: true, + check: () => _checkFlutter(run), + ), + DoctorCheck( + name: 'Serverpod CLI', + component: 'eval_explorer', + check: () => _checkServerpod(run), + ), + DoctorCheck( + name: 'API keys', + component: 'eval_runner', + isRequired: true, + check: () => _checkApiKeys(), + ), + DoctorCheck( + name: 'Publish config', + component: 'CLI (devals publish)', + check: () => _checkPublishConfig(), + ), + ]; +} + +/// Runs a command and returns the stdout, or `null` if it fails. +Future _tryRun( + ProcessRunner run, + String executable, + List args, +) async { + try { + final result = await run(executable, args); + if (result.exitCode == 0) { + return (result.stdout as String).trim(); + } + return null; + } on ProcessException { + return null; + } +} + +/// Extracts a version number pattern (e.g. "3.10.1") from [text]. +String? 
_extractVersion(String text) { + final match = RegExp(r'(\d+\.\d+[\.\d]*)').firstMatch(text); + return match?.group(1); +} + +// -- Individual check implementations ---------------------------------------- + +Future _checkDart(ProcessRunner run) async { + final output = await _tryRun(run, 'dart', ['--version']); + if (output == null) { + return const CheckResult( + status: CheckStatus.error, + message: 'not found', + fix: 'Install the Dart SDK: https://dart.dev/get-dart', + ); + } + return CheckResult(status: CheckStatus.ok, version: _extractVersion(output)); +} + +Future _checkPython(ProcessRunner run) async { + final output = await _tryRun(run, 'python3', ['--version']); + if (output == null) { + return const CheckResult( + status: CheckStatus.error, + message: 'not found', + fix: 'Install Python 3.13+: https://www.python.org/downloads/', + ); + } + final version = _extractVersion(output); + if (version != null) { + final parts = version.split('.'); + final major = int.tryParse(parts[0]) ?? 0; + final minor = parts.length > 1 ? (int.tryParse(parts[1]) ?? 
0) : 0; + if (major < 3 || (major == 3 && minor < 13)) { + return CheckResult( + status: CheckStatus.error, + version: version, + message: 'Python 3.13+ required, found $version', + fix: 'Upgrade Python: https://www.python.org/downloads/', + ); + } + } + return CheckResult(status: CheckStatus.ok, version: version); +} + +Future _checkEvalRunner(ProcessRunner run) async { + final output = await _tryRun(run, 'run-evals', ['--help']); + if (output == null) { + return const CheckResult( + status: CheckStatus.error, + message: 'not found', + fix: 'cd path/to/eval_runner && pip install -e .', + ); + } + return const CheckResult(status: CheckStatus.ok); +} + +Future _checkPodman(ProcessRunner run) async { + final output = await _tryRun(run, 'podman', ['--version']); + if (output == null) { + return const CheckResult( + status: CheckStatus.warning, + message: 'not found (optional, needed for sandbox tasks)', + fix: 'Install Podman: https://podman.io/getting-started/installation', + ); + } + return CheckResult(status: CheckStatus.ok, version: _extractVersion(output)); +} + +Future _checkFlutter(ProcessRunner run) async { + final output = await _tryRun(run, 'flutter', ['--version']); + if (output == null) { + return const CheckResult( + status: CheckStatus.error, + message: 'not found', + fix: 'Install the Flutter SDK: https://flutter.dev/flow', + ); + } + return CheckResult(status: CheckStatus.ok, version: _extractVersion(output)); +} + +Future _checkServerpod(ProcessRunner run) async { + final output = await _tryRun(run, 'serverpod', ['version']); + if (output == null) { + return const CheckResult( + status: CheckStatus.error, + message: 'not found', + fix: 'dart pub global activate serverpod_cli', + ); + } + return CheckResult(status: CheckStatus.ok, version: _extractVersion(output)); +} + +Future _checkApiKeys() async { + const keys = ['GEMINI_API_KEY', 'ANTHROPIC_API_KEY', 'OPENAI_API_KEY']; + + final env = loadEnv(); + + final present = keys.where((k) => 
env.containsKey(k)); + final missing = keys.where((k) => !env.containsKey(k)); + + if (present.isEmpty) { + return const CheckResult( + status: CheckStatus.error, + message: 'no API keys found', + fix: + 'Set at least one of: GEMINI_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY\n' + ' Tip: add them to your .env file (see .env.example)', + ); + } + if (missing.isNotEmpty) { + return CheckResult( + status: CheckStatus.warning, + message: '${present.join(', ')} set; ${missing.join(', ')} missing', + ); + } + return CheckResult( + status: CheckStatus.ok, + message: 'all keys set', + ); +} + +Future _checkPublishConfig() async { + final env = loadEnv(); + + final requiredKeys = [ + EnvKeys.gcsBucket, + EnvKeys.gcpProjectId, + EnvKeys.googleApplicationCredentials, + ]; + + final present = []; + final missing = []; + + for (final key in requiredKeys) { + final value = env[key]; + if (value != null && value.isNotEmpty) { + present.add(key); + } else { + missing.add(key); + } + } + + if (present.isEmpty) { + return const CheckResult( + status: CheckStatus.warning, + message: 'not configured', + fix: + 'cp .env.example .env and fill in GCS_BUCKET, ' + 'GCP_PROJECT_ID, GOOGLE_APPLICATION_CREDENTIALS', + ); + } + + // Check that credentials file actually exists + final credPath = env[EnvKeys.googleApplicationCredentials]; + if (credPath != null && credPath.isNotEmpty) { + var resolvedPath = expandHomeDir(credPath); + if (!File(resolvedPath).existsSync()) { + return CheckResult( + status: CheckStatus.error, + message: 'credentials file not found: $credPath', + ); + } + } + + if (missing.isNotEmpty) { + return CheckResult( + status: CheckStatus.warning, + message: '${present.join(', ')} set; ${missing.join(', ')} missing', + fix: 'Set missing values in .env (see .env.example)', + ); + } + + return const CheckResult( + status: CheckStatus.ok, + message: 'all configured', + ); +} diff --git a/packages/eval_cli/lib/src/commands/init_command.dart 
b/packages/eval_cli/lib/src/commands/init_command.dart new file mode 100644 index 0000000..40ff771 --- /dev/null +++ b/packages/eval_cli/lib/src/commands/init_command.dart @@ -0,0 +1,91 @@ +import 'dart:io'; + +import 'package:args/command_runner.dart'; +import 'package:devals/src/cli_exception.dart'; +import 'package:devals/src/dataset/file_templates/init_templates/init_job_template.dart'; +import 'package:devals/src/dataset/file_templates/init_templates/init_sample_template.dart'; +import 'package:devals/src/dataset/filesystem_utils.dart'; +import 'package:howdy/howdy.dart'; +import 'package:path/path.dart' as p; + +class InitCommand extends Command { + @override + String get name => 'init'; + + @override + String get description => + 'Initialize a new dataset configuration in the current directory.'; + + @override + Future run() async { + terminal.scrollClear(); + terminal.writeln(); + + final currentDir = Directory.current.path; + final devalsYaml = File(p.join(currentDir, devalsYamlFilename)); + + // Check if already initialized. + if (devalsYaml.existsSync()) { + Text.error( + '$devalsYamlFilename already exists in this directory. 
' + 'This project appears to be already initialized.', + ); + return 1; + } + + final success = await SpinnerTask.send( + 'Initializing in $currentDir', + task: () async { + // Create devals.yaml marker file + try { + devalsYaml.writeAsStringSync( + '# Marks this directory as a project that contains dash evals.\n' + '# Created by `devals init`.\n' + 'dataset: ./evals\n', + ); + } catch (e) { + throw CliException('Failed to create $devalsYamlFilename: $e'); + } + + final evalsDir = p.join(currentDir, 'evals'); + + // Create evals/tasks/get_started/task.yaml + try { + final taskDir = Directory(p.join(evalsDir, 'tasks', 'get_started')); + taskDir.createSync(recursive: true); + final taskPath = p.join(taskDir.path, 'task.yaml'); + File(taskPath).writeAsStringSync(initTaskTemplate()); + } catch (e) { + throw CliException('Failed to create task: $e'); + } + + // Create evals/jobs/local_dev.yaml + final jobsDir = p.join(evalsDir, 'jobs'); + try { + Directory(jobsDir).createSync(recursive: true); + final jobPath = p.join(jobsDir, 'local_dev.yaml'); + File(jobPath).writeAsStringSync( + initJobTemplate( + name: 'local_dev', + models: ['google/gemini-2.0-flash'], + tasks: ['get_started'], + ), + ); + } catch (e) { + throw CliException('Failed to create job file: $e'); + } + + return true; + }, + ); + + if (success) { + Text.success('Initialized — dataset at $currentDir/evals'); + terminal.writeln(); + Text.body('To run your first evaluation:'); + Text.body(' devals run local_dev'); + } + + return success ? 0 : 1; + } +} diff --git a/packages/eval_cli/lib/src/commands/publish_command.dart b/packages/eval_cli/lib/src/commands/publish_command.dart new file mode 100644 index 0000000..d9b480a --- /dev/null +++ b/packages/eval_cli/lib/src/commands/publish_command.dart @@ -0,0 +1,275 @@ +/// Command to publish InspectAI log files to Google Cloud Storage. 
+library; + +import 'dart:io'; + +import 'package:args/command_runner.dart'; +import 'package:devals/src/config/expand_home_dir.dart'; +import 'package:howdy/howdy.dart'; +import 'package:path/path.dart' as p; + +import '../cli_exception.dart'; +import '../config/env.dart'; +import '../gcs/gcs_client.dart'; +import '../gcs/log_validator.dart'; + +/// Publishes InspectAI JSON log files to a GCS bucket. +/// +/// Usage: +/// devals publish {path} Upload a file or directory of logs +/// devals publish --dry-run {path} Preview what would be uploaded +/// +/// The target bucket and credentials are configured via `.env` file, +/// environment variables, or CLI flags. Precedence: flag > env var > .env. +class PublishCommand extends Command { + PublishCommand() { + argParser + ..addFlag( + 'dry-run', + help: 'Preview what would be uploaded without uploading.', + negatable: false, + ) + ..addOption( + 'bucket', + abbr: 'b', + help: 'GCS bucket name (or set GCS_BUCKET in .env).', + ) + ..addOption( + 'project', + abbr: 'p', + help: 'GCP project ID (or set GCP_PROJECT_ID in .env).', + ) + ..addOption( + 'credentials', + abbr: 'c', + help: + 'Path to service account JSON key file ' + '(default: from .env or GOOGLE_APPLICATION_CREDENTIALS).', + ) + ..addOption( + 'prefix', + help: + 'GCS object prefix (default: directory name for dirs, empty for files).', + ); + } + + @override + String get name => 'publish'; + + @override + String get description => + 'Publish InspectAI log files to Google Cloud Storage.'; + + @override + String get invocation => '${runner?.executableName} publish '; + + @override + Future run() async { + if (argResults?.rest.isEmpty ?? 
true) { + Text.error( + 'Missing required argument: \n' + 'Usage: devals publish \n' + 'Example: devals publish logs/2026-01-07_17-11-47/\n', + ); + return 1; + } + + final targetPath = argResults!.rest.first; + final dryRun = argResults!['dry-run'] as bool; + + // Discover log files + final discoveredFiles = _discoverLogFiles(targetPath); + if (discoveredFiles.isEmpty) { + Text.error('No .json log files found at: $targetPath\n'); + return 1; + } + + // Validate that each file looks like an Inspect AI log. + final files = []; + for (final file in discoveredFiles) { + final result = await validateInspectLog(file); + if (result.isValid) { + files.add(file); + } else { + Text.warning( + '⚠️ Skipping ${p.basename(file.path)} — ${result.reason}\n', + ); + } + } + + if (files.isEmpty) { + Text.error( + 'No valid Inspect AI log files found at: $targetPath\n' + 'All discovered .json files failed validation.\n', + ); + return 1; + } + + // Load environment config + final env = loadEnv(); + + String bucket; + try { + bucket = resolveEnvValue( + flagValue: argResults!['bucket'] as String?, + envKey: EnvKeys.gcsBucket, + env: env, + ); + } on StateError { + Text.error( + 'No GCS bucket configured.\n' + 'Set GCS_BUCKET in your .env file or pass --bucket .\n\n' + 'See .env.example for a template.\n', + ); + return 1; + } + + // Determine GCS prefix + final prefix = _resolvePrefix( + flagPrefix: argResults!['prefix'] as String?, + targetPath: targetPath, + ); + + final prefixDisplay = prefix.isNotEmpty ? 
'$prefix/' : ''; + + Text.body( + '🚀 Publishing ${files.length} log file(s) to ' + 'gs://$bucket/$prefixDisplay...\n', + ); + + if (dryRun) { + Text.body('DRY RUN — no files will be uploaded.\n'); + for (final file in files) { + final objectName = _objectName(prefix, file); + Text.body(' • $objectName'); + } + terminal.writeln(''); + Text.success('${files.length} file(s) would be published.\n'); + return 0; + } + + // Resolve credentials for real upload + String projectId; + try { + projectId = resolveEnvValue( + flagValue: argResults!['project'] as String?, + envKey: EnvKeys.gcpProjectId, + env: env, + ); + } on StateError { + Text.error( + 'No GCP project ID configured.\n' + 'Set GCP_PROJECT_ID in your .env file or pass --project .\n\n' + 'See .env.example for a template.\n', + ); + return 1; + } + + String credentialsPath; + try { + credentialsPath = resolveEnvValue( + flagValue: argResults!['credentials'] as String?, + envKey: EnvKeys.googleApplicationCredentials, + env: env, + ); + } on StateError { + Text.error( + 'No credentials configured.\n' + 'Set GOOGLE_APPLICATION_CREDENTIALS in your .env file or environment,\n' + 'or pass --credentials .\n\n' + 'See .env.example for a template.\n', + ); + return 1; + } + + credentialsPath = expandHomeDir(credentialsPath); + + // Create GCS client and upload + GcsClient? client; + try { + client = await GcsClient.create( + projectId: projectId, + credentialsPath: credentialsPath, + ); + + var successCount = 0; + var failCount = 0; + + for (final file in files) { + final objectName = _objectName(prefix, file); + try { + await client.uploadFile(bucket, objectName, file); + Text.success(' $objectName'); + successCount++; + } catch (e) { + Text.error(' $objectName — $e\n'); + failCount++; + } + } + + terminal.writeln(''); + if (failCount == 0) { + Text.success('Published $successCount file(s).\n'); + } else { + Text.warning('Published $successCount file(s), $failCount failed.\n'); + } + + return failCount > 0 ? 
1 : 0; + } on FileSystemException catch (e) { + throw CliException( + 'Credentials error: ${e.message}\n' + 'Path: ${e.path ?? credentialsPath}\n', + ); + } catch (e) { + throw CliException('Upload failed: $e\n'); + } finally { + client?.close(); + } + } + + /// Discovers JSON log files from a path (file or directory). + List _discoverLogFiles(String path) { + final entity = FileSystemEntity.typeSync(path); + + if (entity == FileSystemEntityType.file) { + if (path.endsWith('.json')) { + return [File(path)]; + } + return []; + } + + if (entity == FileSystemEntityType.directory) { + return Directory(path) + .listSync(recursive: true) + .whereType() + .where((f) => f.path.endsWith('.json')) + .where((f) => !p.basename(f.path).startsWith('runner')) + .toList() + ..sort((a, b) => a.path.compareTo(b.path)); + } + + return []; + } + + /// Resolves the GCS object prefix. + /// + /// If an explicit prefix is given, use that. + /// If the target is a directory, use its name as a prefix. + /// If the target is a single file, no prefix. + String _resolvePrefix({String? flagPrefix, required String targetPath}) { + if (flagPrefix != null) return flagPrefix; + + final entity = FileSystemEntity.typeSync(targetPath); + if (entity == FileSystemEntityType.directory) { + return p.basename(p.normalize(targetPath)); + } + return ''; + } + + /// Computes the GCS object name for a file. 
+ String _objectName(String prefix, File file) { + final fileName = p.basename(file.path); + if (prefix.isEmpty) return fileName; + return '$prefix/$fileName'; + } +} diff --git a/packages/eval_cli/lib/src/commands/run_command.dart b/packages/eval_cli/lib/src/commands/run_command.dart new file mode 100644 index 0000000..6c52bd9 --- /dev/null +++ b/packages/eval_cli/lib/src/commands/run_command.dart @@ -0,0 +1,93 @@ +import 'dart:io'; + +import 'package:args/command_runner.dart'; +import 'package:devals/src/dataset/dry_run.dart'; +import 'package:devals/src/dataset/filesystem_utils.dart'; +import 'package:eval_config/eval_config.dart'; +import 'package:howdy/howdy.dart'; +import 'package:path/path.dart' as p; + +/// Command to run evaluations using the Python eval_runner. +/// +/// Config resolution and dry-run happen entirely in Dart. For actual runs, +/// Dart writes an EvalSet JSON file, then Python reads it and calls +/// `eval_set()` directly. +class RunCommand extends Command { + RunCommand() { + argParser.addFlag( + 'dry-run', + help: 'Preview what would be run without executing.', + negatable: false, + ); + } + + @override + String get name => 'run'; + + @override + String get description => 'Run evaluations using the eval_runner.'; + + @override + String get invocation => '${runner?.executableName} run '; + + @override + Future run() async { + if (argResults?.rest.isEmpty ?? 
true) {
+      Text.error(
+        'Missing required argument: <job_name>\n'
+        'Usage: devals run <job_name>\n'
+        'Example: devals run local_dev',
+      );
+      return 1;
+    }
+    final jobName = argResults!.rest.first;
+
+    final datasetPath = findDatasetDirectory();
+
+    // Resolve config in Dart
+    Text.body('📋 Resolving config for job "$jobName"...');
+    final resolver = ConfigResolver();
+    final configs = resolver.resolve(datasetPath, [jobName]);
+
+    if (configs.isEmpty) {
+      Text.error('No configs resolved for job: $jobName');
+      return 1;
+    }
+
+    // Handle --dry-run entirely in Dart
+    if (argResults?['dry-run'] == true) {
+      final isValid = dryRun(configs);
+      return isValid ? 0 : 1;
+    }
+
+    // Write EvalSet JSON to the .devals-tool directory
+    final outputDir = p.join(datasetPath, '.devals-tool', jobName);
+
+    final writer = EvalSetWriter();
+    final evalSetPath = writer.write(configs, outputDir);
+
+    Text.body('🚀 Running: run-evals --json $evalSetPath');
+    Text.body('   Working directory: $datasetPath\n');
+
+    // Use inheritStdio to preserve inspect-ai's interactive terminal display
+    try {
+      final process = await Process.start(
+        'run-evals',
+        ['--json', evalSetPath],
+        mode: ProcessStartMode.inheritStdio,
+        workingDirectory: datasetPath,
+      );
+      return process.exitCode;
+    } on ProcessException catch (e) {
+      if (e.errorCode == 2) {
+        Text.error(
+          'Command "run-evals" not found.\n'
+          'Please install the eval_runner Python package:\n'
+          '  pip install -e <repo_root>/pkgs/eval_runner',
+        );
+        return 1;
+      }
+      rethrow;
+    }
+  }
+}
diff --git a/packages/eval_cli/lib/src/commands/view_command.dart b/packages/eval_cli/lib/src/commands/view_command.dart
new file mode 100644
index 0000000..2188f
--- /dev/null
+++ b/packages/eval_cli/lib/src/commands/view_command.dart
+import 'dart:io';
+
+import 'package:args/command_runner.dart';
+import 'package:devals/src/dataset/filesystem_utils.dart';
+import 'package:howdy/howdy.dart';
+import 'package:path/path.dart' as p;
+
+/// Command to launch the Inspect AI
viewer.
+class ViewCommand extends Command<int> {
+  @override
+  String get name => 'view';
+
+  @override
+  String get description =>
+      'Launch the Inspect AI viewer to view evaluation results.';
+
+  @override
+  String get invocation => '${runner?.executableName} view [log_path]';
+
+  @override
+  Future<int> run() async {
+    final logPath = argResults?.rest.isNotEmpty == true
+        ? argResults!.rest.first
+        : null;
+
+    // Use tryFindDatasetDirectory to get optional dataset path
+    final datasetPath = tryFindDatasetDirectory();
+
+    // Build command arguments
+    final args = ['view'];
+    if (logPath != null) {
+      // inspect view expects --log-dir for a directory;
+      // if a file path was given, use its parent directory.
+      final resolved = File(logPath).existsSync()
+          ? p.dirname(logPath)
+          : logPath;
+      args.addAll(['--log-dir', resolved]);
+    } else if (datasetPath != null) {
+      // Default to the logs directory if it exists
+      final logsDir = findLogsDir(datasetPath);
+      if (logsDir != null) {
+        args.addAll(['--log-dir', logsDir]);
+      }
+    }
+
+    Text.body('🔍 Launching: inspect ${args.join(' ')}\n');
+
+    // Use inheritStdio to preserve the interactive viewer
+    final process = await Process.start(
+      'inspect',
+      args,
+      mode: ProcessStartMode.inheritStdio,
+      workingDirectory: datasetPath != null ? p.dirname(datasetPath) : null,
+    );
+
+    return process.exitCode;
+  }
+}
diff --git a/packages/eval_cli/lib/src/config/env.dart b/packages/eval_cli/lib/src/config/env.dart
new file mode 100644
index 0000000..4a61b17
--- /dev/null
+++ b/packages/eval_cli/lib/src/config/env.dart
+/// Loads environment configuration from `.env` files.
+///
+/// Searches for `.env` at the project root (walking up from cwd to find
+/// a directory containing a `pubspec.yaml` with `workspace:`, or
+/// any `.env` file along the way).
+library;
+
+import 'dart:io';
+
+import 'package:dotenv/dotenv.dart' as dotenv;
+import 'package:path/path.dart' as p;
+
+/// Well-known environment variable keys used by the CLI.
+abstract final class EnvKeys {
+  static const gcsBucket = 'GCS_BUCKET';
+  static const gcpProjectId = 'GCP_PROJECT_ID';
+  static const googleApplicationCredentials = 'GOOGLE_APPLICATION_CREDENTIALS';
+  static const geminiApiKey = 'GEMINI_API_KEY';
+  static const anthropicApiKey = 'ANTHROPIC_API_KEY';
+  static const openaiApiKey = 'OPENAI_API_KEY';
+
+  /// All keys that `loadEnv` will look for.
+  static const all = [
+    gcsBucket,
+    gcpProjectId,
+    googleApplicationCredentials,
+    geminiApiKey,
+    anthropicApiKey,
+    openaiApiKey,
+  ];
+}
+
+/// Loads environment configuration, merging `.env` file values with
+/// system environment variables. System env vars take precedence.
+///
+/// Returns a map of resolved environment key-value pairs.
+Map<String, String> loadEnv() {
+  final envFile = _findEnvFile();
+  final env = <String, String>{};
+
+  // Load from .env file if found
+  dotenv.DotEnv? dotEnv;
+  if (envFile != null) {
+    dotEnv = dotenv.DotEnv(includePlatformEnvironment: false)..load([envFile]);
+  }
+
+  // For each known key, check .env first, then system env overrides
+  for (final key in EnvKeys.all) {
+    // .env file value (using public [] operator)
+    if (dotEnv != null && dotEnv.isDefined(key)) {
+      final value = dotEnv[key];
+      if (value != null && value.isNotEmpty) {
+        env[key] = value;
+      }
+    }
+
+    // System environment variables override .env file values
+    final systemValue = Platform.environment[key];
+    if (systemValue != null && systemValue.isNotEmpty) {
+      env[key] = systemValue;
+    }
+  }
+
+  return env;
+}
+
+/// Resolves a value with the following precedence:
+/// 1. Explicit CLI flag value
+/// 2. Environment variable (from .env or system)
+/// 3. Default value (if provided)
+///
+/// Throws [StateError] if no value is found and no default is given.
+String resolveEnvValue({
+  String? flagValue,
+  required String envKey,
+  required Map<String, String> env,
+  String? defaultValue,
+}) {
+  if (flagValue != null && flagValue.isNotEmpty) return flagValue;
+  final envValue = env[envKey];
+  if (envValue != null && envValue.isNotEmpty) return envValue;
+  if (defaultValue != null) return defaultValue;
+  throw StateError(
+    'Missing required configuration: $envKey. '
+    'Set it in .env, as an environment variable, or pass it as a CLI flag.',
+  );
+}
+
+/// Walks up from the current directory to find a `.env` file
+/// at or below the repo root.
+String? _findEnvFile() {
+  var dir = Directory.current.absolute;
+  // Walk up at most 10 levels
+  for (var i = 0; i < 10; i++) {
+    final envPath = p.join(dir.path, '.env');
+    if (File(envPath).existsSync()) {
+      return envPath;
+    }
+    // Check if this looks like the repo root
+    final pubspecPath = p.join(dir.path, 'pubspec.yaml');
+    if (File(pubspecPath).existsSync()) {
+      final contents = File(pubspecPath).readAsStringSync();
+      if (contents.contains('workspace:')) {
+        // This is the repo root — no .env file here
+        return null;
+      }
+    }
+    final parent = dir.parent;
+    if (parent.path == dir.path) break; // reached filesystem root
+    dir = parent;
+  }
+  return null;
+}
diff --git a/packages/eval_cli/lib/src/config/expand_home_dir.dart b/packages/eval_cli/lib/src/config/expand_home_dir.dart
new file mode 100644
index 0000000..b78c0bd
--- /dev/null
+++ b/packages/eval_cli/lib/src/config/expand_home_dir.dart
+import 'dart:io';
+import 'package:path/path.dart' as p;
+
+String expandHomeDir(String path) {
+  if (!path.startsWith('~')) {
+    return path;
+  }
+
+  String?
home; + if (Platform.isWindows) { + home = Platform.environment['USERPROFILE']; + } else { + home = Platform.environment['HOME']; + } + + if (home == null) { + return path; // Cannot expand + } + + if (path == '~') { + return home; + } + if (path.startsWith('~/')) { + return p.join(home, path.substring(2)); + } + + return path; // or throw an exception for unsupported formats like ~user +} diff --git a/packages/eval_cli/lib/src/dataset/dataset_reader.dart b/packages/eval_cli/lib/src/dataset/dataset_reader.dart new file mode 100644 index 0000000..cc27db2 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/dataset_reader.dart @@ -0,0 +1,76 @@ +import 'dart:io'; + +import 'package:path/path.dart' as p; +import 'filesystem_utils.dart'; + +/// Global accessor for the dataset reader singleton. +DatasetReader get datasetReader => DatasetReader(); + +/// Singleton reader for dataset configuration. +/// +/// Task functions and variants are now discovered from the filesystem +/// (tasks/ directory YAML files) rather than a generated registry. +class DatasetReader { + DatasetReader._(); + static final DatasetReader _instance = DatasetReader._(); + factory DatasetReader() => _instance; + + String? _cachedDatasetPath; + + /// Clears the cached dataset path. Useful for testing. + void clearCache() { + _cachedDatasetPath = null; + } + + /// Gets the path to the dataset directory. + String get datasetDirPath { + _cachedDatasetPath ??= findDatasetDirectory(); + return _cachedDatasetPath!; + } + + /// Gets the path to the tasks directory. + String get tasksDirPath => p.join(datasetDirPath, 'tasks'); + + /// Returns the list of common variant names for scaffolding. + List getVariants() => const [ + 'baseline', + 'context_only', + 'mcp_only', + 'full', + ]; + + /// Returns the list of task names discovered from tasks/ directory. + /// + /// Each subdirectory in tasks/ that contains a task.yaml file is a task. + /// The task name is derived from the directory name. 
+  List<String> getTasks() {
+    final tasksDir = Directory(tasksDirPath);
+    if (!tasksDir.existsSync()) {
+      return [];
+    }
+
+    final taskNames = <String>[];
+    for (final entity in tasksDir.listSync()) {
+      if (entity is Directory) {
+        final taskFile = File(p.join(entity.path, 'task.yaml'));
+        if (taskFile.existsSync()) {
+          taskNames.add(p.basename(entity.path));
+        }
+      }
+    }
+    taskNames.sort();
+    return taskNames;
+  }
+
+  /// Returns the set of existing task names for duplicate checking.
+  Set<String> getExistingTaskNames() => getTasks().toSet();
+
+  /// Returns task function info discovered from task.yaml files.
+  ///
+  /// Reads the `func` and optional `description` field from each task.yaml.
+  List<({String name, String? help})> getTaskFuncs() {
+    return getTasks().map((name) {
+      return (name: name, help: null as String?);
+    }).toList();
+  }
+}
diff --git a/packages/eval_cli/lib/src/dataset/dry_run.dart b/packages/eval_cli/lib/src/dataset/dry_run.dart
new file mode 100644
index 0000000..d8abc12
--- /dev/null
+++ b/packages/eval_cli/lib/src/dataset/dry_run.dart
+import 'package:eval_config/eval_config.dart';
+
+/// Preview resolved config without running evaluations.
+///
+/// Validates the config and prints a formatted summary of what would be
+/// passed to the Python eval runner.
+///
+/// Returns `true` if the config is valid, `false` if there are errors.
+bool dryRun(List<EvalSet> configs) {
+  var allValid = true;
+
+  for (var i = 0; i < configs.length; i++) {
+    if (configs.length > 1) {
+      print('\n${'=' * 70}');
+      print('📦 Job ${i + 1}/${configs.length}');
+      print('=' * 70);
+    }
+
+    if (!_validateConfig(configs[i])) {
+      allValid = false;
+    }
+  }
+
+  return allValid;
+}
+
+bool _validateConfig(EvalSet config) {
+  final errors = <String>[];
+  final warnings = <String>[];
+
+  // {taskName: sampleCount}
+  final taskSummaries = <String, int>{};
+
+  for (final task in config.tasks) {
+    final name = task.name ?? task.taskFunc ??
'(unknown)'; + + if (task.taskFunc == null) { + warnings.add( + 'Task "$name" has no task_func — Mode 2 hydration required', + ); + } + + final sampleCount = task.dataset?.samples.length ?? 0; + taskSummaries[name] = sampleCount; + } + + final models = config.model ?? []; + if (models.isEmpty) { + errors.add('No models specified in config'); + } + + _printSummary(config, taskSummaries, errors, warnings); + return errors.isEmpty; +} + +void _printSummary( + EvalSet config, + Map taskSummaries, + List errors, + List warnings, +) { + print('=' * 70); + print('🔍 DRY RUN - Configuration Summary'); + print('=' * 70); + print(''); + + // Log directory + print('📁 Log Directory: ${config.logDir}'); + print(''); + + // Models + final models = config.model ?? []; + print('🤖 Models (${models.length}):'); + for (final model in models) { + print(' • $model'); + } + print(''); + + // Sandbox + final sandbox = config.sandbox; + if (sandbox is List && sandbox.length == 2) { + print('🏖️ Sandbox: ${sandbox[0]} (${sandbox[1]})'); + } else if (sandbox != null) { + print('🏖️ Sandbox: $sandbox'); + } else { + print('🏖️ Sandbox: local'); + } + print(''); + + // Rate limits + print('⚡ Rate Limits:'); + if (config.retryAttempts != null) { + print(' • Retry attempts: ${config.retryAttempts}'); + } + if (config.retryOnError != null) { + print(' • Retry on error: ${config.retryOnError}'); + } + print(''); + + // Tasks tree + final numModels = models.length; + final totalTasks = taskSummaries.length; + final totalRuns = totalTasks * numModels; + final totalSamples = + taskSummaries.values.fold(0, (s, c) => s + c) * numModels; + + print( + '📋 Tasks ($totalTasks tasks, ' + 'run $totalRuns total times, $totalSamples total samples):', + ); + + final taskNames = taskSummaries.keys.toList(); + for (var i = 0; i < taskNames.length; i++) { + final taskName = taskNames[i]; + final sampleCount = taskSummaries[taskName]!; + final isLast = i == taskNames.length - 1; + final prefix = isLast ? 
'└─' : '├─'; + + final taskRuns = numModels; + final taskSamples = sampleCount * numModels; + + print(' $prefix $taskName ($taskRuns runs, $taskSamples samples)'); + + for (var j = 0; j < models.length; j++) { + final isLastModel = j == models.length - 1; + final indent = isLast ? ' ' : ' │ '; + final modelPrefix = isLastModel ? '└─' : '├─'; + final model = models[j]; + final shortModel = model.contains('/') ? model.split('/').last : model; + print('$indent$modelPrefix $shortModel ($sampleCount samples)'); + } + } + print(''); + + // Warnings + if (warnings.isNotEmpty) { + print('⚠️ Warnings:'); + for (final warning in warnings) { + print(' • $warning'); + } + print(''); + } + + // Errors + if (errors.isNotEmpty) { + print('❌ Errors:'); + for (final error in errors) { + print(' • $error'); + } + print(''); + print('=' * 70); + print('❌ Configuration invalid - fix errors before running'); + print('=' * 70); + } else { + print('=' * 70); + print('✅ Configuration valid - ready to run'); + print('=' * 70); + } +} diff --git a/packages/eval_cli/lib/src/dataset/eval_writer.dart b/packages/eval_cli/lib/src/dataset/eval_writer.dart new file mode 100644 index 0000000..7220748 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/eval_writer.dart @@ -0,0 +1,51 @@ +import 'package:devals/src/cli_exception.dart'; +import 'package:devals/src/dataset/filesystem_utils.dart'; +import 'package:path/path.dart' as p; + +EvalsWriter get generator => EvalsWriter(); + +/// Contains methods for writing and editing YAML files. +/// For reading operations, use [DatasetReader]. 
+class EvalsWriter { + EvalsWriter._(); + static final EvalsWriter _instance = EvalsWriter._(); + factory EvalsWriter() => _instance; + + String get datasetDirPath => findDatasetDirectory(); + String get tasksDirPath => p.join(datasetDirPath, 'tasks'); + + /// Returns the path for a task directory: tasks/{taskName}/ + String taskDirPath(String taskName) => p.join(tasksDirPath, taskName); + + /// Returns the path for a task YAML file: tasks/{taskName}/task.yaml + String taskYamlFilePath(String taskName) => + p.join(taskDirPath(taskName), 'task.yaml'); + + /// Writes a new task file at tasks/{taskName}/task.yaml. + void writeTaskFile(String taskName, {required String yaml}) { + final filePath = taskYamlFilePath(taskName); + writeFile(filePath, yaml); + } + + /// Appends content to an existing task file. + void appendToTaskFile(String taskName, {required String content}) { + final filePath = taskYamlFilePath(taskName); + try { + appendToFile(filePath, content); + } on CliException catch (e) { + throw CliException( + '${e.message}\n\n' + 'Could not append sample to task file.\n' + 'Please manually add the sample to: $filePath\n' + '$content', + ); + } + } + + /// Writes a job file to the jobs directory. + void writeJobFile(String jobName, String content) { + final jobsDir = findJobsDir(datasetDirPath); + final jobFilePath = p.join(jobsDir, '$jobName.yaml'); + writeFile(jobFilePath, content); + } +} diff --git a/packages/eval_cli/lib/src/dataset/file_templates/flutter_test_file.dart b/packages/eval_cli/lib/src/dataset/file_templates/flutter_test_file.dart new file mode 100644 index 0000000..6a36c17 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/file_templates/flutter_test_file.dart @@ -0,0 +1,19 @@ +String getTestFile() { + return ''' +/// Tests added here will be copied into your workspace and run against +/// the workspace after code is generated. 
+///
+/// These tests run IN ADDITION to any tests already present in the
+/// workspace project (e.g., tests in the git repo or local path project).
+///
+/// Write your test cases below.
+import 'package:flutter_test/flutter_test.dart';
+
+void main() {
+  group('Tests', () {
+    test('run', () {
+      // Write tests
+    });
+  });
+}
+''';
+}
diff --git a/packages/eval_cli/lib/src/dataset/file_templates/init_templates/init_job_template.dart b/packages/eval_cli/lib/src/dataset/file_templates/init_templates/init_job_template.dart
new file mode 100644
index 0000000..b3007c5
--- /dev/null
+++ b/packages/eval_cli/lib/src/dataset/file_templates/init_templates/init_job_template.dart
+/// Builds a String of valid, heavily commented YAML for a job configuration file.
+String initJobTemplate({
+  required String name,
+  required List<String> models,
+  required List<String> tasks,
+}) {
+  final modelsList = models.map((m) => '  - $m').join('\n');
+  final tasksList = tasks.map((t) => '    $t: {}').join('\n');
+
+  return '''
+# =============================================================================
+# Job Configuration: $name
+# =============================================================================
+# A job defines what subset of your dataset to run and how to run it.
+# Jobs are the primary way to control evaluation runs.
+#
+# To run this job:
+#   devals run $name
+
+
+# =============================================================================
+# RUNTIME SETTINGS (Optional)
+# =============================================================================
+# !!!Important!!!
+# These override built-in defaults. If you're just getting started,
+# I recommend you ignore these for now.
+# Uncomment and modify as needed.
+
+# Directory for evaluation logs (relative to dataset root)
+# A timestamped subdirectory is created automatically for each run.
+# logs_dir: ../logs + +# Sandbox environment: "local", "docker", or "podman" +# - local: Run directly on host (fastest, no isolation) +# - docker: Run in Docker containers (recommended for code execution) +# - podman: Run in Podman containers (rootless alternative to Docker) +# sandbox_type: local + +# Maximum concurrent API connections to model providers. +# Higher = faster but may hit rate limits with a large dataset +# max_connections: 10 + +# Maximum retry attempts for failed API calls. +# Helps handle transient errors. +# max_retries: 3 + +# ============================================================================= +# MODELS +# ============================================================================= +# Which models to evaluate. Format: "provider/model-name" +# If omitted, falls back to DEFAULT_MODELS from the Python registries. +models: +$modelsList + +# ============================================================================= +# VARIANTS (Optional) +# ============================================================================= +# Which configuration variants to test. +# Variants control access to tools and context. +# Each variant is a map of feature flags. An empty map {} is the baseline. +# If omitted, only the baseline (no features) is used. +# +# Example: +# variants: +# baseline: {} # no extra features +# context_only: { context_files: [../../context/flutter.md] } +# mcp_only: { mcp_servers: [dart] } + +# ============================================================================= +# TASKS +# ============================================================================= +# Which tasks to run and how. Uses paths for discovery and inline for overrides. +# If omitted, runs ALL discovered tasks. 
+# +# Task discovery via glob patterns (relative to dataset root): +# tasks: +# paths: [tasks/*] +# +# Per-task overrides: +# tasks: +# inline: +# task_id: +# # (use allowed_variants in task.yaml to whitelist variants) +# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude) +# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include) +# system_message: | # Override system prompt for this task +# Custom instructions... +# +# Simple format (run all samples with job-level settings): +# tasks: +# inline: +# task_id: {} +# +tasks: + inline: +$tasksList +'''; +} diff --git a/packages/eval_cli/lib/src/dataset/file_templates/init_templates/init_sample_template.dart b/packages/eval_cli/lib/src/dataset/file_templates/init_templates/init_sample_template.dart new file mode 100644 index 0000000..589a123 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/file_templates/init_templates/init_sample_template.dart @@ -0,0 +1,34 @@ +/// Template for the starter task created by `devals init`. +/// +/// Creates a task.yaml at tasks/get_started/task.yaml that points at +/// the parent project as its workspace. +String initTaskTemplate() { + return ''' +# ============================================================================= +# Starter Task +# ============================================================================= +# This task points at your project root as its workspace and runs a simple +# codebase analysis evaluation. + +func: analyze_codebase + +# Workspace: points to the project root containing pubspec.yaml +workspace: + path: ../../ + +samples: + inline: + - id: get_started + difficulty: easy + tags: [] + # Input: The prompt given to the model + input: | + Explore this codebase and suggest one improvement + to the code quality, readability, or architecture. 
+ # Target: Expected output or grading criteria + target: | + The suggestion should be specific, actionable, and reference + actual code in the project. It should explain why the change + improves the codebase. +'''; +} diff --git a/packages/eval_cli/lib/src/dataset/file_templates/job_template.dart b/packages/eval_cli/lib/src/dataset/file_templates/job_template.dart new file mode 100644 index 0000000..b402a6b --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/file_templates/job_template.dart @@ -0,0 +1,120 @@ +import '../variant_defaults.dart'; + +/// Builds a String of valid YAML for a job configuration file. +/// +/// Job files define WHAT to run and HOW to run it. They live in the jobs/ +/// directory and are selected via `devals run `. +/// +/// Jobs can: +/// - Override runtime settings (logs, sandbox, rate limits) +/// - Define named variants to run +/// - Filter which models and tasks to run +/// - Configure per-task options (sample filtering) +/// +String jobTemplate({ + required String name, + required List models, + required List variants, + required List tasks, +}) { + final modelsList = models.map((m) => ' - $m').join('\n'); + final tasksList = tasks.map((t) => ' $t: {}').join('\n'); + + // Build named variant map YAML + // Currently this doesn't work + final variantsMap = variantDefaults(); + + return ''' +# ============================================================================= +# Job Configuration: $name +# ============================================================================= +# A job defines what subset of your dataset to run and how to run it. +# Jobs are the primary way to control evaluation runs. +# +# To run this job: +# devals run $name + + +# ============================================================================= +# RUNTIME SETTINGS (Optional) +# ============================================================================= +# !!!Important!!! +# These override built-in defaults. 
If you're just getting started, +# I recommend you ignore these for now. +# Uncomment and modify as needed. + +# Directory for evaluation logs (relative to dataset root) +# A timestamped subdirectory is created automatically for each run. +# logs_dir: ../logs + +# Sandbox environment: "local", "docker", or "podman" +# - local: Run directly on host (fastest, no isolation) +# - docker: Run in Docker containers (recommended for code execution) +# - podman: Run in Podman containers (rootless alternative to Docker) +# sandbox_type: local + +# Maximum concurrent API connections to model providers. +# Higher = faster but may hit rate limits with a large dataset +# max_connections: 10 + +# Maximum retry attempts for failed API calls. +# Helps handle transient errors. +# max_retries: 3 + +# Save the agent's final workspace to logs//examples/ after each sample. +# Useful for reviewing the code produced during an eval run. +# save_examples: false + +# ============================================================================= +# MODELS +# ============================================================================= +# Which models to evaluate. Format: "provider/model-name" +# If omitted, falls back to DEFAULT_MODELS from the Python registries. +models: +$modelsList + +# ============================================================================= +# VARIANTS +# ============================================================================= +# Named variant configurations to test. +# Each variant defines what tools/context the agent has access to. +# +# Format: variant_name: { config } +# baseline: {} # no extra features +# context_only: { context_files: [./path/to.md] } # injects context files +# mcp_only: { mcp_servers: [dart] } # enables MCP servers +# full: { context_files: [...], mcp_servers: [...] } +# +# Tasks can optionally restrict which variants they support +# via `allowed_variants:` in their task.yaml. 
+variants: +${variantsMap.toString().trimRight()} + +# ============================================================================= +# TASKS +# ============================================================================= +# Which tasks to run and how. Uses paths for discovery and inline for overrides. +# +# Task discovery via glob patterns (relative to dataset root): +# tasks: +# paths: [tasks/*] +# +# Per-task overrides (keys must match directory names in tasks/): +# tasks: +# inline: +# task_id: +# include-samples: [sample1] # Only run specific samples (mutually exclusive with exclude) +# exclude-samples: [sample2] # Skip specific samples (mutually exclusive with include) +# system_message: | # Override system prompt for this task +# Custom instructions... +# +# Simple format (run all samples with job-level settings): +# tasks: +# inline: +# task_id: {} +# +tasks: + inline: +$tasksList +'''; +} diff --git a/packages/eval_cli/lib/src/dataset/file_templates/pubspec_template.dart b/packages/eval_cli/lib/src/dataset/file_templates/pubspec_template.dart new file mode 100644 index 0000000..4a91003 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/file_templates/pubspec_template.dart @@ -0,0 +1,87 @@ +import '../workspace.dart'; + +/// Builds a pubspec.yaml string for a sample's project directory. +/// +/// The pubspec imports the workspace so that Dart tooling (analyzer, etc.) +/// can resolve the code. +/// +/// When [workspaceKey] is [WorkspaceType.template], [templatePackage] must be +/// provided. For other workspace types, [workspaceValue] carries the +/// user-provided string (path, git URL, etc.). +String pubspecTemplate({ + required String sampleId, + required WorkspaceType workspaceKey, + TemplatePackage? templatePackage, + String? workspaceValue, +}) { + final isFlutter = templatePackage?.isFlutter ?? 
false; + final dependencySection = _buildDependencySection( + workspaceKey: workspaceKey, + templatePackage: templatePackage, + workspaceValue: workspaceValue, + sampleId: sampleId, + ); + + final header = + ''' +name: ${sampleId}_tests +description: 'Test workspace for $sampleId' +publish_to: 'none' +version: 1.0.0 +'''; + + final flutterBase = + ''' +environment: + sdk: ^3.5.0 + flutter: ">=3.10.0" + +dependencies: + flutter: + sdk: flutter + $dependencySection + +dev_dependencies: + flutter_test: + sdk: flutter + flutter_lints: ^3.0.0 +'''; + + final dartBase = + ''' +environment: + sdk: ^3.5.0 + +dependencies: + $dependencySection + +dev_dependencies: +'''; + + return isFlutter ? '$header\n$flutterBase' : '$header\n$dartBase'; +} + +String _buildDependencySection({ + required WorkspaceType workspaceKey, + required String sampleId, + TemplatePackage? templatePackage, + String? workspaceValue, +}) { + return switch (workspaceKey) { + WorkspaceType.template => + ''' + ${templatePackage!.packageName}: + path: ../../../workspaces/${templatePackage.yamlValue}''', + WorkspaceType.path => + ''' + # Workspace path dependency + # $sampleId: + # path: ${workspaceValue ?? ''}''', + WorkspaceType.git => + ''' + # Workspace git dependency + # $sampleId: + # git: ${workspaceValue ?? ''}''', + _ => ' # No workspace dependency', + }; +} diff --git a/packages/eval_cli/lib/src/dataset/file_templates/sample_template.dart b/packages/eval_cli/lib/src/dataset/file_templates/sample_template.dart new file mode 100644 index 0000000..0a2b40e --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/file_templates/sample_template.dart @@ -0,0 +1,62 @@ +import '../workspace.dart'; + +/// Builds a String of valid YAML for an inline sample block. +/// +/// This generates a sample entry that can be appended to an existing +/// task.yaml file under the `samples.inline:` key. +String sampleTemplate({ + required String id, + required String difficulty, + WorkspaceType? 
workspaceType, + TemplatePackage? templatePackage, + String? workspaceValue, +}) { + final workspaceSection = _buildSampleWorkspaceSection( + workspaceType, + templatePackage: templatePackage, + workspaceValue: workspaceValue, + ); + + return ''' + - id: $id + difficulty: $difficulty + tags: []$workspaceSection + input: | + # Write prompt here + target: | + # Write target here +'''; +} + +/// Builds workspace/tests lines for an inline sample block. +/// +/// Only needed if the sample overrides the task-level workspace. +String _buildSampleWorkspaceSection( + WorkspaceType? workspaceType, { + TemplatePackage? templatePackage, + String? workspaceValue, +}) { + return switch (workspaceType) { + WorkspaceType.git => + ''' + + workspace: + git: ${workspaceValue ?? ''}''', + WorkspaceType.path => + ''' + + workspace: + path: ${workspaceValue ?? ''}''', + WorkspaceType.template => + ''' + + workspace: + template: ${templatePackage?.yamlValue ?? ''}''', + WorkspaceType.create => + ''' + + workspace: + path: ./project''', + _ => '', + }; +} diff --git a/packages/eval_cli/lib/src/dataset/file_templates/task_template.dart b/packages/eval_cli/lib/src/dataset/file_templates/task_template.dart new file mode 100644 index 0000000..4aa092d --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/file_templates/task_template.dart @@ -0,0 +1,84 @@ +import '../workspace.dart'; + +/// Builds a String of valid YAML for a standalone task.yaml file. +/// +/// This generates a complete task file with inline samples, +/// to be written at tasks/{taskName}/task.yaml. +String taskTemplate({ + required String taskFunc, + WorkspaceType? workspaceType, + TemplatePackage? templatePackage, + String? workspaceValue, + List variants = const [], + String? systemMessage, +}) { + final workspaceSection = _buildTaskWorkspaceSection( + workspaceType, + templatePackage: templatePackage, + workspaceValue: workspaceValue, + ); + + final variantsLine = variants.isNotEmpty + ? 
'allowed_variants: [${variants.join(', ')}]\n' + : ''; + + final systemMessageBlock = systemMessage != null && systemMessage.isNotEmpty + ? 'system_message: |\n ${systemMessage.replaceAll('\n', '\n ')}\n' + : ''; + + return ''' +# Task configuration +# See docs/configuration_reference.md for full schema reference. +func: $taskFunc +$variantsLine$systemMessageBlock$workspaceSection +samples: + inline: + - id: sample_1 + difficulty: medium + input: | + # Write prompt here + target: | + # Write target here +'''; +} + +/// Builds the workspace section for a task-level definition. +String _buildTaskWorkspaceSection( + WorkspaceType? workspaceType, { + TemplatePackage? templatePackage, + String? workspaceValue, +}) { + return switch (workspaceType) { + WorkspaceType.git => + ''' +workspace: + git: ${workspaceValue ?? ''} + # ref: # Optional +''', + WorkspaceType.path => + ''' +workspace: + path: ${workspaceValue ?? './project'} +''', + WorkspaceType.template => + ''' +workspace: + template: ${templatePackage?.yamlValue ?? ''} +''', + WorkspaceType.create => + ''' +workspace: + path: ./project +''', + _ => + ''' +# Workspace configuration (uncomment one): +# workspace: +# template: flutter_app # OR dart_package OR jaspr_app +# workspace: +# path: ./project +# workspace: +# git: +''', + }; +} diff --git a/packages/eval_cli/lib/src/dataset/filesystem_utils.dart b/packages/eval_cli/lib/src/dataset/filesystem_utils.dart new file mode 100644 index 0000000..dd1e658 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/filesystem_utils.dart @@ -0,0 +1,233 @@ +import 'dart:io'; + +import 'package:devals/src/dataset/file_templates/pubspec_template.dart'; +import 'package:path/path.dart' as p; +import 'package:yaml/yaml.dart'; + +import '../cli_exception.dart'; +import 'workspace.dart'; + +export 'dataset_reader.dart'; +export 'variant_defaults.dart'; +export 'workspace.dart'; + +/// Finds or creates the tasks directory. 
+String findTasksDir(String datasetDirPath) { + final tasksDirPath = p.join(datasetDirPath, 'tasks'); + final dir = Directory(tasksDirPath); + + if (!dir.existsSync()) { + stderr.writeln( + 'Tasks directory not found. Creating: $tasksDirPath', + ); + dir.createSync(recursive: true); + } + return tasksDirPath; +} + +/// Creates the task directory structure at tasks/{taskName}/. +/// +/// If a workspace type is provided, creates the project/ subdirectory +/// with appropriate scaffolding. +/// +/// Returns the path to the new dir at tasks/{taskName} +Future<String> createTaskResources( + String taskName, { + required String tasksDirPath, + WorkspaceType? workspaceKey, + TemplatePackage? templatePackage, + String? workspaceValue, +}) async { + final dir = p.join(tasksDirPath, taskName); + + try { + // Create task directory + Directory(dir).createSync(recursive: true); + + // For any workspace type, create the project/ directory + if (workspaceKey != null) { + final projectDir = p.join(dir, 'project'); + + if (workspaceKey == WorkspaceType.create) { + // Run the creation command from the task dir. + final parts = workspaceValue?.split(' ') ?? []; + if (parts.isEmpty) { + throw CliException('No creation command provided.'); + } + final result = Process.runSync( + parts.first, + parts.skip(1).toList(), + workingDirectory: dir, + ); + if (result.exitCode != 0) { + throw CliException( + 'Creation command failed (exit ${result.exitCode}):\n${result.stderr}', + ); + } + } else if (workspaceKey == WorkspaceType.template) { + // Template workspaces don't need a local project directory + // — the solver creates them at runtime.
+ } else { + // For path/git: create project/ and generate a pubspec + Directory(projectDir).createSync(); + final pubspecContent = pubspecTemplate( + sampleId: taskName, + workspaceKey: workspaceKey, + templatePackage: templatePackage, + workspaceValue: workspaceValue, + ); + File( + p.join(projectDir, 'pubspec.yaml'), + ).writeAsStringSync(pubspecContent); + } + } + } on FileSystemException catch (e) { + throw CliException('Failed to create task resources at $dir: ${e.message}'); + } + + return dir; +} + +/// Finds or creates the jobs directory within the dataset directory. +String findJobsDir(String datasetDirPath) { + final jobsDirPath = p.join(datasetDirPath, 'jobs'); + ensureDirectoryExists(jobsDirPath); + return jobsDirPath; +} + +/// Finds the logs directory within the dataset directory. +/// Returns null if it doesn't exist. +String? findLogsDir(String datasetDirPath) { + final logsPath = p.join(datasetDirPath, 'logs'); + if (Directory(logsPath).existsSync()) { + return logsPath; + } + return null; +} + +/// Ensures a directory exists, creating it if necessary. +void ensureDirectoryExists(String dirPath) { + final dir = Directory(dirPath); + if (!dir.existsSync()) { + dir.createSync(recursive: true); + } +} + +/// Writes content to a file with error handling. +/// Creates parent directories if they don't exist. +void writeFile(String filePath, String content) { + try { + final file = File(filePath); + final parent = file.parent; + if (!parent.existsSync()) { + parent.createSync(recursive: true); + } + file.writeAsStringSync(content); + } on FileSystemException catch (e) { + throw CliException( + 'Failed to write file: $filePath\n${e.message}', + ); + } +} + +/// Reads file content as a string. Throws CliException if file doesn't exist. +String readFile(String filePath) { + final file = File(filePath); + if (!file.existsSync()) { + throw CliException('File not found: $filePath'); + } + return file.readAsStringSync(); +} + +/// Appends content to an existing file. Throws CliException if file doesn't exist.
+void appendToFile(String filePath, String content) { + final file = File(filePath); + if (!file.existsSync()) { + throw CliException('File not found: $filePath'); + } + try { + file.writeAsStringSync(content, mode: FileMode.append); + } on FileSystemException catch (e) { + throw CliException( + 'Failed to append to file: $filePath\n${e.message}', + ); + } +} + +// ------------------------------------------------------------------ +// Dataset discovery (moved from eval_config) +// ------------------------------------------------------------------ + +/// The marker file that identifies a devals project root. +const devalsYamlFilename = 'devals.yaml'; +const maxSearchDepth = 10; + +/// Finds the dataset directory by walking up from the current directory +/// looking for a `devals.yaml` marker file. +/// +/// The `devals.yaml` file must contain a `dataset` field pointing to the +/// directory containing `tasks/` and `jobs/`, relative to the yaml file. +/// +/// This works like `flutter` finding `pubspec.yaml` — you can run `devals` +/// from any subdirectory of your project. +/// +/// Throws [CliException] if no `devals.yaml` is found. +String findDatasetDirectory() { + var dir = Directory.current.absolute; + + for (var i = 0; i < maxSearchDepth; i++) { + final yamlFile = File(p.join(dir.path, devalsYamlFilename)); + if (yamlFile.existsSync()) { + return _resolveDatasetPath(yamlFile); + } + final parent = dir.parent; + if (parent.path == dir.path) break; // filesystem root + dir = parent; + } + + throw CliException( + 'Could not find $devalsYamlFilename in this directory or any parent.\n' + '\n' + 'Run "devals init" to initialize a new devals project.', + ); +} + +/// Reads the `dataset` field from a `devals.yaml` file and resolves +/// it to an absolute path. +String _resolveDatasetPath(File yamlFile) { + final content = yamlFile.readAsStringSync(); + final yaml = loadYaml(content); + + if (yaml is! 
Map || !yaml.containsKey('dataset')) { + throw CliException( + '${yamlFile.path} is missing the required "dataset" field.\n' + 'Expected format:\n' + ' dataset: ./evals', + ); + } + + final datasetRelative = yaml['dataset'] as String; + final projectRoot = p.dirname(yamlFile.path); + final datasetPath = p.normalize(p.join(projectRoot, datasetRelative)); + + // Verify the dataset directory contains tasks/ + if (!Directory(p.join(datasetPath, 'tasks')).existsSync()) { + throw CliException( + 'Dataset directory does not contain a tasks/ subdirectory: ' + '$datasetPath\n' + 'Check the "dataset" field in ${yamlFile.path}.', + ); + } + + return datasetPath; +} + +/// Tries to find the dataset directory, returning null instead of throwing. +/// Useful when the dataset directory is optional. +String? tryFindDatasetDirectory() { + try { + return findDatasetDirectory(); + } on CliException { + return null; + } +} diff --git a/packages/eval_cli/lib/src/dataset/variant_defaults.dart b/packages/eval_cli/lib/src/dataset/variant_defaults.dart new file mode 100644 index 0000000..6147900 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/variant_defaults.dart @@ -0,0 +1,30 @@ +// TODO(ewindmill): The whole variants flow should be re-considered now that we don't have default variants. +// I think the default variants should just be something helpful i.e.: baseline and skills + +/// Returns the list of common variant names for scaffolding. 
+List<String> variants = const ['baseline', 'context_only', 'mcp_only', 'full']; + +String variantDefaults() { + // Build named variant map YAML + final variantsMap = StringBuffer(); + for (final v in variants) { + switch (v) { + case 'baseline': + variantsMap.writeln(' baseline: {}'); + case 'context_only': + variantsMap.writeln( + ' context_only: { context_files: [./context_files/flutter.md] }', + ); + case 'mcp_only': + variantsMap.writeln(' mcp_only: { mcp_servers: [dart] }'); + case 'full': + variantsMap.writeln( + ' full: { context_files: [./context_files/flutter.md], mcp_servers: [dart] }', + ); + default: + variantsMap.writeln(' $v: {}'); + } + } + + return variantsMap.toString(); +} diff --git a/packages/eval_cli/lib/src/dataset/workspace.dart b/packages/eval_cli/lib/src/dataset/workspace.dart new file mode 100644 index 0000000..bf9edd5 --- /dev/null +++ b/packages/eval_cli/lib/src/dataset/workspace.dart @@ -0,0 +1,22 @@ +enum WorkspaceType { template, path, git, create } + +/// Available project templates for workspace creation. +/// +/// Each template maps to a directory under `workspaces/` in the +/// dataset and a corresponding package name used in pubspec dependencies. +enum TemplatePackage { + flutterApp('flutter_app', 'flutter_eval_app'), + dartPackage('dart_package', 'dart_eval_package'), + jasprApp('jaspr_app', 'jaspr_eval_app'); + + const TemplatePackage(this.yamlValue, this.packageName); + + /// The value written to sample.yaml (e.g., `flutter_app`). + final String yamlValue; + + /// The package name used in pubspec dependencies (e.g., `flutter_eval_app`). + final String packageName; + + /// Whether this template is Flutter-based (needs Flutter SDK deps).
+ bool get isFlutter => this == TemplatePackage.flutterApp; +} diff --git a/packages/eval_cli/lib/src/gcs/gcs_client.dart b/packages/eval_cli/lib/src/gcs/gcs_client.dart new file mode 100644 index 0000000..55c0fa2 --- /dev/null +++ b/packages/eval_cli/lib/src/gcs/gcs_client.dart @@ -0,0 +1,69 @@ +/// Client for uploading files to Google Cloud Storage. +/// +/// Uses the `gcloud` package for storage operations and `googleapis_auth` +/// for service account authentication. +library; + +import 'dart:io'; + +import 'package:gcloud/storage.dart'; +import 'package:googleapis_auth/auth_io.dart' as auth; +import 'package:http/http.dart' as http; + +/// A client for interacting with Google Cloud Storage. +/// +/// Handles authentication via service account credentials and provides +/// methods for uploading files to a GCS bucket. +class GcsClient { + final Storage _storage; + final http.Client _httpClient; + + GcsClient._(this._storage, this._httpClient); + + /// Creates a [GcsClient] authenticated with a service account. + /// + /// [projectId] is the Google Cloud project ID. + /// [credentialsPath] is the path to the service account JSON key file. + static Future create({ + required String projectId, + required String credentialsPath, + }) async { + final file = File(credentialsPath); + if (!file.existsSync()) { + throw FileSystemException( + 'Service account credentials file not found', + credentialsPath, + ); + } + + final jsonString = file.readAsStringSync(); + final credentials = auth.ServiceAccountCredentials.fromJson(jsonString); + final scopes = [ + ...Storage.SCOPES, + ]; + + final httpClient = await auth.clientViaServiceAccount(credentials, scopes); + final storage = Storage(httpClient, projectId); + + return GcsClient._(storage, httpClient); + } + + /// Uploads a file to the specified [bucketName] at [objectName]. + /// + /// The [objectName] is the full path within the bucket, + /// e.g. `2026-01-07_17-11-47/some-log.json`. 
+ Future uploadFile( + String bucketName, + String objectName, + File file, + ) async { + final bucket = _storage.bucket(bucketName); + final bytes = await file.readAsBytes(); + return bucket.writeBytes(objectName, bytes); + } + + /// Releases the underlying HTTP client resources. + void close() { + _httpClient.close(); + } +} diff --git a/packages/eval_cli/lib/src/gcs/log_validator.dart b/packages/eval_cli/lib/src/gcs/log_validator.dart new file mode 100644 index 0000000..114b5ee --- /dev/null +++ b/packages/eval_cli/lib/src/gcs/log_validator.dart @@ -0,0 +1,123 @@ +/// Validates that a JSON file is an Inspect AI evaluation log. +/// +/// Uses a streaming approach that reads only the first few KB of the file +/// to check the structural fingerprint (version, status, eval.task) without +/// ever loading the full file into memory. This is important because Inspect +/// logs can be tens of thousands of lines. +library; + +import 'dart:convert'; +import 'dart:io'; +import 'dart:math'; + +/// The number of bytes to read from the head of a file for validation. +/// +/// The Inspect log format places `version`, `status`, and `eval` (including +/// `eval.task`) as the first keys, well before the massive `samples` array. +/// 4 KB is more than enough to capture these fields. +const _headBytes = 4096; + +/// Result of validating a file against the Inspect AI log format. +class LogValidationResult { + /// Whether the file appears to be a valid Inspect AI log. + final bool isValid; + + /// Human-readable reason for validation failure. + /// `null` when [isValid] is `true`. + final String? reason; + + const LogValidationResult.valid() : isValid = true, reason = null; + + const LogValidationResult.invalid(this.reason) : isValid = false; + + @override + String toString() => + isValid ? 'LogValidationResult(valid)' : 'LogValidationResult($reason)'; +} + +/// Validates that [file] looks like an Inspect AI evaluation log. 
+/// +/// Reads only the first [_headBytes] bytes of the file and checks: +/// 1. The content starts with `{` (is a JSON object). +/// 2. A `"version"` key exists with an integer value. +/// 3. A `"status"` key exists with a string value. +/// 4. An `"eval"` key exists containing a `"task"` string. +/// +/// This is intentionally a shallow "envelope" check — it confirms the file +/// is an Inspect log without parsing the full (potentially huge) payload. +Future<LogValidationResult> validateInspectLog(File file) async { + if (!file.path.endsWith('.json')) { + return LogValidationResult.invalid('File does not have a .json extension'); + } + + final fileLength = await file.length(); + if (fileLength == 0) { + return LogValidationResult.invalid('File is empty'); + } + + // Read only the first _headBytes bytes. + final raf = await file.open(mode: FileMode.read); + try { + final bytesToRead = min(_headBytes, fileLength); + final bytes = await raf.read(bytesToRead); + final head = utf8.decode(bytes, allowMalformed: true); + + return _validateHead(head); + } finally { + await raf.close(); + } +} + +/// Validates the head (first few KB) of a JSON string. +/// +/// Exposed for testing so tests can pass raw strings without creating files. +LogValidationResult validateHead(String head) => _validateHead(head); + +LogValidationResult _validateHead(String head) { + final trimmed = head.trimLeft(); + if (!trimmed.startsWith('{')) { + return LogValidationResult.invalid('Content is not a JSON object'); + } + + // Try to parse the head. Since we truncated the file, it won't be valid + // JSON on its own. We parse top-level keys by scanning for known patterns. + // + // Strategy: extract key-value pairs from the head using a lightweight + // approach that doesn't require the full JSON to be well-formed. + + // Check for "version" key with an integer value.
+ final versionMatch = RegExp(r'"version"\s*:\s*(\d+)').firstMatch(trimmed); + if (versionMatch == null) { + return LogValidationResult.invalid( + 'Missing or invalid "version" field (expected an integer)', + ); + } + + // Check for "status" key with a string value. + final statusMatch = RegExp(r'"status"\s*:\s*"([^"]*)"').firstMatch(trimmed); + if (statusMatch == null) { + return LogValidationResult.invalid( + 'Missing or invalid "status" field (expected a string)', + ); + } + + // Check for "eval" key containing a "task" field. + // First confirm "eval" key exists and opens an object. + final evalMatch = RegExp(r'"eval"\s*:\s*\{').firstMatch(trimmed); + if (evalMatch == null) { + return LogValidationResult.invalid( + 'Missing "eval" object', + ); + } + + // Within the eval object region, look for "task" with a non-empty string. + final afterEval = trimmed.substring(evalMatch.end); + final taskMatch = RegExp(r'"task"\s*:\s*"([^"]+)"').firstMatch(afterEval); + if (taskMatch == null) { + return LogValidationResult.invalid( + 'Missing or empty "eval.task" field', + ); + } + + return LogValidationResult.valid(); +} diff --git a/packages/eval_cli/pubspec.yaml b/packages/eval_cli/pubspec.yaml new file mode 100644 index 0000000..241234d --- /dev/null +++ b/packages/eval_cli/pubspec.yaml @@ -0,0 +1,32 @@ +name: devals +description: CLI for managing dash-evals - create samples, run evaluations, and view results. 
+version: 0.0.1 +publish_to: none +resolution: workspace + +executables: + devals: + +environment: + sdk: ^3.10.0 + +dependencies: + args: ^2.7.0 + dotenv: ^4.2.0 + gcloud: ^0.9.0 + glob: ^2.1.0 + googleapis_auth: ^1.6.0 + http: ^1.2.0 + howdy: + git: + url: https://github.com/ericwindmill/howdy.git + path: packages/howdy-cli + eval_config: + path: ../eval_config + path: ^1.9.0 + yaml: ^3.1.0 + yaml_edit: ^2.2.0 + +dev_dependencies: + lints: ^6.0.0 + test: any diff --git a/packages/eval_cli/test/commands/doctor_command_test.dart b/packages/eval_cli/test/commands/doctor_command_test.dart new file mode 100644 index 0000000..0d5ec3a --- /dev/null +++ b/packages/eval_cli/test/commands/doctor_command_test.dart @@ -0,0 +1,246 @@ +import 'dart:io'; + +import 'package:devals/src/commands/doctor_command.dart'; +import 'package:test/test.dart'; + +/// Creates a mock [ProcessRunner] that returns predefined results. +/// +/// [responses] maps `'executable args'` keys to [ProcessResult] values. +/// Any unmatched call throws a [ProcessException]. 
+ProcessRunner mockProcessRunner(Map<String, ProcessResult> responses) { + return (String executable, List<String> args) async { + final key = '$executable ${args.join(' ')}'.trim(); + if (responses.containsKey(key)) { + return responses[key]!; + } + throw ProcessException(executable, args, 'not found', -1); + }; +} + +ProcessResult _ok(String stdout) => ProcessResult(0, 0, stdout, ''); + +ProcessResult _fail([String stderr = '']) => ProcessResult(0, 1, '', stderr); + +void main() { + final mockRunner = mockProcessRunner({}); + + group('buildChecks', () { + test('returns 8 checks', () { + final checks = buildChecks(processRunner: mockRunner); + expect(checks.length, 8); + }); + }); + + group('Dart SDK check', () { + test('succeeds with version', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({ + 'dart --version': _ok('Dart SDK version: 3.10.1 (stable)'), + }), + ); + final result = await checks[0].check(); + expect(result.status, CheckStatus.ok); + expect(result.version, '3.10.1'); + }); + + test('fails when not found', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({}), + ); + final result = await checks[0].check(); + expect(result.status, CheckStatus.error); + expect(result.fix, contains('dart.dev')); + }); + }); + + group('Python check', () { + test('succeeds with 3.13', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({ + 'python3 --version': _ok('Python 3.13.2'), + }), + ); + final result = await checks[1].check(); + expect(result.status, CheckStatus.ok); + expect(result.version, '3.13.2'); + }); + + test('fails with old version', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({ + 'python3 --version': _ok('Python 3.12.1'), + }), + ); + final result = await checks[1].check(); + expect(result.status, CheckStatus.error); + expect(result.message, contains('3.13+')); + }); + + test('fails when not found', () async { + final checks = buildChecks( + processRunner:
mockProcessRunner({}), + ); + final result = await checks[1].check(); + expect(result.status, CheckStatus.error); + }); + }); + + group('eval_runner check', () { + test('succeeds when installed', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({ + 'run-evals --help': _ok('usage: run-evals ...'), + }), + ); + final result = await checks[2].check(); + expect(result.status, CheckStatus.ok); + }); + + test('fails when not installed', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({}), + ); + final result = await checks[2].check(); + expect(result.status, CheckStatus.error); + expect(result.fix, contains('pip install')); + }); + }); + + group('Podman check', () { + test('succeeds with version', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({ + 'podman --version': _ok('podman version 5.3.1'), + }), + ); + final result = await checks[3].check(); + expect(result.status, CheckStatus.ok); + expect(result.version, '5.3.1'); + }); + + test('warns when not found (optional)', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({}), + ); + final result = await checks[3].check(); + expect(result.status, CheckStatus.warning); + expect(result.message, contains('optional')); + }); + }); + + group('Flutter SDK check', () { + test('succeeds with version', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({ + 'flutter --version': _ok('Flutter 3.41.0 • channel stable'), + }), + ); + final result = await checks[4].check(); + expect(result.status, CheckStatus.ok); + expect(result.version, '3.41.0'); + }); + + test('fails when not found', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({}), + ); + final result = await checks[4].check(); + expect(result.status, CheckStatus.error); + }); + }); + + group('Serverpod CLI check', () { + test('succeeds with version', () async { + final checks = buildChecks( + processRunner: 
mockProcessRunner({ + 'serverpod version': _ok('Serverpod version: 2.3.0'), + }), + ); + final result = await checks[5].check(); + expect(result.status, CheckStatus.ok); + expect(result.version, '2.3.0'); + }); + + test('fails when not found', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({}), + ); + final result = await checks[5].check(); + expect(result.status, CheckStatus.error); + expect(result.fix, contains('serverpod_cli')); + }); + }); + + group('API keys check', () { + // Note: We can't easily mock Platform.environment, so the API key check + // results depend on the actual environment. We test the check runs + // without error and returns a valid status. + test('returns a valid status', () async { + final checks = buildChecks( + processRunner: mockProcessRunner({}), + ); + final result = await checks[6].check(); + expect( + result.status, + isIn([CheckStatus.ok, CheckStatus.warning, CheckStatus.error]), + ); + }); + }); + + group('CheckResult', () { + test('supports all statuses', () { + for (final status in CheckStatus.values) { + final result = CheckResult(status: status); + expect(result.status, status); + expect(result.version, isNull); + expect(result.message, isNull); + expect(result.fix, isNull); + } + }); + + test('stores all fields', () { + const result = CheckResult( + status: CheckStatus.warning, + version: '1.2.3', + message: 'some message', + fix: 'some fix', + ); + expect(result.version, '1.2.3'); + expect(result.message, 'some message'); + expect(result.fix, 'some fix'); + }); + }); + + group('exit code logic', () { + test('exits 0 when all pass', () async { + final allPass = mockProcessRunner({ + 'dart --version': _ok('Dart SDK version: 3.10.1'), + 'python3 --version': _ok('Python 3.13.2'), + 'run-evals --help': _ok('usage'), + 'podman --version': _ok('podman version 5.3.1'), + 'flutter --version': _ok('Flutter 3.41.0'), + 'serverpod version': _ok('Serverpod version: 2.3.0'), + }); + final checks = 
buildChecks(processRunner: allPass); + // Run all checks and verify none are errors + // (API keys depend on env, filter it out for this test) + final results = []; + for (final check in checks) { + if (check.name != 'API keys') { + results.add(await check.check()); + } + } + final hasErrors = results.any((r) => r.status == CheckStatus.error); + expect(hasErrors, false); + }); + + test('process exit code 1 treated as failure', () async { + final failRunner = mockProcessRunner({ + 'dart --version': _fail('not found'), + }); + final checks = buildChecks(processRunner: failRunner); + final result = await checks[0].check(); + expect(result.status, CheckStatus.error); + }); + }); +} diff --git a/packages/eval_cli/test/commands/log_validator_test.dart b/packages/eval_cli/test/commands/log_validator_test.dart new file mode 100644 index 0000000..b32274f --- /dev/null +++ b/packages/eval_cli/test/commands/log_validator_test.dart @@ -0,0 +1,168 @@ +import 'dart:io'; + +import 'package:devals/src/gcs/log_validator.dart'; +import 'package:test/test.dart'; + +void main() { + late Directory tmpDir; + + setUp(() { + tmpDir = Directory.systemTemp.createTempSync('log_validator_test_'); + }); + + tearDown(() { + tmpDir.deleteSync(recursive: true); + }); + + File writeFile(String name, String content) { + final file = File('${tmpDir.path}/$name'); + file.writeAsStringSync(content); + return file; + } + + // A minimal valid Inspect log header. 
+ const validHead = ''' +{ + "version": 2, + "status": "success", + "eval": { + "task": "my_task:baseline", + "task_id": "abc123" + }, + "plan": {}, + "results": {}, + "stats": {}, + "samples": [] +}'''; + + group('validateInspectLog (file-based)', () { + test('accepts a valid Inspect log file', () async { + final file = writeFile('valid.json', validHead); + final result = await validateInspectLog(file); + expect(result.isValid, isTrue); + expect(result.reason, isNull); + }); + + test('rejects a non-.json file', () async { + final file = writeFile('data.csv', validHead); + final result = await validateInspectLog(file); + expect(result.isValid, isFalse); + expect(result.reason, contains('.json')); + }); + + test('rejects an empty file', () async { + final file = writeFile('empty.json', ''); + final result = await validateInspectLog(file); + expect(result.isValid, isFalse); + expect(result.reason, contains('empty')); + }); + }); + + group('validateHead (string-based)', () { + test('accepts a valid Inspect log head', () { + final result = validateHead(validHead); + expect(result.isValid, isTrue); + }); + + test('rejects non-JSON content', () { + final result = validateHead('this is not json'); + expect(result.isValid, isFalse); + expect(result.reason, contains('not a JSON object')); + }); + + test('rejects a JSON array', () { + final result = validateHead('[1, 2, 3]'); + expect(result.isValid, isFalse); + expect(result.reason, contains('not a JSON object')); + }); + + test('rejects JSON missing "version"', () { + final result = validateHead(''' +{ + "status": "success", + "eval": { "task": "my_task" } +}'''); + expect(result.isValid, isFalse); + expect(result.reason, contains('version')); + }); + + test('rejects JSON where "version" is a string', () { + final result = validateHead(''' +{ + "version": "2", + "status": "success", + "eval": { "task": "my_task" } +}'''); + expect(result.isValid, isFalse); + expect(result.reason, contains('version')); + }); + + 
test('rejects JSON missing "status"', () { + final result = validateHead(''' +{ + "version": 2, + "eval": { "task": "my_task" } +}'''); + expect(result.isValid, isFalse); + expect(result.reason, contains('status')); + }); + + test('rejects JSON missing "eval" object', () { + final result = validateHead(''' +{ + "version": 2, + "status": "success" +}'''); + expect(result.isValid, isFalse); + expect(result.reason, contains('eval')); + }); + + test('rejects JSON where "eval" is not an object', () { + final result = validateHead(''' +{ + "version": 2, + "status": "success", + "eval": "not_an_object" +}'''); + expect(result.isValid, isFalse); + expect(result.reason, contains('eval')); + }); + + test('rejects JSON missing "eval.task"', () { + final result = validateHead(''' +{ + "version": 2, + "status": "success", + "eval": { "run_id": "abc" } +}'''); + expect(result.isValid, isFalse); + expect(result.reason, contains('eval.task')); + }); + + test('rejects a random JSON object', () { + final result = validateHead('{"name": "foo", "count": 42}'); + expect(result.isValid, isFalse); + expect(result.reason, contains('version')); + }); + + test('accepts version 1 format', () { + final result = validateHead(''' +{ + "version": 1, + "status": "error", + "eval": { "task": "some_eval:variant" } +}'''); + expect(result.isValid, isTrue); + }); + + test('accepts with leading whitespace', () { + final result = validateHead(''' + { + "version": 2, + "status": "success", + "eval": { "task": "my_task:baseline" } + }'''); + expect(result.isValid, isTrue); + }); + }); +} diff --git a/packages/eval_cli/test/dataset/dataset_reader_test.dart b/packages/eval_cli/test/dataset/dataset_reader_test.dart new file mode 100644 index 0000000..7ef1d2f --- /dev/null +++ b/packages/eval_cli/test/dataset/dataset_reader_test.dart @@ -0,0 +1,191 @@ +import 'dart:io'; +import 'package:devals/src/dataset/dataset_reader.dart'; +import 'package:path/path.dart' as p; +import 'package:test/test.dart'; + +void 
main() { + late Directory tempDir; + late Directory originalDir; + + /// The dataset root — contains tasks/ directly. + late String datasetPath; + + /// Creates a devals.yaml + dataset directory with a tasks/ subdirectory + /// so findDatasetDirectory() can discover it. + void createDatasetDir() { + Directory('$datasetPath/tasks').createSync(recursive: true); + // Create devals.yaml in tempDir pointing to the dataset + File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('dataset: ./evals\n'); + } + + setUp(() { + originalDir = Directory.current; + tempDir = Directory.systemTemp.createTempSync('dataset_reader_test_'); + datasetPath = p.join(tempDir.path, 'evals'); + // Clear singleton cache between tests + DatasetReader().clearCache(); + }); + + tearDown(() { + Directory.current = originalDir; + DatasetReader().clearCache(); + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('getVariants()', () { + test('returns common variant names for scaffolding', () { + createDatasetDir(); + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final variants = reader.getVariants(); + expect( + variants, + containsAll(['baseline', 'context_only', 'mcp_only', 'full']), + ); + } finally { + Directory.current = originalDir; + } + }); + }); + + group('getTasks()', () { + test('discovers task directories containing task.yaml', () { + createDatasetDir(); + final tasksDir = Directory('$datasetPath/tasks'); + for (final name in ['task_one', 'task_two']) { + final dir = Directory('${tasksDir.path}/$name'); + dir.createSync(recursive: true); + File('${dir.path}/task.yaml').writeAsStringSync('func: solve'); + } + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final tasks = reader.getTasks(); + expect(tasks, containsAll(['task_one', 'task_two'])); + } finally { + Directory.current = originalDir; + } + }); + + test('returns empty list when tasks dir is empty', () { + createDatasetDir(); + 
Directory('$datasetPath/tasks').createSync(recursive: true); + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final tasks = reader.getTasks(); + expect(tasks, isEmpty); + } finally { + Directory.current = originalDir; + } + }); + + test('returns empty list when tasks dir missing', () { + createDatasetDir(); + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final tasks = reader.getTasks(); + expect(tasks, isEmpty); + } finally { + Directory.current = originalDir; + } + }); + + test('ignores directories without task.yaml', () { + createDatasetDir(); + final tasksDir = Directory('$datasetPath/tasks'); + final validDir = Directory('${tasksDir.path}/valid_task'); + validDir.createSync(recursive: true); + File('${validDir.path}/task.yaml').writeAsStringSync('func: solve'); + + final invalidDir = Directory('${tasksDir.path}/no_yaml'); + invalidDir.createSync(recursive: true); + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final tasks = reader.getTasks(); + expect(tasks, equals(['valid_task'])); + } finally { + Directory.current = originalDir; + } + }); + }); + + group('getExistingTaskNames()', () { + test('returns Set of task names from filesystem', () { + createDatasetDir(); + final tasksDir = Directory('$datasetPath/tasks'); + for (final name in ['task_a', 'task_b']) { + final dir = Directory('${tasksDir.path}/$name'); + dir.createSync(recursive: true); + File('${dir.path}/task.yaml').writeAsStringSync('func: solve'); + } + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final names = reader.getExistingTaskNames(); + expect(names, isA>()); + expect(names, containsAll(['task_a', 'task_b'])); + } finally { + Directory.current = originalDir; + } + }); + }); + + group('getTaskFuncs()', () { + test('returns list of task func records from filesystem', () { + createDatasetDir(); + final tasksDir = Directory('$datasetPath/tasks'); + for (final name in ['task_a', 
'task_b']) { + final dir = Directory('${tasksDir.path}/$name'); + dir.createSync(recursive: true); + File('${dir.path}/task.yaml').writeAsStringSync('func: solve'); + } + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + final funcs = reader.getTaskFuncs(); + expect(funcs, hasLength(2)); + + // Verify discovered task names are present + final funcNames = funcs.map((f) => f.name).toSet(); + expect(funcNames, containsAll(['task_a', 'task_b'])); + } finally { + Directory.current = originalDir; + } + }); + }); + + group('clearCache()', () { + test('clears cached dataset path', () { + createDatasetDir(); + + try { + Directory.current = tempDir; + final reader = DatasetReader(); + + // Access to prime the cache + reader.datasetDirPath; + + // Clear and verify it re-discovers + reader.clearCache(); + expect(() => reader.datasetDirPath, returnsNormally); + } finally { + Directory.current = originalDir; + } + }); + }); +} diff --git a/packages/eval_cli/test/dataset/filesystem_utils_test.dart b/packages/eval_cli/test/dataset/filesystem_utils_test.dart new file mode 100644 index 0000000..36acc8b --- /dev/null +++ b/packages/eval_cli/test/dataset/filesystem_utils_test.dart @@ -0,0 +1,292 @@ +import 'dart:io'; + +import 'package:devals/src/cli_exception.dart'; +import 'package:devals/src/dataset/filesystem_utils.dart'; +import 'package:path/path.dart' as p; +import 'package:test/test.dart'; + +void main() { + late Directory tempDir; + late Directory originalDir; + + setUp(() { + originalDir = Directory.current; + tempDir = Directory.systemTemp.createTempSync('fs_utils_test_'); + DatasetReader().clearCache(); + }); + + tearDown(() { + Directory.current = originalDir; + DatasetReader().clearCache(); + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('findDatasetDirectory()', () { + test( + 'finds devals.yaml in current directory and resolves dataset path', + () { + // Create: tempDir/devals.yaml pointing to ./evals + 
final evalsDir = Directory(p.join(tempDir.path, 'evals', 'tasks')); + evalsDir.createSync(recursive: true); + File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('dataset: ./evals\n'); + + try { + Directory.current = tempDir; + final result = findDatasetDirectory(); + expect( + result, + equals( + p.normalize( + p.join(tempDir.resolveSymbolicLinksSync(), 'evals'), + ), + ), + ); + } finally { + Directory.current = originalDir; + } + }, + ); + + test('walks up to find devals.yaml in ancestor directory', () { + // Create: tempDir/devals.yaml + // Cwd: tempDir/some/deep/subdir + final evalsDir = Directory(p.join(tempDir.path, 'evals', 'tasks')); + evalsDir.createSync(recursive: true); + File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('dataset: ./evals\n'); + + final subDir = Directory(p.join(tempDir.path, 'some', 'deep', 'subdir')); + subDir.createSync(recursive: true); + + try { + Directory.current = subDir; + final result = findDatasetDirectory(); + expect( + result, + equals( + p.normalize( + p.join(tempDir.resolveSymbolicLinksSync(), 'evals'), + ), + ), + ); + } finally { + Directory.current = originalDir; + } + }); + + test('throws CliException when no devals.yaml found', () { + try { + Directory.current = tempDir; + expect( + () => findDatasetDirectory(), + throwsA(isA()), + ); + } finally { + Directory.current = originalDir; + } + }); + + test('throws when devals.yaml is missing dataset field', () { + File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('name: test\n'); + + try { + Directory.current = tempDir; + expect( + () => findDatasetDirectory(), + throwsA(isA()), + ); + } finally { + Directory.current = originalDir; + } + }); + + test('throws when dataset path has no tasks/ subdirectory', () { + Directory(p.join(tempDir.path, 'evals')).createSync(); + File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('dataset: ./evals\n'); + + try { + Directory.current = tempDir; + expect( + () => 
findDatasetDirectory(), + throwsA(isA()), + ); + } finally { + Directory.current = originalDir; + } + }); + }); + + group('ensureDirectoryExists()', () { + test('creates directory when it does not exist', () { + final newPath = '${tempDir.path}/new_dir'; + expect(Directory(newPath).existsSync(), isFalse); + + ensureDirectoryExists(newPath); + + expect(Directory(newPath).existsSync(), isTrue); + }); + + test('does not error when directory already exists', () { + final existingPath = '${tempDir.path}/existing'; + Directory(existingPath).createSync(); + + expect(() => ensureDirectoryExists(existingPath), returnsNormally); + expect(Directory(existingPath).existsSync(), isTrue); + }); + + test('creates nested directories', () { + final nestedPath = '${tempDir.path}/a/b/c'; + + ensureDirectoryExists(nestedPath); + + expect(Directory(nestedPath).existsSync(), isTrue); + }); + }); + + group('writeFile()', () { + test('writes content to new file', () { + final filePath = '${tempDir.path}/test.txt'; + + writeFile(filePath, 'Hello, World!'); + + expect(File(filePath).existsSync(), isTrue); + expect(File(filePath).readAsStringSync(), equals('Hello, World!')); + }); + + test('creates parent directories if missing', () { + final filePath = '${tempDir.path}/nested/dir/file.txt'; + + writeFile(filePath, 'content'); + + expect(File(filePath).existsSync(), isTrue); + }); + + test('overwrites existing file', () { + final filePath = '${tempDir.path}/overwrite.txt'; + File(filePath).writeAsStringSync('original'); + + writeFile(filePath, 'updated'); + + expect(File(filePath).readAsStringSync(), equals('updated')); + }); + + test('handles empty content', () { + final filePath = '${tempDir.path}/empty.txt'; + + writeFile(filePath, ''); + + expect(File(filePath).existsSync(), isTrue); + expect(File(filePath).readAsStringSync(), equals('')); + }); + + test('handles multiline content', () { + final filePath = '${tempDir.path}/multiline.txt'; + final content = 'Line 1\nLine 2\nLine 3'; + + 
writeFile(filePath, content); + + expect(File(filePath).readAsStringSync(), equals(content)); + }); + }); + + group('readFile()', () { + test('reads content from existing file', () { + final filePath = '${tempDir.path}/readable.txt'; + File(filePath).writeAsStringSync('test content'); + + final result = readFile(filePath); + + expect(result, equals('test content')); + }); + + test('throws CliException for non-existent file', () { + expect( + () => readFile('${tempDir.path}/nonexistent.txt'), + throwsA(isA()), + ); + }); + + test('reads empty file', () { + final filePath = '${tempDir.path}/empty.txt'; + File(filePath).writeAsStringSync(''); + + final result = readFile(filePath); + + expect(result, equals('')); + }); + + test('preserves newlines', () { + final filePath = '${tempDir.path}/lines.txt'; + File(filePath).writeAsStringSync('a\nb\nc'); + + final result = readFile(filePath); + + expect(result, equals('a\nb\nc')); + }); + }); + + group('findJobsDir()', () { + test('returns path when jobs directory exists', () { + final datasetPath = '${tempDir.path}/dataset'; + final jobsPath = '$datasetPath/jobs'; + Directory(datasetPath).createSync(); + Directory(jobsPath).createSync(); + + final result = findJobsDir(datasetPath); + + expect(result, equals(jobsPath)); + }); + + test('creates jobs directory if missing', () { + final datasetPath = '${tempDir.path}/dataset'; + Directory(datasetPath).createSync(); + + final result = findJobsDir(datasetPath); + + expect(Directory(result).existsSync(), isTrue); + }); + }); + + group('findLogsDir()', () { + test('returns path when logs directory exists inside dataset dir', () { + final datasetPath = '${tempDir.path}/dataset'; + final logsPath = '$datasetPath/logs'; + Directory(datasetPath).createSync(); + Directory(logsPath).createSync(); + + final result = findLogsDir(datasetPath); + + expect(result, equals(logsPath)); + }); + + test('returns null when logs directory missing', () { + final datasetPath = '${tempDir.path}/dataset'; 
+ Directory(datasetPath).createSync(); + + final result = findLogsDir(datasetPath); + + expect(result, isNull); + }); + }); + + group('tryFindDatasetDirectory()', () { + test('returns null when devals.yaml not found', () { + try { + Directory.current = tempDir; + final result = tryFindDatasetDirectory(); + expect(result, isNull); + } finally { + Directory.current = originalDir; + } + }); + }); +} diff --git a/packages/eval_cli/test/dataset/job_template_test.dart b/packages/eval_cli/test/dataset/job_template_test.dart new file mode 100644 index 0000000..64acbff --- /dev/null +++ b/packages/eval_cli/test/dataset/job_template_test.dart @@ -0,0 +1,167 @@ +import 'package:devals/src/dataset/file_templates/job_template.dart'; +import 'package:test/test.dart'; +import 'package:yaml/yaml.dart'; + +void main() { + group('jobTemplate()', () { + test('generates valid YAML', () { + final result = jobTemplate( + name: 'test_job', + models: ['model1'], + variants: ['variant1'], + tasks: ['task1'], + ); + + // Should be valid YAML + expect(() => loadYaml(result), returnsNormally); + }); + + test('includes job name', () { + final result = jobTemplate( + name: 'my_job', + models: ['m1'], + variants: ['v1'], + tasks: ['t1'], + ); + expect(result, contains('# Job Configuration: my_job')); + }); + + test('single model formatted correctly', () { + final result = jobTemplate( + name: 'test', + models: ['gemini-pro'], + variants: ['v1'], + tasks: ['t1'], + ); + expect(result, contains('- gemini-pro')); + }); + + test('multiple models listed', () { + final result = jobTemplate( + name: 'test', + models: ['model1', 'model2', 'model3'], + variants: ['v1'], + tasks: ['t1'], + ); + expect(result, contains('- model1')); + expect(result, contains('- model2')); + expect(result, contains('- model3')); + }); + + test('single variant formatted correctly', () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: ['baseline'], + tasks: ['t1'], + ); + expect(result, 
contains('baseline: {}')); + }); + + test( + 'multiple variants listed', + () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: ['v1', 'v2', 'v3'], + tasks: ['t1'], + ); + expect(result, contains('v1: {}')); + expect(result, contains('v2: {}')); + expect(result, contains('v3: {}')); + }, + skip: 'The way CLI presents variants is being refactored.', + ); + + test('single task with empty config', () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: ['v1'], + tasks: ['my_task'], + ); + expect(result, contains('my_task: {}')); + }); + + test('multiple tasks listed', () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: ['v1'], + tasks: ['task1', 'task2', 'task3'], + ); + expect(result, contains('task1: {}')); + expect(result, contains('task2: {}')); + expect(result, contains('task3: {}')); + }); + + test('empty models list', () { + final result = jobTemplate( + name: 'test', + models: [], + variants: ['v1'], + tasks: ['t1'], + ); + // Should still generate valid YAML structure + expect(result, contains('models:')); + }); + + test('empty variants list', () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: [], + tasks: ['t1'], + ); + expect(result, contains('variants:')); + }); + + test('empty tasks list', () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: ['v1'], + tasks: [], + ); + expect(result, contains('tasks:')); + }); + + test('special characters in name', () { + final result = jobTemplate( + name: 'test-job_v2', + models: ['m1'], + variants: ['v1'], + tasks: ['t1'], + ); + expect(result, contains('# Job Configuration: test-job_v2')); + }); + + test('includes header comment', () { + final result = jobTemplate( + name: 'test', + models: ['m1'], + variants: ['v1'], + tasks: ['t1'], + ); + expect(result, contains('# Job Configuration:')); + }); + + test('sections appear in correct order', () { + final result = jobTemplate( + 
name: 'test', + models: ['m1'], + variants: ['v1'], + tasks: ['t1'], + ); + final configIndex = result.indexOf('# Job Configuration:'); + final modelsIndex = result.indexOf('models:'); + final variantsIndex = result.indexOf('variants:'); + final tasksIndex = result.indexOf('tasks:'); + + expect(configIndex, lessThan(modelsIndex)); + expect(modelsIndex, lessThan(variantsIndex)); + expect(variantsIndex, lessThan(tasksIndex)); + }); + }); +} diff --git a/packages/eval_cli/test/dataset/sample_template_test.dart b/packages/eval_cli/test/dataset/sample_template_test.dart new file mode 100644 index 0000000..51257b6 --- /dev/null +++ b/packages/eval_cli/test/dataset/sample_template_test.dart @@ -0,0 +1,85 @@ +import 'package:devals/src/dataset/file_templates/sample_template.dart'; +import 'package:devals/src/dataset/workspace.dart'; +import 'package:test/test.dart'; + +void main() { + group('sampleTemplate()', () { + test('generates sample block with required params only', () { + final result = sampleTemplate( + id: 'test_sample', + difficulty: 'easy', + ); + + expect(result, contains('id: test_sample')); + expect(result, contains('difficulty: easy')); + expect(result, contains('input:')); + expect(result, contains('target:')); + }); + + test('includes tags field', () { + final result = sampleTemplate(id: 'test', difficulty: 'easy'); + expect(result, contains('tags: []')); + }); + + test('with git workspace includes git section', () { + final result = sampleTemplate( + id: 'test', + difficulty: 'easy', + workspaceType: WorkspaceType.git, + workspaceValue: 'https://github.com/example/repo.git', + ); + expect(result, contains('git:')); + expect(result, contains('https://github.com/example/repo.git')); + }); + + test('with path workspace includes path section', () { + final result = sampleTemplate( + id: 'test', + difficulty: 'easy', + workspaceType: WorkspaceType.path, + workspaceValue: './project', + ); + expect(result, contains('path:')); + expect(result, 
contains('./project')); + }); + + test('with template workspace includes template section', () { + final result = sampleTemplate( + id: 'test', + difficulty: 'easy', + workspaceType: WorkspaceType.template, + templatePackage: TemplatePackage.flutterApp, + ); + expect(result, contains('flutter_app')); + }); + + test('without workspace type has no workspace section', () { + final result = sampleTemplate(id: 'test', difficulty: 'easy'); + expect(result, isNot(contains('workspace:'))); + }); + + test('generates indented block for appending to task file', () { + final result = sampleTemplate(id: 'test', difficulty: 'medium'); + // Should start with indented list marker for inline sample + expect(result, contains(' - id: test')); + }); + + test('git type with null value uses placeholder', () { + final result = sampleTemplate( + id: 'test', + difficulty: 'easy', + workspaceType: WorkspaceType.git, + ); + expect(result, contains('')); + }); + + test('path type with null value uses placeholder', () { + final result = sampleTemplate( + id: 'test', + difficulty: 'easy', + workspaceType: WorkspaceType.path, + ); + expect(result, contains('')); + }); + }); +} diff --git a/packages/eval_cli/test/dataset/task_template_test.dart b/packages/eval_cli/test/dataset/task_template_test.dart new file mode 100644 index 0000000..463ae62 --- /dev/null +++ b/packages/eval_cli/test/dataset/task_template_test.dart @@ -0,0 +1,134 @@ +import 'package:devals/src/dataset/file_templates/task_template.dart'; +import 'package:devals/src/dataset/workspace.dart'; +import 'package:test/test.dart'; + +void main() { + group('taskTemplate', () { + test('generates YAML with func field', () { + final result = taskTemplate(taskFunc: 'question_answer'); + expect(result, contains('func: question_answer')); + }); + + test('includes samples section', () { + final result = taskTemplate(taskFunc: 'flutter_bug_fix'); + expect(result, contains('samples:')); + expect(result, contains('- id: sample_1')); + 
expect(result, contains('input: |')); + expect(result, contains('target: |')); + }); + + test('includes variants when provided', () { + final result = taskTemplate( + taskFunc: 'flutter_code_gen', + variants: ['baseline', 'mcp_only'], + ); + expect(result, contains('variants: [baseline, mcp_only]')); + }); + + test('omits variants line when list is empty', () { + final result = taskTemplate(taskFunc: 'question_answer'); + expect(result, isNot(contains('variants:'))); + }); + + test('includes system_message when provided', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + systemMessage: 'You are a helpful assistant.', + ); + expect(result, contains('system_message: |')); + expect(result, contains('You are a helpful assistant.')); + }); + + test('omits system_message when null', () { + final result = taskTemplate(taskFunc: 'flutter_bug_fix'); + expect(result, isNot(contains('system_message:'))); + }); + + test('omits system_message when empty string', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + systemMessage: '', + ); + expect(result, isNot(contains('system_message:'))); + }); + + group('workspace section', () { + test('generates git workspace', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + workspaceType: WorkspaceType.git, + workspaceValue: 'https://github.com/example/repo', + ); + expect(result, contains('workspace:')); + expect(result, contains('git: https://github.com/example/repo')); + }); + + test('generates path workspace', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + workspaceType: WorkspaceType.path, + workspaceValue: './my_project', + ); + expect(result, contains('workspace:')); + expect(result, contains('path: ./my_project')); + }); + + test('generates template workspace', () { + final result = taskTemplate( + taskFunc: 'flutter_code_gen', + workspaceType: WorkspaceType.template, + templatePackage: TemplatePackage.flutterApp, + ); + expect(result, 
contains('workspace:')); + expect(result, contains('template: flutter_app')); + }); + + test('generates create workspace as path', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + workspaceType: WorkspaceType.create, + ); + expect(result, contains('workspace:')); + expect(result, contains('path: ./project')); + }); + + test('generates commented workspace section when type is null', () { + final result = taskTemplate(taskFunc: 'question_answer'); + expect(result, contains('# Workspace configuration')); + expect(result, contains('# template: flutter_app')); + }); + + test('generates git with default URL when workspaceValue is null', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + workspaceType: WorkspaceType.git, + ); + expect(result, contains('git: ')); + }); + + test('generates path with default when workspaceValue is null', () { + final result = taskTemplate( + taskFunc: 'flutter_bug_fix', + workspaceType: WorkspaceType.path, + ); + expect(result, contains('path: ./project')); + }); + + test( + 'generates template with placeholder when templatePackage is null', + () { + final result = taskTemplate( + taskFunc: 'flutter_code_gen', + workspaceType: WorkspaceType.template, + ); + expect( + result, + contains( + 'template: ', + ), + ); + }, + ); + }); + }); +} diff --git a/packages/eval_cli/test/e2e/create_job_e2e_test.dart b/packages/eval_cli/test/e2e/create_job_e2e_test.dart new file mode 100644 index 0000000..de42459 --- /dev/null +++ b/packages/eval_cli/test/e2e/create_job_e2e_test.dart @@ -0,0 +1,84 @@ +@Tags(['e2e']) +library; + +import 'dart:io' as io; + +import 'package:path/path.dart' as p; +import 'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late io.Directory tempDir; + + setUp(() { + // Create dataset with tasks so that create job can reference them + tempDir = createTestDatasetDir(); + + // Create an existing task so it shows up in selections + final taskDir = io.Directory( + 
p.join(tempDir.path, 'evals', 'tasks', 'existing_task'), + ); + taskDir.createSync(recursive: true); + io.File(p.join(taskDir.path, 'task.yaml')).writeAsStringSync( + 'func: question_answer\nsamples:\n - id: s1\n input: test\n target: test\n', + ); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals create job', () { + // The CreateJobCommand now uses howdy interactive widgets (Prompt and + // Multiselect) which require a real TTY — they call stdin.lineMode = false + // internally, which throws a StdinException when stdin is a pipe. + // + // Because of this, this command cannot be driven via piped stdin in an + // automated test. Run `devals create job` manually in a real terminal to + // verify the full interactive flow. + test( + 'creates a job file interactively', + skip: + 'howdy widgets require a real TTY — run `devals create job` manually to verify', + () async { + final result = await runDevals( + ['create', 'job'], + stdinLines: [ + 'my_test_job', + ' ', // space=toggle, \n=submit for Multiselect (models) + ' ', // variants + ' ', // tasks + ], + workingDirectory: tempDir.path, + ); + + expect( + result.exitCode, + 0, + reason: 'stdout: ${result.stdout}\nstderr: ${result.stderr}', + ); + + // Verify the job file was created + final jobFile = io.File( + p.join(tempDir.path, 'evals', 'jobs', 'my_test_job.yaml'), + ); + expect( + jobFile.existsSync(), + isTrue, + reason: 'jobs/my_test_job.yaml should exist', + ); + + // Verify content + final content = jobFile.readAsStringSync(); + expect(content, contains('my_test_job')); + expect(content, contains('claude-haiku-4-5')); + + // Verify output + expect(result.stdout, contains('Created')); + }, + ); + }); +} diff --git a/packages/eval_cli/test/e2e/create_sample_e2e_test.dart b/packages/eval_cli/test/e2e/create_sample_e2e_test.dart new file mode 100644 index 0000000..a454416 --- /dev/null +++ 
b/packages/eval_cli/test/e2e/create_sample_e2e_test.dart @@ -0,0 +1,85 @@ +@Tags(['e2e']) +library; + +import 'dart:io' as io; + +import 'package:path/path.dart' as p; +import 'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late io.Directory tempDir; + + setUp(() { + tempDir = createTestDatasetDir(); + + // Create an existing task with a sample so `create sample` can append to it + final taskDir = io.Directory( + p.join(tempDir.path, 'evals', 'tasks', 'my_task'), + ); + taskDir.createSync(recursive: true); + io.File(p.join(taskDir.path, 'task.yaml')).writeAsStringSync(''' +func: question_answer + +samples: + - id: first_sample + input: What is Dart? + target: A programming language +'''); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals create sample', () { + // CreateSampleCommand uses howdy interactive widgets (Form, Select, Prompt) + // which require a real TTY — they call stdin.lineMode = false internally, + // which throws a StdinException when stdin is a pipe. + test( + 'appends a sample to an existing task', + skip: + 'howdy widgets require a real TTY — run `devals create sample` manually to verify', + () async { + // Stdin sequence for CreateSampleCommand: + // 1. Task selection (SelectPrompt: "1" for first task) + // 2. Sample ID (TextInputPrompt) + // 3. Difficulty (SelectPrompt: 1=easy, 2=medium, 3=hard) + // 4. Confirm (YesNoPrompt) + final result = await runDevals( + ['create', 'sample'], + stdinLines: [ + '1', // 1. select "my_task" + 'new_sample', // 2. sample ID + '2', // 3. difficulty: medium + 'y', // 4. 
confirm + ], + workingDirectory: tempDir.path, + ); + + expect( + result.exitCode, + 0, + reason: 'stdout: ${result.stdout}\nstderr: ${result.stderr}', + ); + + // Verify the task file was modified + final taskFile = io.File( + p.join(tempDir.path, 'evals', 'tasks', 'my_task', 'task.yaml'), + ); + final content = taskFile.readAsStringSync(); + expect( + content, + contains('new_sample'), + reason: 'task.yaml should contain the new sample ID', + ); + + // Verify output + expect(result.stdout, contains('Added sample')); + }, + ); + }); +} diff --git a/packages/eval_cli/test/e2e/create_task_e2e_test.dart b/packages/eval_cli/test/e2e/create_task_e2e_test.dart new file mode 100644 index 0000000..5915e9d --- /dev/null +++ b/packages/eval_cli/test/e2e/create_task_e2e_test.dart @@ -0,0 +1,111 @@ +@Tags(['e2e']) +library; + +import 'dart:io' as io; + +import 'package:path/path.dart' as p; +import 'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late io.Directory tempDir; + + setUp(() { + tempDir = createTestDatasetDir(); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals create task', () { + // CreateTaskCommand uses howdy interactive widgets (Form, Prompt, Select) + // which require a real TTY — they call stdin.lineMode = false internally, + // which throws a StdinException when stdin is a pipe. + test( + 'creates a task with path workspace', + skip: + 'howdy widgets require a real TTY — run `devals create task` manually to verify', + () async { + // Stdin sequence for CreateTaskCommand: + // 1. Task name (TextInputPrompt) + // 2. Task function (SelectPrompt, 1-indexed) + // 3. Variants (MultiSelectPrompt, "1" for baseline) + // 4. Workspace type (SelectPrompt: 1=path, 2=git, 3=create) + // 5. Relative path (TextInputPrompt, for path workspace) + // 6. System message (TextInputPrompt, optional — empty for skip) + // 7. 
Confirm (YesNoPrompt) + final result = await runDevals( + ['create', 'task'], + stdinLines: [ + 'my_test_task', // 1. task name + '1', // 2. select "analyze_codebase" (first task func) + '1', // 3. select "baseline" variant + '1', // 4. workspace type: "path" + '../../app', // 5. relative path + '', // 6. system message: skip (optional) + 'y', // 7. confirm + ], + workingDirectory: tempDir.path, + ); + + expect( + result.exitCode, + 0, + reason: 'stdout: ${result.stdout}\nstderr: ${result.stderr}', + ); + + // Verify the task file was created + final taskYaml = io.File( + p.join(tempDir.path, 'evals', 'tasks', 'my_test_task', 'task.yaml'), + ); + expect( + taskYaml.existsSync(), + isTrue, + reason: 'tasks/my_test_task/task.yaml should exist', + ); + + // Verify the task.yaml has expected content + final content = taskYaml.readAsStringSync(); + expect(content, contains('analyze_codebase')); + + // Verify output + expect(result.stdout, contains('Created task')); + }, + ); + + test( + 'creates a task with create workspace', + skip: + 'howdy widgets require a real TTY — run `devals create task` manually to verify', + () async { + final result = await runDevals( + ['create', 'task'], + stdinLines: [ + 'create_ws_task', // 1. task name + '2', // 2. select "flutter_bug_fix" + '', // 3. variants: skip (optional) + '3', // 4. workspace type: "create" + '', // 5. creation command (use default) + '', // 6. system message: skip + 'y', // 7. confirm + ], + workingDirectory: tempDir.path, + ); + + // This may fail because `flutter create` command isn't available + // in all test environments. We test the input flow reaches confirmation. + // The important thing is it gets past the prompts without hanging. 
+ final combined = result.stdout + result.stderr; + expect( + combined, + isNotEmpty, + reason: 'Command should produce output, not hang', + ); + }, + ); + }); +} diff --git a/packages/eval_cli/test/e2e/doctor_e2e_test.dart b/packages/eval_cli/test/e2e/doctor_e2e_test.dart new file mode 100644 index 0000000..dd980fe --- /dev/null +++ b/packages/eval_cli/test/e2e/doctor_e2e_test.dart @@ -0,0 +1,34 @@ +@Tags(['e2e']) +library; + +import 'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late Directory tempDir; + + setUp(() { + tempDir = createTestDatasetDir(); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals doctor', () { + test('runs and outputs check names', () async { + final result = await runDevals( + ['doctor'], + workingDirectory: tempDir.path, + ); + // Doctor may exit 0 or 1 depending on the host environment, + // but it should always run and produce output. + expect(result.exitCode, isIn([0, 1])); + expect(result.stdout, contains('Dart SDK')); + expect(result.stdout, contains('Python')); + }); + }); +} diff --git a/packages/eval_cli/test/e2e/e2e_helpers.dart b/packages/eval_cli/test/e2e/e2e_helpers.dart new file mode 100644 index 0000000..3b153e4 --- /dev/null +++ b/packages/eval_cli/test/e2e/e2e_helpers.dart @@ -0,0 +1,110 @@ +import 'dart:convert'; +import 'dart:io'; +import 'package:path/path.dart' as p; + +export 'dart:io' show Directory; + +/// Result of running the devals CLI as a subprocess. +class DevalResult { + final int exitCode; + final String stdout; + final String stderr; + + const DevalResult({ + required this.exitCode, + required this.stdout, + required this.stderr, + }); + + /// Whether the command exited successfully. + bool get isSuccess => exitCode == 0; + + @override + String toString() => + 'DevalResult(exit: $exitCode, stdout: ${stdout.length} chars, stderr: ${stderr.length} chars)'; +} + +/// Runs the devals CLI as a subprocess. 
+/// +/// [args] are the command-line arguments (e.g., `['init']`, `['create', 'task']`). +/// [stdinLines] are lines to feed to the process's stdin (for interactive prompts). +/// [workingDirectory] is the directory to run in (defaults to eval_cli package root). +/// +/// Returns a [DevalResult] with captured exit code, stdout, and stderr. +Future runDevals( + List args, { + List? stdinLines, + required String workingDirectory, +}) async { + // Resolve the path to bin/devals.dart relative to the eval_cli package. + final evalCliRoot = _findEvalCliRoot(); + final devalsScript = p.join(evalCliRoot, 'bin', 'devals.dart'); + + final process = await Process.start( + 'dart', + ['run', devalsScript, ...args], + workingDirectory: workingDirectory, + ); + + // Feed stdin lines if provided, then close stdin. + if (stdinLines != null) { + for (final line in stdinLines) { + process.stdin.writeln(line); + } + } + await process.stdin.close(); + + final stdoutFuture = process.stdout.transform(utf8.decoder).join(); + final stderrFuture = process.stderr.transform(utf8.decoder).join(); + + final exitCode = await process.exitCode; + final stdout = await stdoutFuture; + final stderr = await stderrFuture; + + return DevalResult(exitCode: exitCode, stdout: stdout, stderr: stderr); +} + +/// Finds the eval_cli package root by walking up from this test file. 
+String _findEvalCliRoot() { + // This file lives at pkgs/eval_cli/test/e2e/e2e_helpers.dart + // We need to find pkgs/eval_cli/ + var dir = Directory(p.dirname(Platform.script.toFilePath())); + + // Walk up until we find pubspec.yaml with name: devals + for (var i = 0; i < 10; i++) { + final pubspec = File(p.join(dir.path, 'pubspec.yaml')); + if (pubspec.existsSync() && + pubspec.readAsStringSync().contains('name: devals')) { + return dir.path; + } + dir = dir.parent; + } + + // Fallback: assume we're running from the eval_cli directory + return Directory.current.path; +} + +/// Creates a minimal dataset directory structure in a temp directory. +/// +/// The returned directory is the project root, containing: +/// - `devals.yaml` — marker file pointing to `./evals` +/// - `evals/tasks/` — empty tasks directory +/// - `evals/jobs/` — empty jobs directory +/// +/// Caller is responsible for deleting the directory when done. +Directory createTestDatasetDir() { + final tempDir = Directory.systemTemp.createTempSync('devals_e2e_'); + File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('dataset: ./evals\n'); + Directory(p.join(tempDir.path, 'evals', 'tasks')).createSync(recursive: true); + Directory(p.join(tempDir.path, 'evals', 'jobs')).createSync(recursive: true); + + return tempDir; +} + +/// Creates a bare temp directory with no dataset structure. +/// Used to test commands that create the structure themselves (e.g., `init`). 
+Directory createEmptyTempDir() { + return Directory.systemTemp.createTempSync('devals_e2e_'); +} diff --git a/packages/eval_cli/test/e2e/help_e2e_test.dart b/packages/eval_cli/test/e2e/help_e2e_test.dart new file mode 100644 index 0000000..47e192b --- /dev/null +++ b/packages/eval_cli/test/e2e/help_e2e_test.dart @@ -0,0 +1,58 @@ +@Tags(['e2e']) +library; + +import 'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late Directory tempDir; + + setUp(() { + tempDir = createTestDatasetDir(); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals --help', () { + test('exits 0 and shows usage', () async { + final result = await runDevals( + ['--help'], + workingDirectory: tempDir.path, + ); + expect(result.exitCode, 0); + expect(result.stdout, contains('Available commands')); + expect(result.stdout, contains('create')); + expect(result.stdout, contains('doctor')); + expect(result.stdout, contains('init')); + expect(result.stdout, contains('run')); + }); + }); + + group('devals help create', () { + test('exits 0 and shows create subcommands', () async { + final result = await runDevals( + ['help', 'create'], + workingDirectory: tempDir.path, + ); + expect(result.exitCode, 0); + expect(result.stdout, contains('task')); + expect(result.stdout, contains('job')); + expect(result.stdout, contains('sample')); + }); + }); + + group('devals ', () { + test('exits with error for unknown command', () async { + final result = await runDevals( + ['nonexistent'], + workingDirectory: tempDir.path, + ); + expect(result.exitCode, isNot(0)); + }); + }); +} diff --git a/packages/eval_cli/test/e2e/init_e2e_test.dart b/packages/eval_cli/test/e2e/init_e2e_test.dart new file mode 100644 index 0000000..dbf87ff --- /dev/null +++ b/packages/eval_cli/test/e2e/init_e2e_test.dart @@ -0,0 +1,84 @@ +@Tags(['e2e']) +library; + +import 'dart:io' as io; + +import 'package:path/path.dart' as p; +import 
'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late io.Directory tempDir; + + setUp(() { + tempDir = createEmptyTempDir(); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals init', () { + test('creates dataset structure in empty directory', () async { + final result = await runDevals( + ['init'], + workingDirectory: tempDir.path, + ); + + expect( + result.exitCode, + 0, + reason: 'stdout: ${result.stdout}\nstderr: ${result.stderr}', + ); + + // Verify devals.yaml marker file + expect( + io.File(p.join(tempDir.path, 'devals.yaml')).existsSync(), + isTrue, + reason: 'devals.yaml should be created', + ); + + // Verify created files under evals/ + expect( + io.Directory(p.join(tempDir.path, 'evals', 'tasks')).existsSync(), + isTrue, + reason: 'evals/tasks/ directory should be created', + ); + expect( + io.File( + p.join(tempDir.path, 'evals', 'tasks', 'get_started', 'task.yaml'), + ).existsSync(), + isTrue, + reason: 'evals/tasks/get_started/task.yaml should be created', + ); + expect( + io.File( + p.join(tempDir.path, 'evals', 'jobs', 'local_dev.yaml'), + ).existsSync(), + isTrue, + reason: 'evals/jobs/local_dev.yaml should be created', + ); + + // Verify output messages + expect(result.stdout, contains('Initialized')); + }); + + test('fails when already initialized (devals.yaml exists)', () async { + // Create existing devals.yaml + io.File( + p.join(tempDir.path, 'devals.yaml'), + ).writeAsStringSync('dataset: ./evals\n'); + + final result = await runDevals( + ['init'], + workingDirectory: tempDir.path, + ); + + expect(result.exitCode, 1); + expect(result.stdout, contains('already')); + }); + }); +} diff --git a/packages/eval_cli/test/e2e/run_e2e_test.dart b/packages/eval_cli/test/e2e/run_e2e_test.dart new file mode 100644 index 0000000..5701f6a --- /dev/null +++ b/packages/eval_cli/test/e2e/run_e2e_test.dart @@ -0,0 +1,45 @@ +@Tags(['e2e']) +library; + +import 
'package:test/test.dart'; + +import 'e2e_helpers.dart'; + +void main() { + late Directory tempDir; + + setUp(() { + tempDir = createTestDatasetDir(); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + group('devals run', () { + test('fails with missing job argument', () async { + final result = await runDevals( + ['run'], + workingDirectory: tempDir.path, + ); + expect(result.exitCode, 1); + expect(result.stdout, contains('Missing required argument')); + }); + + test('dry-run outputs the command that would run', () async { + final result = await runDevals( + ['run', '--dry-run', 'local_dev'], + workingDirectory: tempDir.path, + ); + + // The command will try to run `run-evals`, which may not be installed. + // If installed, it should mention the dry-run args. + // If not installed, it exits with an error about run-evals not found. + // Either way, the output should reference the job name. + final combined = result.stdout + result.stderr; + expect(combined, contains('local_dev')); + }); + }); +} diff --git a/packages/eval_config/.gitignore b/packages/eval_config/.gitignore new file mode 100644 index 0000000..3a85790 --- /dev/null +++ b/packages/eval_config/.gitignore @@ -0,0 +1,3 @@ +# https://dart.dev/guides/libraries/private-files +# Created by `dart pub` +.dart_tool/ diff --git a/packages/eval_config/CHANGELOG.md b/packages/eval_config/CHANGELOG.md new file mode 100644 index 0000000..effe43c --- /dev/null +++ b/packages/eval_config/CHANGELOG.md @@ -0,0 +1,3 @@ +## 1.0.0 + +- Initial version. diff --git a/packages/eval_config/README.md b/packages/eval_config/README.md new file mode 100644 index 0000000..3816eca --- /dev/null +++ b/packages/eval_config/README.md @@ -0,0 +1,2 @@ +A sample command-line application with an entrypoint in `bin/`, library code +in `lib/`, and example unit test in `test/`. 
diff --git a/packages/eval_config/analysis_options.yaml b/packages/eval_config/analysis_options.yaml new file mode 100644 index 0000000..36dda18 --- /dev/null +++ b/packages/eval_config/analysis_options.yaml @@ -0,0 +1,33 @@ +# This file configures the static analysis results for your project (errors, +# warnings, and lints). +# +# This enables the 'recommended' set of lints from `package:lints`. +# This set helps identify many issues that may lead to problems when running +# or consuming Dart code, and enforces writing Dart using a single, idiomatic +# style and format. +# +# If you want a smaller set of lints you can change this to specify +# 'package:lints/core.yaml'. These are just the most critical lints +# (the recommended set includes the core lints). +# The core lints are also what is used by pub.dev for scoring packages. + +include: package:lints/recommended.yaml + +formatter: + trailing_commas: preserve + +# Uncomment the following section to specify additional rules. + +# linter: +# rules: +# - camel_case_types + +# analyzer: +# exclude: +# - path/to/excluded/files/** + +# For more information about the core and recommended set of lints, see +# https://dart.dev/go/core-lints + +# For additional information about configuring this file, see +# https://dart.dev/guides/language/analysis-options diff --git a/packages/eval_config/lib/eval_config.dart b/packages/eval_config/lib/eval_config.dart new file mode 100644 index 0000000..90c5640 --- /dev/null +++ b/packages/eval_config/lib/eval_config.dart @@ -0,0 +1,51 @@ +/// Core library for resolving eval dataset YAML into EvalSet JSON. +/// +/// This package contains the business logic for: +/// - Parsing task and job YAML files (or pre-parsed JSON maps) +/// - Resolving configs (models, sandboxes, variants) +/// - Writing EvalSet JSON for the Python runner +/// +/// It is frontend-agnostic — both the CLI and a future web interface +/// can use this library. 
+/// +/// ## Quick start +/// +/// Use [ConfigResolver] for a single-call convenience facade: +/// +/// ```dart +/// final resolver = ConfigResolver(); +/// final configs = resolver.resolve(datasetPath, ['my_job']); +/// ``` +/// +/// ## Layered API +/// +/// For finer-grained control, use the individual layers: +/// +/// 1. **Parsers** — [YamlParser], [JsonParser] +/// 2. **Resolvers** — [EvalSetResolver] +/// 3. **Writers** — [EvalSetWriter] +library; + +// Facade +export 'src/config_resolver.dart'; + +// Parsers +export 'src/parsers/parser.dart'; +export 'src/parsers/yaml_parser.dart'; +export 'src/parsers/json_parser.dart'; + +// Resolvers +export 'src/resolvers/eval_set_resolver.dart'; + +// Internal types (used by Parser/Resolver API) +export 'src/parsed_task.dart'; + +// Writers +export 'src/writers/eval_set_writer.dart'; + +// Supporting +export 'src/runner_config_exception.dart'; +export 'src/utils/yaml_utils.dart'; + +// Models (merged from the former `models` package) +export 'src/models/models.dart'; diff --git a/packages/eval_config/lib/src/config_resolver.dart b/packages/eval_config/lib/src/config_resolver.dart new file mode 100644 index 0000000..1f4e4ed --- /dev/null +++ b/packages/eval_config/lib/src/config_resolver.dart @@ -0,0 +1,30 @@ +import 'models/models.dart'; + +import 'parsers/yaml_parser.dart'; +import 'resolvers/eval_set_resolver.dart'; + +/// Convenience facade that composes Parser → Resolver into a single call. +/// +/// For finer-grained control, use [YamlParser], [JsonParser], +/// and [EvalSetResolver] directly. +class ConfigResolver { + /// Resolve dataset + job(s) into [EvalSet] objects. + /// + /// [datasetPath] is the root directory containing `tasks/` and `jobs/`. + /// [jobNames] are the job names (looked up in `jobs/`) or paths. 
+ List resolve(String datasetPath, List jobNames) { + final parser = YamlParser(); + final resolver = EvalSetResolver(); + + final taskConfigs = parser.parseTasks(datasetPath); + final configs = []; + + for (final jobName in jobNames) { + final jobPath = findJobFile(datasetPath, jobName); + final job = parser.parseJob(jobPath, datasetPath); + configs.addAll(resolver.resolve(taskConfigs, job, datasetPath)); + } + + return configs; + } +} diff --git a/packages/eval_config/lib/src/models/context_file.dart b/packages/eval_config/lib/src/models/context_file.dart new file mode 100644 index 0000000..fd05931 --- /dev/null +++ b/packages/eval_config/lib/src/models/context_file.dart @@ -0,0 +1,115 @@ +import 'dart:io'; + +import 'package:freezed_annotation/freezed_annotation.dart'; +import 'package:yaml/yaml.dart'; + +part 'context_file.freezed.dart'; +part 'context_file.g.dart'; + +/// Metadata parsed from a context file's YAML frontmatter. +@freezed +sealed class ContextFileMetadata with _$ContextFileMetadata { + const factory ContextFileMetadata({ + /// Title of the context file. + required String title, + + /// Version string. + required String version, + + /// Description of the context file. + required String description, + + /// Dart SDK version this context targets. + @JsonKey(name: 'dart_version') String? dartVersion, + + /// Flutter SDK version this context targets. + @JsonKey(name: 'flutter_version') String? flutterVersion, + + /// Last updated date string. + String? updated, + }) = _ContextFileMetadata; + + factory ContextFileMetadata.fromJson(Map json) => + _$ContextFileMetadataFromJson(json); +} + +/// A context file with parsed YAML frontmatter and markdown content. +/// +/// Context files provide additional documentation or guidelines that are +/// injected into the model's conversation as part of a variant configuration. 
+/// +/// File format: +/// ```markdown +/// --- +/// title: Flutter Widget Guide +/// version: "1.0" +/// description: Comprehensive guide to Flutter widgets +/// --- +/// # Content starts here... +/// +/// ``` +@freezed +sealed class ContextFile with _$ContextFile { + const factory ContextFile({ + /// Parsed frontmatter metadata. + required ContextFileMetadata metadata, + + /// File content after the frontmatter section. + required String content, + + /// Absolute path to the context file on disk. + @JsonKey(name: 'file_path') required String filePath, + }) = _ContextFile; + + const ContextFile._(); + + factory ContextFile.fromJson(Map json) => + _$ContextFileFromJson(json); + + /// Load a context file from disk, parsing its YAML frontmatter. + /// + /// The file must begin with `---` and contain valid YAML frontmatter + /// followed by a closing `---` delimiter. + /// + /// Throws [FileSystemException] if the file doesn't exist. + /// Throws [FormatException] if the file lacks valid YAML frontmatter. + static ContextFile load(String filePath) { + final file = File(filePath); + if (!file.existsSync()) { + throw FileSystemException('Context file not found', filePath); + } + + final text = file.readAsStringSync(); + + if (!text.startsWith('---')) { + throw FormatException( + 'Context file must have YAML frontmatter: $filePath', + ); + } + + final parts = text.split('---'); + if (parts.length < 3) { + throw FormatException('Invalid frontmatter in $filePath'); + } + + // parts[0] is empty (before first ---), parts[1] is frontmatter, + // parts[2..] 
is content (rejoin in case content contains ---) + final yamlContent = loadYaml(parts[1]) as Map; + final content = parts.sublist(2).join('---').trim(); + + final metadata = ContextFileMetadata( + title: yamlContent['title'] as String, + version: yamlContent['version'].toString(), + description: yamlContent['description'] as String, + dartVersion: yamlContent['dart_version']?.toString(), + flutterVersion: yamlContent['flutter_version']?.toString(), + updated: yamlContent['updated']?.toString(), + ); + + return ContextFile( + metadata: metadata, + content: content, + filePath: filePath, + ); + } +} diff --git a/packages/eval_config/lib/src/models/context_file.freezed.dart b/packages/eval_config/lib/src/models/context_file.freezed.dart new file mode 100644 index 0000000..581b522 --- /dev/null +++ b/packages/eval_config/lib/src/models/context_file.freezed.dart @@ -0,0 +1,585 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'context_file.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$ContextFileMetadata { + +/// Title of the context file. + String get title;/// Version string. + String get version;/// Description of the context file. + String get description;/// Dart SDK version this context targets. +@JsonKey(name: 'dart_version') String? get dartVersion;/// Flutter SDK version this context targets. +@JsonKey(name: 'flutter_version') String? 
get flutterVersion;/// Last updated date string. + String? get updated; +/// Create a copy of ContextFileMetadata +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContextFileMetadataCopyWith get copyWith => _$ContextFileMetadataCopyWithImpl(this as ContextFileMetadata, _$identity); + + /// Serializes this ContextFileMetadata to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContextFileMetadata&&(identical(other.title, title) || other.title == title)&&(identical(other.version, version) || other.version == version)&&(identical(other.description, description) || other.description == description)&&(identical(other.dartVersion, dartVersion) || other.dartVersion == dartVersion)&&(identical(other.flutterVersion, flutterVersion) || other.flutterVersion == flutterVersion)&&(identical(other.updated, updated) || other.updated == updated)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,title,version,description,dartVersion,flutterVersion,updated); + +@override +String toString() { + return 'ContextFileMetadata(title: $title, version: $version, description: $description, dartVersion: $dartVersion, flutterVersion: $flutterVersion, updated: $updated)'; +} + + +} + +/// @nodoc +abstract mixin class $ContextFileMetadataCopyWith<$Res> { + factory $ContextFileMetadataCopyWith(ContextFileMetadata value, $Res Function(ContextFileMetadata) _then) = _$ContextFileMetadataCopyWithImpl; +@useResult +$Res call({ + String title, String version, String description,@JsonKey(name: 'dart_version') String? dartVersion,@JsonKey(name: 'flutter_version') String? flutterVersion, String? 
updated +}); + + + + +} +/// @nodoc +class _$ContextFileMetadataCopyWithImpl<$Res> + implements $ContextFileMetadataCopyWith<$Res> { + _$ContextFileMetadataCopyWithImpl(this._self, this._then); + + final ContextFileMetadata _self; + final $Res Function(ContextFileMetadata) _then; + +/// Create a copy of ContextFileMetadata +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? title = null,Object? version = null,Object? description = null,Object? dartVersion = freezed,Object? flutterVersion = freezed,Object? updated = freezed,}) { + return _then(_self.copyWith( +title: null == title ? _self.title : title // ignore: cast_nullable_to_non_nullable +as String,version: null == version ? _self.version : version // ignore: cast_nullable_to_non_nullable +as String,description: null == description ? _self.description : description // ignore: cast_nullable_to_non_nullable +as String,dartVersion: freezed == dartVersion ? _self.dartVersion : dartVersion // ignore: cast_nullable_to_non_nullable +as String?,flutterVersion: freezed == flutterVersion ? _self.flutterVersion : flutterVersion // ignore: cast_nullable_to_non_nullable +as String?,updated: freezed == updated ? _self.updated : updated // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [ContextFileMetadata]. +extension ContextFileMetadataPatterns on ContextFileMetadata { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ContextFileMetadata value)? 
$default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ContextFileMetadata() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ContextFileMetadata value) $default,){ +final _that = this; +switch (_that) { +case _ContextFileMetadata(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ContextFileMetadata value)? $default,){ +final _that = this; +switch (_that) { +case _ContextFileMetadata() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String title, String version, String description, @JsonKey(name: 'dart_version') String? dartVersion, @JsonKey(name: 'flutter_version') String? flutterVersion, String? updated)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ContextFileMetadata() when $default != null: +return $default(_that.title,_that.version,_that.description,_that.dartVersion,_that.flutterVersion,_that.updated);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. 
+/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String title, String version, String description, @JsonKey(name: 'dart_version') String? dartVersion, @JsonKey(name: 'flutter_version') String? flutterVersion, String? updated) $default,) {final _that = this; +switch (_that) { +case _ContextFileMetadata(): +return $default(_that.title,_that.version,_that.description,_that.dartVersion,_that.flutterVersion,_that.updated);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String title, String version, String description, @JsonKey(name: 'dart_version') String? dartVersion, @JsonKey(name: 'flutter_version') String? flutterVersion, String? updated)? $default,) {final _that = this; +switch (_that) { +case _ContextFileMetadata() when $default != null: +return $default(_that.title,_that.version,_that.description,_that.dartVersion,_that.flutterVersion,_that.updated);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ContextFileMetadata implements ContextFileMetadata { + const _ContextFileMetadata({required this.title, required this.version, required this.description, @JsonKey(name: 'dart_version') this.dartVersion, @JsonKey(name: 'flutter_version') this.flutterVersion, this.updated}); + factory _ContextFileMetadata.fromJson(Map json) => _$ContextFileMetadataFromJson(json); + +/// Title of the context file. +@override final String title; +/// Version string. +@override final String version; +/// Description of the context file. 
+@override final String description; +/// Dart SDK version this context targets. +@override@JsonKey(name: 'dart_version') final String? dartVersion; +/// Flutter SDK version this context targets. +@override@JsonKey(name: 'flutter_version') final String? flutterVersion; +/// Last updated date string. +@override final String? updated; + +/// Create a copy of ContextFileMetadata +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ContextFileMetadataCopyWith<_ContextFileMetadata> get copyWith => __$ContextFileMetadataCopyWithImpl<_ContextFileMetadata>(this, _$identity); + +@override +Map toJson() { + return _$ContextFileMetadataToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ContextFileMetadata&&(identical(other.title, title) || other.title == title)&&(identical(other.version, version) || other.version == version)&&(identical(other.description, description) || other.description == description)&&(identical(other.dartVersion, dartVersion) || other.dartVersion == dartVersion)&&(identical(other.flutterVersion, flutterVersion) || other.flutterVersion == flutterVersion)&&(identical(other.updated, updated) || other.updated == updated)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,title,version,description,dartVersion,flutterVersion,updated); + +@override +String toString() { + return 'ContextFileMetadata(title: $title, version: $version, description: $description, dartVersion: $dartVersion, flutterVersion: $flutterVersion, updated: $updated)'; +} + + +} + +/// @nodoc +abstract mixin class _$ContextFileMetadataCopyWith<$Res> implements $ContextFileMetadataCopyWith<$Res> { + factory _$ContextFileMetadataCopyWith(_ContextFileMetadata value, $Res Function(_ContextFileMetadata) _then) = 
__$ContextFileMetadataCopyWithImpl; +@override @useResult +$Res call({ + String title, String version, String description,@JsonKey(name: 'dart_version') String? dartVersion,@JsonKey(name: 'flutter_version') String? flutterVersion, String? updated +}); + + + + +} +/// @nodoc +class __$ContextFileMetadataCopyWithImpl<$Res> + implements _$ContextFileMetadataCopyWith<$Res> { + __$ContextFileMetadataCopyWithImpl(this._self, this._then); + + final _ContextFileMetadata _self; + final $Res Function(_ContextFileMetadata) _then; + +/// Create a copy of ContextFileMetadata +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? title = null,Object? version = null,Object? description = null,Object? dartVersion = freezed,Object? flutterVersion = freezed,Object? updated = freezed,}) { + return _then(_ContextFileMetadata( +title: null == title ? _self.title : title // ignore: cast_nullable_to_non_nullable +as String,version: null == version ? _self.version : version // ignore: cast_nullable_to_non_nullable +as String,description: null == description ? _self.description : description // ignore: cast_nullable_to_non_nullable +as String,dartVersion: freezed == dartVersion ? _self.dartVersion : dartVersion // ignore: cast_nullable_to_non_nullable +as String?,flutterVersion: freezed == flutterVersion ? _self.flutterVersion : flutterVersion // ignore: cast_nullable_to_non_nullable +as String?,updated: freezed == updated ? _self.updated : updated // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + + +/// @nodoc +mixin _$ContextFile { + +/// Parsed frontmatter metadata. + ContextFileMetadata get metadata;/// File content after the frontmatter section. + String get content;/// Absolute path to the context file on disk. +@JsonKey(name: 'file_path') String get filePath; +/// Create a copy of ContextFile +/// with the given fields replaced by the non-null parameter values. 
+@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContextFileCopyWith get copyWith => _$ContextFileCopyWithImpl(this as ContextFile, _$identity); + + /// Serializes this ContextFile to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContextFile&&(identical(other.metadata, metadata) || other.metadata == metadata)&&(identical(other.content, content) || other.content == content)&&(identical(other.filePath, filePath) || other.filePath == filePath)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,metadata,content,filePath); + +@override +String toString() { + return 'ContextFile(metadata: $metadata, content: $content, filePath: $filePath)'; +} + + +} + +/// @nodoc +abstract mixin class $ContextFileCopyWith<$Res> { + factory $ContextFileCopyWith(ContextFile value, $Res Function(ContextFile) _then) = _$ContextFileCopyWithImpl; +@useResult +$Res call({ + ContextFileMetadata metadata, String content,@JsonKey(name: 'file_path') String filePath +}); + + +$ContextFileMetadataCopyWith<$Res> get metadata; + +} +/// @nodoc +class _$ContextFileCopyWithImpl<$Res> + implements $ContextFileCopyWith<$Res> { + _$ContextFileCopyWithImpl(this._self, this._then); + + final ContextFile _self; + final $Res Function(ContextFile) _then; + +/// Create a copy of ContextFile +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? metadata = null,Object? content = null,Object? filePath = null,}) { + return _then(_self.copyWith( +metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as ContextFileMetadata,content: null == content ? _self.content : content // ignore: cast_nullable_to_non_nullable +as String,filePath: null == filePath ? 
_self.filePath : filePath // ignore: cast_nullable_to_non_nullable +as String, + )); +} +/// Create a copy of ContextFile +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$ContextFileMetadataCopyWith<$Res> get metadata { + + return $ContextFileMetadataCopyWith<$Res>(_self.metadata, (value) { + return _then(_self.copyWith(metadata: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [ContextFile]. +extension ContextFilePatterns on ContextFile { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ContextFile value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ContextFile() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ContextFile value) $default,){ +final _that = this; +switch (_that) { +case _ContextFile(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ContextFile value)? 
$default,){ +final _that = this; +switch (_that) { +case _ContextFile() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( ContextFileMetadata metadata, String content, @JsonKey(name: 'file_path') String filePath)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ContextFile() when $default != null: +return $default(_that.metadata,_that.content,_that.filePath);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( ContextFileMetadata metadata, String content, @JsonKey(name: 'file_path') String filePath) $default,) {final _that = this; +switch (_that) { +case _ContextFile(): +return $default(_that.metadata,_that.content,_that.filePath);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( ContextFileMetadata metadata, String content, @JsonKey(name: 'file_path') String filePath)? 
$default,) {final _that = this; +switch (_that) { +case _ContextFile() when $default != null: +return $default(_that.metadata,_that.content,_that.filePath);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ContextFile extends ContextFile { + const _ContextFile({required this.metadata, required this.content, @JsonKey(name: 'file_path') required this.filePath}): super._(); + factory _ContextFile.fromJson(Map json) => _$ContextFileFromJson(json); + +/// Parsed frontmatter metadata. +@override final ContextFileMetadata metadata; +/// File content after the frontmatter section. +@override final String content; +/// Absolute path to the context file on disk. +@override@JsonKey(name: 'file_path') final String filePath; + +/// Create a copy of ContextFile +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ContextFileCopyWith<_ContextFile> get copyWith => __$ContextFileCopyWithImpl<_ContextFile>(this, _$identity); + +@override +Map toJson() { + return _$ContextFileToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ContextFile&&(identical(other.metadata, metadata) || other.metadata == metadata)&&(identical(other.content, content) || other.content == content)&&(identical(other.filePath, filePath) || other.filePath == filePath)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,metadata,content,filePath); + +@override +String toString() { + return 'ContextFile(metadata: $metadata, content: $content, filePath: $filePath)'; +} + + +} + +/// @nodoc +abstract mixin class _$ContextFileCopyWith<$Res> implements $ContextFileCopyWith<$Res> { + factory _$ContextFileCopyWith(_ContextFile value, $Res Function(_ContextFile) _then) = __$ContextFileCopyWithImpl; +@override @useResult 
+$Res call({ + ContextFileMetadata metadata, String content,@JsonKey(name: 'file_path') String filePath +}); + + +@override $ContextFileMetadataCopyWith<$Res> get metadata; + +} +/// @nodoc +class __$ContextFileCopyWithImpl<$Res> + implements _$ContextFileCopyWith<$Res> { + __$ContextFileCopyWithImpl(this._self, this._then); + + final _ContextFile _self; + final $Res Function(_ContextFile) _then; + +/// Create a copy of ContextFile +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? metadata = null,Object? content = null,Object? filePath = null,}) { + return _then(_ContextFile( +metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as ContextFileMetadata,content: null == content ? _self.content : content // ignore: cast_nullable_to_non_nullable +as String,filePath: null == filePath ? _self.filePath : filePath // ignore: cast_nullable_to_non_nullable +as String, + )); +} + +/// Create a copy of ContextFile +/// with the given fields replaced by the non-null parameter values. 
+@override +@pragma('vm:prefer-inline') +$ContextFileMetadataCopyWith<$Res> get metadata { + + return $ContextFileMetadataCopyWith<$Res>(_self.metadata, (value) { + return _then(_self.copyWith(metadata: value)); + }); +} +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/context_file.g.dart b/packages/eval_config/lib/src/models/context_file.g.dart new file mode 100644 index 0000000..fcea90e --- /dev/null +++ b/packages/eval_config/lib/src/models/context_file.g.dart @@ -0,0 +1,43 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'context_file.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_ContextFileMetadata _$ContextFileMetadataFromJson(Map json) => + _ContextFileMetadata( + title: json['title'] as String, + version: json['version'] as String, + description: json['description'] as String, + dartVersion: json['dart_version'] as String?, + flutterVersion: json['flutter_version'] as String?, + updated: json['updated'] as String?, + ); + +Map _$ContextFileMetadataToJson( + _ContextFileMetadata instance, +) => { + 'title': instance.title, + 'version': instance.version, + 'description': instance.description, + 'dart_version': instance.dartVersion, + 'flutter_version': instance.flutterVersion, + 'updated': instance.updated, +}; + +_ContextFile _$ContextFileFromJson(Map json) => _ContextFile( + metadata: ContextFileMetadata.fromJson( + json['metadata'] as Map, + ), + content: json['content'] as String, + filePath: json['file_path'] as String, +); + +Map _$ContextFileToJson(_ContextFile instance) => + { + 'metadata': instance.metadata.toJson(), + 'content': instance.content, + 'file_path': instance.filePath, + }; diff --git a/packages/eval_config/lib/src/models/dataset.dart b/packages/eval_config/lib/src/models/dataset.dart new file mode 100644 index 0000000..874080e --- /dev/null +++ 
b/packages/eval_config/lib/src/models/dataset.dart @@ -0,0 +1,35 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +import 'sample.dart'; + +part 'dataset.freezed.dart'; +part 'dataset.g.dart'; + +/// Dart representation of Inspect AI's `Dataset` / `MemoryDataset` class. +/// +/// A sequence of [Sample] objects. +/// +/// This models the `MemoryDataset` variant which holds samples in an +/// in-memory list. +/// +/// See [`Dataset`](https://inspect.aisi.org.uk/reference/inspect_ai.dataset.html#dataset) +/// and [`MemoryDataset`](https://inspect.aisi.org.uk/reference/inspect_ai.dataset.html#memorydataset). +@freezed +sealed class Dataset with _$Dataset { + const factory Dataset({ + /// The list of sample objects. + @Default([]) List samples, + + /// Dataset name. + String? name, + + /// Dataset location (file path or remote URL). + String? location, + + /// Whether the dataset was shuffled after reading. + @Default(false) bool shuffled, + }) = _Dataset; + + factory Dataset.fromJson(Map json) => + _$DatasetFromJson(json); +} diff --git a/packages/eval_config/lib/src/models/dataset.freezed.dart b/packages/eval_config/lib/src/models/dataset.freezed.dart new file mode 100644 index 0000000..fdd77dc --- /dev/null +++ b/packages/eval_config/lib/src/models/dataset.freezed.dart @@ -0,0 +1,295 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'dataset.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T 
_$identity(T value) => value; + +/// @nodoc +mixin _$Dataset { + +/// The list of sample objects. + List get samples;/// Dataset name. + String? get name;/// Dataset location (file path or remote URL). + String? get location;/// Whether the dataset was shuffled after reading. + bool get shuffled; +/// Create a copy of Dataset +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$DatasetCopyWith get copyWith => _$DatasetCopyWithImpl(this as Dataset, _$identity); + + /// Serializes this Dataset to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Dataset&&const DeepCollectionEquality().equals(other.samples, samples)&&(identical(other.name, name) || other.name == name)&&(identical(other.location, location) || other.location == location)&&(identical(other.shuffled, shuffled) || other.shuffled == shuffled)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(samples),name,location,shuffled); + +@override +String toString() { + return 'Dataset(samples: $samples, name: $name, location: $location, shuffled: $shuffled)'; +} + + +} + +/// @nodoc +abstract mixin class $DatasetCopyWith<$Res> { + factory $DatasetCopyWith(Dataset value, $Res Function(Dataset) _then) = _$DatasetCopyWithImpl; +@useResult +$Res call({ + List samples, String? name, String? location, bool shuffled +}); + + + + +} +/// @nodoc +class _$DatasetCopyWithImpl<$Res> + implements $DatasetCopyWith<$Res> { + _$DatasetCopyWithImpl(this._self, this._then); + + final Dataset _self; + final $Res Function(Dataset) _then; + +/// Create a copy of Dataset +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? samples = null,Object? 
name = freezed,Object? location = freezed,Object? shuffled = null,}) { + return _then(_self.copyWith( +samples: null == samples ? _self.samples : samples // ignore: cast_nullable_to_non_nullable +as List,name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,location: freezed == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String?,shuffled: null == shuffled ? _self.shuffled : shuffled // ignore: cast_nullable_to_non_nullable +as bool, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Dataset]. +extension DatasetPatterns on Dataset { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Dataset value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Dataset() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Dataset value) $default,){ +final _that = this; +switch (_that) { +case _Dataset(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Dataset value)? 
$default,){ +final _that = this; +switch (_that) { +case _Dataset() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( List samples, String? name, String? location, bool shuffled)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Dataset() when $default != null: +return $default(_that.samples,_that.name,_that.location,_that.shuffled);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( List samples, String? name, String? location, bool shuffled) $default,) {final _that = this; +switch (_that) { +case _Dataset(): +return $default(_that.samples,_that.name,_that.location,_that.shuffled);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( List samples, String? name, String? location, bool shuffled)? 
$default,) {final _that = this; +switch (_that) { +case _Dataset() when $default != null: +return $default(_that.samples,_that.name,_that.location,_that.shuffled);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Dataset implements Dataset { + const _Dataset({final List samples = const [], this.name, this.location, this.shuffled = false}): _samples = samples; + factory _Dataset.fromJson(Map json) => _$DatasetFromJson(json); + +/// The list of sample objects. + final List _samples; +/// The list of sample objects. +@override@JsonKey() List get samples { + if (_samples is EqualUnmodifiableListView) return _samples; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_samples); +} + +/// Dataset name. +@override final String? name; +/// Dataset location (file path or remote URL). +@override final String? location; +/// Whether the dataset was shuffled after reading. +@override@JsonKey() final bool shuffled; + +/// Create a copy of Dataset +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$DatasetCopyWith<_Dataset> get copyWith => __$DatasetCopyWithImpl<_Dataset>(this, _$identity); + +@override +Map toJson() { + return _$DatasetToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Dataset&&const DeepCollectionEquality().equals(other._samples, _samples)&&(identical(other.name, name) || other.name == name)&&(identical(other.location, location) || other.location == location)&&(identical(other.shuffled, shuffled) || other.shuffled == shuffled)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(_samples),name,location,shuffled); + +@override +String toString() { + return 'Dataset(samples: $samples, name: $name, location: $location, shuffled: $shuffled)'; +} + + +} + +/// @nodoc +abstract mixin class _$DatasetCopyWith<$Res> implements $DatasetCopyWith<$Res> { + factory _$DatasetCopyWith(_Dataset value, $Res Function(_Dataset) _then) = __$DatasetCopyWithImpl; +@override @useResult +$Res call({ + List samples, String? name, String? location, bool shuffled +}); + + + + +} +/// @nodoc +class __$DatasetCopyWithImpl<$Res> + implements _$DatasetCopyWith<$Res> { + __$DatasetCopyWithImpl(this._self, this._then); + + final _Dataset _self; + final $Res Function(_Dataset) _then; + +/// Create a copy of Dataset +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? samples = null,Object? name = freezed,Object? location = freezed,Object? shuffled = null,}) { + return _then(_Dataset( +samples: null == samples ? _self._samples : samples // ignore: cast_nullable_to_non_nullable +as List,name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,location: freezed == location ? 
_self.location : location // ignore: cast_nullable_to_non_nullable +as String?,shuffled: null == shuffled ? _self.shuffled : shuffled // ignore: cast_nullable_to_non_nullable +as bool, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/dataset.g.dart b/packages/eval_config/lib/src/models/dataset.g.dart new file mode 100644 index 0000000..0b281d8 --- /dev/null +++ b/packages/eval_config/lib/src/models/dataset.g.dart @@ -0,0 +1,25 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'dataset.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_Dataset _$DatasetFromJson(Map json) => _Dataset( + samples: + (json['samples'] as List?) + ?.map((e) => Sample.fromJson(e as Map)) + .toList() ?? + const [], + name: json['name'] as String?, + location: json['location'] as String?, + shuffled: json['shuffled'] as bool? ?? false, +); + +Map _$DatasetToJson(_Dataset instance) => { + 'samples': instance.samples.map((e) => e.toJson()).toList(), + 'name': instance.name, + 'location': instance.location, + 'shuffled': instance.shuffled, +}; diff --git a/packages/eval_config/lib/src/models/eval_log.dart b/packages/eval_config/lib/src/models/eval_log.dart new file mode 100644 index 0000000..064a08b --- /dev/null +++ b/packages/eval_config/lib/src/models/eval_log.dart @@ -0,0 +1,1264 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +part 'eval_log.freezed.dart'; +part 'eval_log.g.dart'; + +/// Evaluation log. +@freezed +abstract class EvalLog with _$EvalLog { + /// Creates an evaluation log. + const factory EvalLog({ + /// Eval log file format version. + @Default(2) int version, + + /// Status of evaluation (did it succeed or fail). + @Default('started') String status, + + /// Eval identity and configuration. + required EvalSpec eval, + + /// Eval plan (solvers and config). + EvalPlan? 
plan, + + /// Eval results (scores and metrics). + EvalResults? results, + + /// Eval stats (runtime, model usage). + EvalStats? stats, + + /// Error that halted eval (if status==“error”). + EvalError? error, + + /// Whether any samples were invalidated. + @Default(false) bool invalidated, + + /// Samples processed by eval. + List? samples, + + /// Reduced sample values. + List? reductions, + + /// Location that the log file was read from. + String? location, + + /// ETag from S3 for conditional writes. + String? etag, + + /// Eval set information. + @JsonKey(name: 'eval_set_info') EvalSetInfo? evalSetInfo, + }) = _EvalLog; + + const EvalLog._(); + + factory EvalLog.fromJson(Map json) => + _$EvalLogFromJson(json); +} + +/// Eval target and configuration. +@freezed +abstract class EvalSpec with _$EvalSpec { + /// Creates an evaluation specification. + const factory EvalSpec({ + /// Globally unique id for eval set (if any). + @JsonKey(name: 'eval_set_id') String? evalSetId, + + /// Globally unique id for eval. + @JsonKey(name: 'eval_id') required String evalId, + + /// Unique run id. + @JsonKey(name: 'run_id') required String runId, + + /// Time created. + required String created, + + /// Task name. + required String task, + + /// Unique task id. + @JsonKey(name: 'task_id') required String taskId, + + /// Task version. + @JsonKey(name: 'task_version', defaultValue: 0) + @Default(0) + Object taskVersion, + + /// Task source file. + @JsonKey(name: 'task_file') String? taskFile, + + /// Task display name. + @JsonKey(name: 'task_display_name') String? taskDisplayName, + + /// Task registry name. + @JsonKey(name: 'task_registry_name') String? taskRegistryName, + + /// Attributes of the @task decorator. + @JsonKey(name: 'task_attribs', defaultValue: {}) + @Default({}) + Map taskAttribs, + + /// Arguments used for invoking the task (including defaults). 
+ @JsonKey(name: 'task_args', defaultValue: {}) + @Default({}) + Map taskArgs, + + /// Arguments explicitly passed by caller for invoking the task. + @JsonKey(name: 'task_args_passed', defaultValue: {}) + @Default({}) + Map taskArgsPassed, + + /// Solver name. + String? solver, + + /// Arguments used for invoking the solver. + @JsonKey(name: 'solver_args', defaultValue: {}) + @Default({}) + Map solverArgs, + + /// Arguments explicitly passed by caller for invoking the solver. + @JsonKey(name: 'solver_args_passed', defaultValue: {}) + @Default({}) + Map solverArgsPassed, + + /// Tags associated with evaluation run. + @Default([]) List tags, + + /// Dataset used for eval. + EvalDataset? dataset, + + /// Sandbox environment type and optional config file. + Object? sandbox, + + /// Model used for eval. + @JsonKey(name: 'model') required String model, + + /// Generate config specified for model instance. + @JsonKey(name: 'model_generate_config') GenerateConfig? modelGenerateConfig, + + /// Optional override of model base url. + @JsonKey(name: 'model_base_url') String? modelBaseUrl, + + /// Model specific arguments. + @JsonKey(name: 'model_args', defaultValue: {}) + @Default({}) + Map modelArgs, + + /// Model roles. + @JsonKey(name: 'model_roles') Map? modelRoles, + + /// Configuration values for eval. + @Default(EvalConfig()) EvalConfig config, + + /// Source revision of eval. + EvalRevision? revision, + + /// Package versions for eval. + @JsonKey(name: 'packages', defaultValue: {}) + @Default({}) + Map packages, + + /// Additional eval metadata. + @JsonKey(name: 'metadata') Map? metadata, + + /// Scorers and args for this eval. + @Default([]) List scorers, + + /// Metrics and args for this eval. + @Default([]) List metrics, + }) = _EvalSpec; + + const EvalSpec._(); + + factory EvalSpec.fromJson(Map json) => + _$EvalSpecFromJson(json); +} + +/// Dataset used for evaluation. +@freezed +abstract class EvalDataset with _$EvalDataset { + /// Creates an evaluation dataset. 
+ const factory EvalDataset({ + /// Dataset name. + String? name, + + /// Dataset location (file path or remote URL). + String? location, + + /// Number of samples in the dataset. + required int samples, + + /// IDs of samples in the dataset. + @JsonKey(name: 'sample_ids') List? sampleIds, + + /// Was the dataset shuffled after reading. + @Default(false) bool shuffled, + }) = _EvalDataset; + + const EvalDataset._(); + + factory EvalDataset.fromJson(Map json) => + _$EvalDatasetFromJson(json); +} + +/// Configuration used for evaluation. +@freezed +abstract class EvalConfig with _$EvalConfig { + /// Creates an evaluation configuration. + const factory EvalConfig({ + /// Sample limit (number of samples or range of samples). + Object? limit, + + /// Evaluate specific sample(s). + @JsonKey(name: 'sample_id') Object? sampleId, + + /// Shuffle order of samples. + @JsonKey(name: 'sample_shuffle') bool? sampleShuffle, + + /// Number of epochs to run samples over. + int? epochs, + + /// Reducers for aggregating per-sample scores. + @JsonKey(name: 'epochs_reducer') List? epochsReducer, + + /// Approval policy for tool use. + String? approval, + + /// Fail eval when sample errors occur. + /// True to fail on first sample error (default); False to never fail on sample errors; + /// Value between 0 and 1 to fail if a proportion of total samples fails. + /// Value greater than 1 to fail eval if a count of samples fails. + @JsonKey(name: 'fail_on_error') Object? failOnError, + + /// Continue eval even if the fail_on_error condition is met. + @JsonKey(name: 'continue_on_fail') bool? continueOnFail, + + /// Number of times to retry samples if they encounter errors. + @JsonKey(name: 'retry_on_error') int? retryOnError, + + /// Maximum messages to allow per sample. + @JsonKey(name: 'message_limit') int? messageLimit, + + /// Maximum tokens usage per sample. + @JsonKey(name: 'token_limit') int? tokenLimit, + + /// Maximum clock time per sample. + @JsonKey(name: 'time_limit') int? 
timeLimit, + + /// Maximum working time per sample. + @JsonKey(name: 'working_limit') int? workingLimit, + + /// Maximum number of samples to run in parallel. + @JsonKey(name: 'max_samples') int? maxSamples, + + /// Maximum number of tasks to run in parallel. + @JsonKey(name: 'max_tasks') int? maxTasks, + + /// Maximum number of subprocesses to run concurrently. + @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, + + /// Maximum number of sandboxes to run concurrently. + @JsonKey(name: 'max_sandboxes') int? maxSandboxes, + + /// Cleanup sandbox environments after task completes. + @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, + + /// Log detailed information on each sample. + @JsonKey(name: 'log_samples') bool? logSamples, + + /// Log events in realtime (enables live viewing of samples in inspect view). + @JsonKey(name: 'log_realtime') bool? logRealtime, + + /// Log base64 encoded versions of images. + @JsonKey(name: 'log_images') bool? logImages, + + /// Number of samples to buffer before writing log file. + @JsonKey(name: 'log_buffer') int? logBuffer, + + /// Interval (in seconds) for syncing sample events to log directory. + @JsonKey(name: 'log_shared') int? logShared, + + /// Display scoring metrics realtime. + @JsonKey(name: 'score_display') bool? scoreDisplay, + }) = _EvalConfig; + + const EvalConfig._(); + + factory EvalConfig.fromJson(Map json) => + _$EvalConfigFromJson(json); +} + +/// Git revision for evaluation. +@freezed +abstract class EvalRevision with _$EvalRevision { + /// Creates an evaluation revision. + const factory EvalRevision({ + /// Type of revision (currently only “git”). + required String type, + + /// Revision origin server. + required String origin, + + /// Revision commit. + required String commit, + + /// Working tree has uncommitted changes or untracked files. 
+ @Default(false) bool dirty, + }) = _EvalRevision; + + const EvalRevision._(); + + factory EvalRevision.fromJson(Map json) => + _$EvalRevisionFromJson(json); +} + +/// Plan (solvers) used in evaluation. +@freezed +abstract class EvalPlan with _$EvalPlan { + /// Creates an evaluation plan. + const factory EvalPlan({ + /// Plan name. + @Default('plan') String name, + + /// Steps in plan. + @Default([]) List steps, + + /// Step to always run at the end. + EvalPlanStep? finish, + + /// Generation config. + @Default(GenerateConfig()) GenerateConfig config, + }) = _EvalPlan; + + const EvalPlan._(); + + factory EvalPlan.fromJson(Map json) => + _$EvalPlanFromJson(json); +} + +/// Solver step. +@freezed +abstract class EvalPlanStep with _$EvalPlanStep { + /// Creates an evaluation plan step. + const factory EvalPlanStep({ + /// Name of solver. + required String solver, + + /// Parameters used to instantiate solver. + @Default({}) Map params, + + /// Parameters explicitly passed to the eval plan. + @JsonKey(name: 'params_passed') Map? paramsPassed, + }) = _EvalPlanStep; + + const EvalPlanStep._(); + + factory EvalPlanStep.fromJson(Map json) => + _$EvalPlanStepFromJson(json); +} + +/// Scoring results from evaluation. +@freezed +abstract class EvalResults with _$EvalResults { + /// Creates evaluation results. + const factory EvalResults({ + /// Total samples in eval (dataset samples * epochs). + @JsonKey(name: 'total_samples', defaultValue: 0) + @Default(0) + int totalSamples, + + /// Samples completed without error. + @JsonKey(name: 'completed_samples', defaultValue: 0) + @Default(0) + int completedSamples, + + /// Early stopping summary (if an early stopping manager was present). + @JsonKey(name: 'early_stopping') EarlyStoppingSummary? earlyStopping, + + /// Scorers used to compute results. + @Default([]) List scores, + + /// Additional results metadata. + @Default({}) Map metadata, + + /// List of per sample scores reduced across epochs. 
+ @JsonKey(name: 'sample_reductions') + List? sampleReductions, + }) = _EvalResults; + + const EvalResults._(); + + factory EvalResults.fromJson(Map json) => + _$EvalResultsFromJson(json); +} + +/// Early stopping summary. +@freezed +abstract class EarlyStoppingSummary with _$EarlyStoppingSummary { + /// Creates an early stopping summary. + const factory EarlyStoppingSummary({ + /// Type of early stopping. + required String type, + + /// Limit that triggered early stopping. + double? limit, + + /// Score that triggered early stopping. + double? score, + + /// Additional metadata. + @Default({}) Map metadata, + }) = _EarlyStoppingSummary; + + const EarlyStoppingSummary._(); + + factory EarlyStoppingSummary.fromJson(Map json) => + _$EarlyStoppingSummaryFromJson(json); +} + +/// Score for evaluation task. +@freezed +abstract class EvalScore with _$EvalScore { + /// Creates an evaluation score. + const factory EvalScore({ + /// Score name. + required String name, + + /// Scorer name. + required String scorer, + + /// Reducer name. + String? reducer, + + /// Number of samples scored by this scorer. + @JsonKey(name: 'scored_samples') int? scoredSamples, + + /// Number of samples not scored by this scorer. + @JsonKey(name: 'unscored_samples') int? unscoredSamples, + + /// Parameters specified when creating scorer. + @Default({}) Map params, + + /// Metrics computed for this scorer. + @JsonKey(fromJson: _metricsFromJson) @Default([]) List metrics, + + /// Additional scorer metadata. + @JsonKey(name: 'metadata') Map? metadata, + }) = _EvalScore; + + const EvalScore._(); + + factory EvalScore.fromJson(Map json) => + _$EvalScoreFromJson(json); +} + +/// Converts metrics from Map or List format to [List]. +List _metricsFromJson(Object? 
json) { + if (json == null) return []; + + // If it's already a list, parse it normally + if (json is List) { + return json + .map((e) => EvalMetric.fromJson(e as Map)) + .toList(); + } + + // If it's a map (old format), convert to list + if (json is Map) { + return json.values + .map((e) => EvalMetric.fromJson(e as Map)) + .toList(); + } + + return []; +} + +/// Metric for evaluation score. +@freezed +abstract class EvalMetric with _$EvalMetric { + /// Creates an evaluation metric. + const factory EvalMetric({ + /// Metric name. + required String name, + + /// Metric value. + required Object value, + + /// Params specified when creating metric. + @Default({}) Map params, + + /// Additional metadata associated with metric. + Map? metadata, + }) = _EvalMetric; + + const EvalMetric._(); + + factory EvalMetric.fromJson(Map json) => + _$EvalMetricFromJson(json); +} + +/// Score reductions. +@freezed +abstract class EvalSampleReductions with _$EvalSampleReductions { + /// Creates evaluation sample reductions. + const factory EvalSampleReductions({ + /// Name of the scorer. + required String scorer, + + /// Name of the reducer. + String? reducer, + + /// List of reduced scores. + required List samples, + }) = _EvalSampleReductions; + + const EvalSampleReductions._(); + + factory EvalSampleReductions.fromJson(Map json) => + _$EvalSampleReductionsFromJson(json); +} + +/// Timing and usage statistics. +@freezed +abstract class EvalStats with _$EvalStats { + /// Creates evaluation statistics. + const factory EvalStats({ + /// Evaluation start time. Empty string if eval interrupted before start time set. + @JsonKey(name: 'started_at') required String startedAt, + + /// Evaluation completion time. Empty string if eval interrupted before completion. + @JsonKey(name: 'completed_at') required String completedAt, + + /// Model token usage for evaluation. 
+ @JsonKey(name: 'model_usage', defaultValue: {}) + @Default({}) + Map modelUsage, + }) = _EvalStats; + + const EvalStats._(); + + factory EvalStats.fromJson(Map json) => + _$EvalStatsFromJson(json); +} + +/// Eval error details. +@freezed +abstract class EvalError with _$EvalError { + /// Creates evaluation error details. + const factory EvalError({ + /// Error message. + required String message, + + /// Error traceback. + required String traceback, + + /// Error traceback with ANSI color codes. + @JsonKey(name: 'traceback_ansi') required String tracebackAnsi, + }) = _EvalError; + + const EvalError._(); + + factory EvalError.fromJson(Map json) => + _$EvalErrorFromJson(json); +} + +/// Sample from evaluation task. +@freezed +abstract class EvalSample with _$EvalSample { + /// Creates an evaluation sample. + const factory EvalSample({ + /// Unique id for sample. + required Object id, + + /// Epoch number for sample. + required int epoch, + + /// Sample input. + required Object input, + + /// Sample choices. + List? choices, + + /// Sample target value(s). + Object? target, + + /// Additional sample metadata. + @Default({}) Map metadata, + + /// Sandbox environment type and optional config file. + Object? sandbox, + + /// Files that go along with the sample (copied to SandboxEnvironment). + List? files, + + /// Setup script to run for sample (run within default SandboxEnvironment). + String? setup, + + /// Chat conversation history for sample. + @Default([]) List messages, + + /// Model output from sample. + required ModelOutput output, + + /// Scores for sample. + Map? scores, + + /// State at end of sample execution. + @Default({}) Map store, + + /// Events that occurred during sample execution. + @Default([]) List events, + + /// Model token usage for sample. + @JsonKey(name: 'model_usage', defaultValue: {}) + @Default({}) + Map modelUsage, + + /// Time sample started. + @JsonKey(name: 'started_at') String? startedAt, + + /// Time sample completed. 
+ @JsonKey(name: 'completed_at') String? completedAt, + + /// Total time that the sample was running. + @JsonKey(name: 'total_time') double? totalTime, + + /// Time spent working (model generation, sandbox calls, etc.). + @JsonKey(name: 'working_time') double? workingTime, + + /// Globally unique identifier for sample run. + String? uuid, + + /// Provenance data for invalidation. + ProvenanceData? invalidation, + + /// Error that halted sample. + EvalError? error, + + /// Errors that were retried for this sample. + @JsonKey(name: 'error_retries') List? errorRetries, + + /// Attachments referenced from messages and events. + @Default({}) Map attachments, + + /// The limit that halted the sample. + EvalSampleLimit? limit, + }) = _EvalSample; + + const EvalSample._(); + + factory EvalSample.fromJson(Map json) => + _$EvalSampleFromJson(json); +} + +/// Model output. +@freezed +abstract class ModelOutput with _$ModelOutput { + /// Creates model output. + const factory ModelOutput({ + /// Model used for generation. + required String model, + + /// Completion choices. + @Default([]) List choices, + + /// Model token usage. + ModelUsage? usage, + + /// Model completion. + required String completion, + + /// First message stop reason. + @JsonKey(name: 'stop_reason', defaultValue: 'unknown') + @Default('unknown') + String stopReason, + + /// Time elapsed (in seconds) for call to generate. + double? time, + + /// Additional metadata associated with model output. + @Default({}) Map metadata, + + /// Error message in the case of content moderation refusals. + String? error, + + /// First message choice. + ChatMessageAssistant? message, + }) = _ModelOutput; + + const ModelOutput._(); + + factory ModelOutput.fromJson(Map json) => + _$ModelOutputFromJson(json); +} + +/// Choice generated for completion. +@freezed +abstract class ChatCompletionChoice with _$ChatCompletionChoice { + /// Creates a chat completion choice. + const factory ChatCompletionChoice({ + /// Assistant message. 
+ required ChatMessageAssistant message, + + /// Reason that the model stopped generating. + @JsonKey(name: 'stop_reason', defaultValue: 'unknown') + @Default('unknown') + String stopReason, + + /// Logprobs. + Logprobs? logprobs, + }) = _ChatCompletionChoice; + + const ChatCompletionChoice._(); + + factory ChatCompletionChoice.fromJson(Map json) => + _$ChatCompletionChoiceFromJson(json); +} + +/// Token usage for completion. +@freezed +abstract class ModelUsage with _$ModelUsage { + /// Creates model usage details. + const factory ModelUsage({ + /// Total input tokens used. + @JsonKey(name: 'input_tokens', defaultValue: 0) @Default(0) int inputTokens, + + /// Total output tokens used. + @JsonKey(name: 'output_tokens', defaultValue: 0) + @Default(0) + int outputTokens, + + /// Total tokens used. + @JsonKey(name: 'total_tokens', defaultValue: 0) @Default(0) int totalTokens, + + /// Number of tokens written to the cache. + @JsonKey(name: 'input_tokens_cache_write') int? inputTokensCacheWrite, + + /// Number of tokens retrieved from the cache. + @JsonKey(name: 'input_tokens_cache_read') int? inputTokensCacheRead, + + /// Number of tokens used for reasoning. + @JsonKey(name: 'reasoning_tokens', defaultValue: 0) + @Default(0) + int reasoningTokens, + }) = _ModelUsage; + + const ModelUsage._(); + + factory ModelUsage.fromJson(Map json) => + _$ModelUsageFromJson(json); +} + +/// Chat message. +@Freezed(unionKey: 'role', unionValueCase: FreezedUnionCase.snake) +sealed class ChatMessage with _$ChatMessage { + /// System chat message. + const factory ChatMessage.system({ + /// Unique identifier for message. + String? id, + + /// Content (simple string or list of content objects). + required Object content, + + /// Source of message. + String? source, + + /// Additional message metadata. + Map? metadata, + + /// Conversation role. + @Default('system') String role, + }) = ChatMessageSystem; + + /// User chat message.
+ const factory ChatMessage.user({ + /// Unique identifier for message. + String? id, + + /// Content (simple string or list of content objects). + required Object content, + + /// Source of message. + String? source, + + /// Additional message metadata. + Map? metadata, + + /// Conversation role. + @Default('user') String role, + + /// ID(s) of tool call(s) this message has the content payload for. + @JsonKey(name: 'tool_call_id') Object? toolCallId, + }) = ChatMessageUser; + + /// Assistant chat message. + const factory ChatMessage.assistant({ + /// Unique identifier for message. + String? id, + + /// Content (simple string or list of content objects). + required Object content, + + /// Source of message. + String? source, + + /// Additional message metadata. + Map? metadata, + + /// Conversation role. + @Default('assistant') String role, + + /// Tool calls made by the model. + @JsonKey(name: 'tool_calls') List? toolCalls, + + /// Model used to generate assistant message. + String? model, + }) = ChatMessageAssistant; + + /// Tool chat message. + const factory ChatMessage.tool({ + /// Unique identifier for message. + String? id, + + /// Content (simple string or list of content objects). + required Object content, + + /// Source of message. + String? source, + + /// Additional message metadata. + Map? metadata, + + /// Conversation role. + @Default('tool') String role, + + /// ID of tool call. + @JsonKey(name: 'tool_call_id') String? toolCallId, + + /// Name of function called. + String? function, + + /// Error which occurred during tool call. + ToolCallError? error, + }) = ChatMessageTool; + + const ChatMessage._(); + + factory ChatMessage.fromJson(Map json) => + _$ChatMessageFromJson(json); +} + +/// Content sent to or received from a model. +@Freezed(unionKey: 'type', unionValueCase: FreezedUnionCase.snake) +sealed class Content with _$Content { + /// Text content. + const factory Content.text({ + /// Text content.
+ required String text, + + /// Was this a refusal message? + @Default(false) bool refusal, + + /// Citations supporting the text block. + List? citations, + + /// Content type. + @Default('text') String type, + }) = ContentText; + + /// Reasoning content. + const factory Content.reasoning({ + /// Reasoning content. + required String reasoning, + + /// Reasoning summary. + String? summary, + + /// Signature for reasoning content. + String? signature, + + /// Indicates that the explicit content of this reasoning block has been redacted. + @Default(false) bool redacted, + + /// Pure text rendering of reasoning. + String? text, + + /// Content type. + @Default('reasoning') String type, + }) = ContentReasoning; + + /// Image content. + const factory Content.image({ + /// Either a URL of the image or the base64 encoded image data. + required String image, + + /// Specifies the detail level of the image. + @Default('auto') String detail, + + /// Content type. + @Default('image') String type, + }) = ContentImage; + + /// Audio content. + const factory Content.audio({ + /// Audio file path or base64 encoded data URL. + required String audio, + + /// Format of audio data ('mp3' or 'wav'). + required String format, + + /// Content type. + @Default('audio') String type, + }) = ContentAudio; + + /// Video content. + const factory Content.video({ + /// Video file path or base64 encoded data URL. + required String video, + + /// Format of video data ('mp4', 'mpeg', or 'mov'). + required String format, + + /// Content type. + @Default('video') String type, + }) = ContentVideo; + + /// Document content. + const factory Content.document({ + /// Document file path or base64 encoded data URL. + required String document, + + /// Document filename. + String? filename, + + /// Document mime type. + @JsonKey(name: 'mime_type') String? mimeType, + + /// Content type. + @Default('document') String type, + }) = ContentDocument; + + /// Model internal data.
+ const factory Content.data({ + /// Model provider specific payload. + required Map data, + + /// Content type. + @Default('data') String type, + }) = ContentData; + + /// Server side tool use. + const factory Content.toolUse({ + /// The type of the tool call. + @JsonKey(name: 'tool_type') required String toolType, + + /// The unique ID of the tool call. + required String id, + + /// Name of the tool. + required String name, + + /// Tool context (e.g. MCP Server). + Map? context, + + /// Arguments passed to the tool. + required Map arguments, + + /// Result from the tool call. + Object? result, + + /// The error from the tool call (if any). + Object? error, + + /// Content type. + @Default('tool_use') String type, + }) = ContentToolUse; + + const Content._(); + + factory Content.fromJson(Map json) => + _$ContentFromJson(json); +} + +/// Score and sample_id scored. +@freezed +abstract class EvalSampleScore with _$EvalSampleScore { + /// Creates an evaluation sample score. + const factory EvalSampleScore({ + /// Score value. + required Object value, + + /// Model's answer (for logging). + String? answer, + + /// Why this score was given. + String? explanation, + + /// Additional metadata. + @Default({}) Map metadata, + + /// History of scores (if applicable). + @Default([]) List history, + + /// Sample ID. + @JsonKey(name: 'sample_id') Object? sampleId, + }) = _EvalSampleScore; + + const EvalSampleScore._(); + + factory EvalSampleScore.fromJson(Map json) => + _$EvalSampleScoreFromJson(json); +} + +/// Score for evaluation. +@freezed +abstract class Score with _$Score { + /// Creates a score. + const factory Score({ + /// Score value. + required Object value, + + /// Model's answer (for logging). + String? answer, + + /// Why this score was given. + String? explanation, + + /// Additional metadata. + Map? metadata, + }) = _Score; + + const Score._(); + + factory Score.fromJson(Map json) => _$ScoreFromJson(json); +} + +/// Tool call details. 
+@freezed +abstract class ToolCall with _$ToolCall { + /// Creates tool call details. + const factory ToolCall({ + /// Unique ID of tool call. + required String id, + + /// Name of function called. + required String function, + + /// Arguments passed to function. + required Map arguments, + + /// Type of tool call. + @Default('call') String type, + }) = _ToolCall; + + const ToolCall._(); + + factory ToolCall.fromJson(Map json) => + _$ToolCallFromJson(json); +} + +/// Tool call error. +@freezed +abstract class ToolCallError with _$ToolCallError { + /// Creates a tool call error. + const factory ToolCallError({ + /// Error message. + required String message, + + /// Error code. + int? code, + + /// Additional error data. + @JsonKey(name: 'data') Map? data, + }) = _ToolCallError; + + const ToolCallError._(); + + factory ToolCallError.fromJson(Map json) => + _$ToolCallErrorFromJson(json); +} + +/// Model generation options. +@freezed +abstract class GenerateConfig with _$GenerateConfig { + /// Creates model generation options. + const factory GenerateConfig({ + /// Maximum number of times to retry a request. + @JsonKey(name: 'max_retries') int? maxRetries, + + /// Request timeout (in seconds). + int? timeout, + + /// Timeout for each individual request attempt (in seconds). + @JsonKey(name: 'attempt_timeout') int? attemptTimeout, + + /// Maximum number of concurrent connections to the model API. + @JsonKey(name: 'max_connections') int? maxConnections, + + /// System message to provide to the model. + @JsonKey(name: 'system_message') String? systemMessage, + + /// Maximum number of tokens to generate. + @JsonKey(name: 'max_tokens') int? maxTokens, + + /// Top-p sampling parameter. + @JsonKey(name: 'top_p') double? topP, + + /// Temperature sampling parameter. + double? temperature, + + /// Sequences that should stop generation. + @JsonKey(name: 'stop_seqs') List? stopSeqs, + + /// Number of completions to generate and choose the best from. 
+ @JsonKey(name: 'best_of') int? bestOf, + + /// Frequency penalty parameter. + @JsonKey(name: 'frequency_penalty') double? frequencyPenalty, + + /// Presence penalty parameter. + @JsonKey(name: 'presence_penalty') double? presencePenalty, + + /// Logit bias parameter. + @JsonKey(name: 'logit_bias') Map? logitBias, + + /// Random seed for generation. + int? seed, + + /// Top-k sampling parameter. + @JsonKey(name: 'top_k') int? topK, + + /// Number of completion choices to return. + @JsonKey(name: 'num_choices') int? numChoices, + + /// Whether to return logprobs. + bool? logprobs, + + /// Number of top logprobs to return. + @JsonKey(name: 'top_logprobs') int? topLogprobs, + + /// Whether to allow parallel tool calls. + @JsonKey(name: 'parallel_tool_calls') bool? parallelToolCalls, + + /// Whether to allow internal model tools. + @JsonKey(name: 'internal_tools') bool? internalTools, + + /// Maximum number of characters to retain for tool output. + @JsonKey(name: 'max_tool_output') int? maxToolOutput, + + /// Cache the prompt (if supported by the provider). + @JsonKey(name: 'cache_prompt') Object? cachePrompt, + }) = _GenerateConfig; + + const GenerateConfig._(); + + factory GenerateConfig.fromJson(Map json) => + _$GenerateConfigFromJson(json); +} + +/// Logprobs for chat completion. +@freezed +abstract class Logprobs with _$Logprobs { + /// Creates logprobs. + const factory Logprobs({ + /// Logprob content. + required List content, + }) = _Logprobs; + + const Logprobs._(); + + factory Logprobs.fromJson(Map json) => + _$LogprobsFromJson(json); +} + +/// Provenance data for invalidation. +@freezed +abstract class ProvenanceData with _$ProvenanceData { + /// Creates provenance data. + const factory ProvenanceData({ + /// Source location. + required String location, + + /// Static hash. 
+ required String shash, + }) = _ProvenanceData; + + const ProvenanceData._(); + + factory ProvenanceData.fromJson(Map json) => + _$ProvenanceDataFromJson(json); +} + +/// Limit encountered by sample. +@freezed +abstract class EvalSampleLimit with _$EvalSampleLimit { + /// Creates an evaluation sample limit. + const factory EvalSampleLimit({ + /// The type of limit. + required String type, + + /// The limit value. + required double limit, + }) = _EvalSampleLimit; + + const EvalSampleLimit._(); + + factory EvalSampleLimit.fromJson(Map json) => + _$EvalSampleLimitFromJson(json); +} + +/// Eval set information. +@freezed +abstract class EvalSetInfo with _$EvalSetInfo { + /// Creates evaluation set information. + const factory EvalSetInfo({ + /// Globally unique id for eval set. + @JsonKey(name: 'eval_set_id') required String evalSetId, + + /// Tasks in the eval set. + required List tasks, + }) = _EvalSetInfo; + + const EvalSetInfo._(); + + factory EvalSetInfo.fromJson(Map json) => + _$EvalSetInfoFromJson(json); +} + +/// Task in an eval set. +@freezed +abstract class EvalSetTask with _$EvalSetTask { + /// Creates an evaluation set task. + const factory EvalSetTask({ + /// Task name. + String? name, + + /// Unique task id. + @JsonKey(name: 'task_id') required String taskId, + + /// Task source file. + @JsonKey(name: 'task_file') String? taskFile, + + /// Task arguments. + @JsonKey(name: 'task_args', defaultValue: {}) + @Default({}) + Map taskArgs, + + /// Model used for evaluation. + required String model, + + /// Model specific arguments. + @JsonKey(name: 'model_args', defaultValue: {}) + @Default({}) + Map modelArgs, + + /// Model roles. + @JsonKey(name: 'model_roles') Map? modelRoles, + + /// Sequence number of task in eval set. 
+ required int sequence, + }) = _EvalSetTask; + + const EvalSetTask._(); + + factory EvalSetTask.fromJson(Map json) => + _$EvalSetTaskFromJson(json); +} diff --git a/packages/eval_config/lib/src/models/eval_log.freezed.dart b/packages/eval_config/lib/src/models/eval_log.freezed.dart new file mode 100644 index 0000000..ef455a5 --- /dev/null +++ b/packages/eval_config/lib/src/models/eval_log.freezed.dart @@ -0,0 +1,10761 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'eval_log.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$EvalLog { + +/// Eval log file format version. + int get version;/// Status of evaluation (did it succeed or fail). + String get status;/// Eval identity and configuration. + EvalSpec get eval;/// Eval plan (solvers and config). + EvalPlan? get plan;/// Eval results (scores and metrics). + EvalResults? get results;/// Eval stats (runtime, model usage). + EvalStats? get stats;/// Error that halted eval (if status==“error”). + EvalError? get error;/// Whether any samples were invalidated. + bool get invalidated;/// Samples processed by eval. + List? get samples;/// Reduced sample values. + List? get reductions;/// Location that the log file was read from. + String? get location;/// ETag from S3 for conditional writes. + String? get etag;/// Eval set information. +@JsonKey(name: 'eval_set_info') EvalSetInfo? 
get evalSetInfo; +/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalLogCopyWith get copyWith => _$EvalLogCopyWithImpl(this as EvalLog, _$identity); + + /// Serializes this EvalLog to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalLog&&(identical(other.version, version) || other.version == version)&&(identical(other.status, status) || other.status == status)&&(identical(other.eval, eval) || other.eval == eval)&&(identical(other.plan, plan) || other.plan == plan)&&(identical(other.results, results) || other.results == results)&&(identical(other.stats, stats) || other.stats == stats)&&(identical(other.error, error) || other.error == error)&&(identical(other.invalidated, invalidated) || other.invalidated == invalidated)&&const DeepCollectionEquality().equals(other.samples, samples)&&const DeepCollectionEquality().equals(other.reductions, reductions)&&(identical(other.location, location) || other.location == location)&&(identical(other.etag, etag) || other.etag == etag)&&(identical(other.evalSetInfo, evalSetInfo) || other.evalSetInfo == evalSetInfo)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,version,status,eval,plan,results,stats,error,invalidated,const DeepCollectionEquality().hash(samples),const DeepCollectionEquality().hash(reductions),location,etag,evalSetInfo); + +@override +String toString() { + return 'EvalLog(version: $version, status: $status, eval: $eval, plan: $plan, results: $results, stats: $stats, error: $error, invalidated: $invalidated, samples: $samples, reductions: $reductions, location: $location, etag: $etag, evalSetInfo: $evalSetInfo)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalLogCopyWith<$Res> { + factory 
$EvalLogCopyWith(EvalLog value, $Res Function(EvalLog) _then) = _$EvalLogCopyWithImpl; +@useResult +$Res call({ + int version, String status, EvalSpec eval, EvalPlan? plan, EvalResults? results, EvalStats? stats, EvalError? error, bool invalidated, List? samples, List? reductions, String? location, String? etag,@JsonKey(name: 'eval_set_info') EvalSetInfo? evalSetInfo +}); + + +$EvalSpecCopyWith<$Res> get eval;$EvalPlanCopyWith<$Res>? get plan;$EvalResultsCopyWith<$Res>? get results;$EvalStatsCopyWith<$Res>? get stats;$EvalErrorCopyWith<$Res>? get error;$EvalSetInfoCopyWith<$Res>? get evalSetInfo; + +} +/// @nodoc +class _$EvalLogCopyWithImpl<$Res> + implements $EvalLogCopyWith<$Res> { + _$EvalLogCopyWithImpl(this._self, this._then); + + final EvalLog _self; + final $Res Function(EvalLog) _then; + +/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? version = null,Object? status = null,Object? eval = null,Object? plan = freezed,Object? results = freezed,Object? stats = freezed,Object? error = freezed,Object? invalidated = null,Object? samples = freezed,Object? reductions = freezed,Object? location = freezed,Object? etag = freezed,Object? evalSetInfo = freezed,}) { + return _then(_self.copyWith( +version: null == version ? _self.version : version // ignore: cast_nullable_to_non_nullable +as int,status: null == status ? _self.status : status // ignore: cast_nullable_to_non_nullable +as String,eval: null == eval ? _self.eval : eval // ignore: cast_nullable_to_non_nullable +as EvalSpec,plan: freezed == plan ? _self.plan : plan // ignore: cast_nullable_to_non_nullable +as EvalPlan?,results: freezed == results ? _self.results : results // ignore: cast_nullable_to_non_nullable +as EvalResults?,stats: freezed == stats ? _self.stats : stats // ignore: cast_nullable_to_non_nullable +as EvalStats?,error: freezed == error ? 
_self.error : error // ignore: cast_nullable_to_non_nullable +as EvalError?,invalidated: null == invalidated ? _self.invalidated : invalidated // ignore: cast_nullable_to_non_nullable +as bool,samples: freezed == samples ? _self.samples : samples // ignore: cast_nullable_to_non_nullable +as List?,reductions: freezed == reductions ? _self.reductions : reductions // ignore: cast_nullable_to_non_nullable +as List?,location: freezed == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String?,etag: freezed == etag ? _self.etag : etag // ignore: cast_nullable_to_non_nullable +as String?,evalSetInfo: freezed == evalSetInfo ? _self.evalSetInfo : evalSetInfo // ignore: cast_nullable_to_non_nullable +as EvalSetInfo?, + )); +} +/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalSpecCopyWith<$Res> get eval { + + return $EvalSpecCopyWith<$Res>(_self.eval, (value) { + return _then(_self.copyWith(eval: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalPlanCopyWith<$Res>? get plan { + if (_self.plan == null) { + return null; + } + + return $EvalPlanCopyWith<$Res>(_self.plan!, (value) { + return _then(_self.copyWith(plan: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalResultsCopyWith<$Res>? get results { + if (_self.results == null) { + return null; + } + + return $EvalResultsCopyWith<$Res>(_self.results!, (value) { + return _then(_self.copyWith(results: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalStatsCopyWith<$Res>? 
get stats { + if (_self.stats == null) { + return null; + } + + return $EvalStatsCopyWith<$Res>(_self.stats!, (value) { + return _then(_self.copyWith(stats: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalErrorCopyWith<$Res>? get error { + if (_self.error == null) { + return null; + } + + return $EvalErrorCopyWith<$Res>(_self.error!, (value) { + return _then(_self.copyWith(error: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalSetInfoCopyWith<$Res>? get evalSetInfo { + if (_self.evalSetInfo == null) { + return null; + } + + return $EvalSetInfoCopyWith<$Res>(_self.evalSetInfo!, (value) { + return _then(_self.copyWith(evalSetInfo: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [EvalLog]. +extension EvalLogPatterns on EvalLog { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalLog value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalLog() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalLog value) $default,){ +final _that = this; +switch (_that) { +case _EvalLog(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalLog value)? $default,){ +final _that = this; +switch (_that) { +case _EvalLog() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( int version, String status, EvalSpec eval, EvalPlan? plan, EvalResults? results, EvalStats? stats, EvalError? error, bool invalidated, List? samples, List? reductions, String? location, String? etag, @JsonKey(name: 'eval_set_info') EvalSetInfo? evalSetInfo)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalLog() when $default != null: +return $default(_that.version,_that.status,_that.eval,_that.plan,_that.results,_that.stats,_that.error,_that.invalidated,_that.samples,_that.reductions,_that.location,_that.etag,_that.evalSetInfo);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( int version, String status, EvalSpec eval, EvalPlan? plan, EvalResults? results, EvalStats? stats, EvalError? error, bool invalidated, List? samples, List? reductions, String? location, String? etag, @JsonKey(name: 'eval_set_info') EvalSetInfo? evalSetInfo) $default,) {final _that = this; +switch (_that) { +case _EvalLog(): +return $default(_that.version,_that.status,_that.eval,_that.plan,_that.results,_that.stats,_that.error,_that.invalidated,_that.samples,_that.reductions,_that.location,_that.etag,_that.evalSetInfo);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( int version, String status, EvalSpec eval, EvalPlan? plan, EvalResults? results, EvalStats? stats, EvalError? error, bool invalidated, List? samples, List? reductions, String? location, String? etag, @JsonKey(name: 'eval_set_info') EvalSetInfo? evalSetInfo)? $default,) {final _that = this; +switch (_that) { +case _EvalLog() when $default != null: +return $default(_that.version,_that.status,_that.eval,_that.plan,_that.results,_that.stats,_that.error,_that.invalidated,_that.samples,_that.reductions,_that.location,_that.etag,_that.evalSetInfo);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalLog extends EvalLog { + const _EvalLog({this.version = 2, this.status = 'started', required this.eval, this.plan, this.results, this.stats, this.error, this.invalidated = false, final List? samples, final List? 
reductions, this.location, this.etag, @JsonKey(name: 'eval_set_info') this.evalSetInfo}): _samples = samples,_reductions = reductions,super._(); + factory _EvalLog.fromJson(Map json) => _$EvalLogFromJson(json); + +/// Eval log file format version. +@override@JsonKey() final int version; +/// Status of evaluation (did it succeed or fail). +@override@JsonKey() final String status; +/// Eval identity and configuration. +@override final EvalSpec eval; +/// Eval plan (solvers and config). +@override final EvalPlan? plan; +/// Eval results (scores and metrics). +@override final EvalResults? results; +/// Eval stats (runtime, model usage). +@override final EvalStats? stats; +/// Error that halted eval (if status==“error”). +@override final EvalError? error; +/// Whether any samples were invalidated. +@override@JsonKey() final bool invalidated; +/// Samples processed by eval. + final List? _samples; +/// Samples processed by eval. +@override List? get samples { + final value = _samples; + if (value == null) return null; + if (_samples is EqualUnmodifiableListView) return _samples; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Reduced sample values. + final List? _reductions; +/// Reduced sample values. +@override List? get reductions { + final value = _reductions; + if (value == null) return null; + if (_reductions is EqualUnmodifiableListView) return _reductions; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Location that the log file was read from. +@override final String? location; +/// ETag from S3 for conditional writes. +@override final String? etag; +/// Eval set information. +@override@JsonKey(name: 'eval_set_info') final EvalSetInfo? evalSetInfo; + +/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalLogCopyWith<_EvalLog> get copyWith => __$EvalLogCopyWithImpl<_EvalLog>(this, _$identity); + +@override +Map toJson() { + return _$EvalLogToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalLog&&(identical(other.version, version) || other.version == version)&&(identical(other.status, status) || other.status == status)&&(identical(other.eval, eval) || other.eval == eval)&&(identical(other.plan, plan) || other.plan == plan)&&(identical(other.results, results) || other.results == results)&&(identical(other.stats, stats) || other.stats == stats)&&(identical(other.error, error) || other.error == error)&&(identical(other.invalidated, invalidated) || other.invalidated == invalidated)&&const DeepCollectionEquality().equals(other._samples, _samples)&&const DeepCollectionEquality().equals(other._reductions, _reductions)&&(identical(other.location, location) || other.location == location)&&(identical(other.etag, etag) || other.etag == etag)&&(identical(other.evalSetInfo, evalSetInfo) || other.evalSetInfo == evalSetInfo)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,version,status,eval,plan,results,stats,error,invalidated,const DeepCollectionEquality().hash(_samples),const DeepCollectionEquality().hash(_reductions),location,etag,evalSetInfo); + +@override +String toString() { + return 'EvalLog(version: $version, status: $status, eval: $eval, plan: $plan, results: $results, stats: $stats, error: $error, invalidated: $invalidated, samples: $samples, reductions: $reductions, location: $location, etag: $etag, evalSetInfo: $evalSetInfo)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalLogCopyWith<$Res> implements $EvalLogCopyWith<$Res> { + factory _$EvalLogCopyWith(_EvalLog value, $Res Function(_EvalLog) 
_then) = __$EvalLogCopyWithImpl; +@override @useResult +$Res call({ + int version, String status, EvalSpec eval, EvalPlan? plan, EvalResults? results, EvalStats? stats, EvalError? error, bool invalidated, List? samples, List? reductions, String? location, String? etag,@JsonKey(name: 'eval_set_info') EvalSetInfo? evalSetInfo +}); + + +@override $EvalSpecCopyWith<$Res> get eval;@override $EvalPlanCopyWith<$Res>? get plan;@override $EvalResultsCopyWith<$Res>? get results;@override $EvalStatsCopyWith<$Res>? get stats;@override $EvalErrorCopyWith<$Res>? get error;@override $EvalSetInfoCopyWith<$Res>? get evalSetInfo; + +} +/// @nodoc +class __$EvalLogCopyWithImpl<$Res> + implements _$EvalLogCopyWith<$Res> { + __$EvalLogCopyWithImpl(this._self, this._then); + + final _EvalLog _self; + final $Res Function(_EvalLog) _then; + +/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? version = null,Object? status = null,Object? eval = null,Object? plan = freezed,Object? results = freezed,Object? stats = freezed,Object? error = freezed,Object? invalidated = null,Object? samples = freezed,Object? reductions = freezed,Object? location = freezed,Object? etag = freezed,Object? evalSetInfo = freezed,}) { + return _then(_EvalLog( +version: null == version ? _self.version : version // ignore: cast_nullable_to_non_nullable +as int,status: null == status ? _self.status : status // ignore: cast_nullable_to_non_nullable +as String,eval: null == eval ? _self.eval : eval // ignore: cast_nullable_to_non_nullable +as EvalSpec,plan: freezed == plan ? _self.plan : plan // ignore: cast_nullable_to_non_nullable +as EvalPlan?,results: freezed == results ? _self.results : results // ignore: cast_nullable_to_non_nullable +as EvalResults?,stats: freezed == stats ? _self.stats : stats // ignore: cast_nullable_to_non_nullable +as EvalStats?,error: freezed == error ? 
_self.error : error // ignore: cast_nullable_to_non_nullable +as EvalError?,invalidated: null == invalidated ? _self.invalidated : invalidated // ignore: cast_nullable_to_non_nullable +as bool,samples: freezed == samples ? _self._samples : samples // ignore: cast_nullable_to_non_nullable +as List?,reductions: freezed == reductions ? _self._reductions : reductions // ignore: cast_nullable_to_non_nullable +as List?,location: freezed == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String?,etag: freezed == etag ? _self.etag : etag // ignore: cast_nullable_to_non_nullable +as String?,evalSetInfo: freezed == evalSetInfo ? _self.evalSetInfo : evalSetInfo // ignore: cast_nullable_to_non_nullable +as EvalSetInfo?, + )); +} + +/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalSpecCopyWith<$Res> get eval { + + return $EvalSpecCopyWith<$Res>(_self.eval, (value) { + return _then(_self.copyWith(eval: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalPlanCopyWith<$Res>? get plan { + if (_self.plan == null) { + return null; + } + + return $EvalPlanCopyWith<$Res>(_self.plan!, (value) { + return _then(_self.copyWith(plan: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalResultsCopyWith<$Res>? get results { + if (_self.results == null) { + return null; + } + + return $EvalResultsCopyWith<$Res>(_self.results!, (value) { + return _then(_self.copyWith(results: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalStatsCopyWith<$Res>? 
get stats { + if (_self.stats == null) { + return null; + } + + return $EvalStatsCopyWith<$Res>(_self.stats!, (value) { + return _then(_self.copyWith(stats: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalErrorCopyWith<$Res>? get error { + if (_self.error == null) { + return null; + } + + return $EvalErrorCopyWith<$Res>(_self.error!, (value) { + return _then(_self.copyWith(error: value)); + }); +}/// Create a copy of EvalLog +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalSetInfoCopyWith<$Res>? get evalSetInfo { + if (_self.evalSetInfo == null) { + return null; + } + + return $EvalSetInfoCopyWith<$Res>(_self.evalSetInfo!, (value) { + return _then(_self.copyWith(evalSetInfo: value)); + }); +} +} + + +/// @nodoc +mixin _$EvalSpec { + +/// Globally unique id for eval set (if any). +@JsonKey(name: 'eval_set_id') String? get evalSetId;/// Globally unique id for eval. +@JsonKey(name: 'eval_id') String get evalId;/// Unique run id. +@JsonKey(name: 'run_id') String get runId;/// Time created. + String get created;/// Task name. + String get task;/// Unique task id. +@JsonKey(name: 'task_id') String get taskId;/// Task version. +@JsonKey(name: 'task_version', defaultValue: 0) Object get taskVersion;/// Task source file. +@JsonKey(name: 'task_file') String? get taskFile;/// Task display name. +@JsonKey(name: 'task_display_name') String? get taskDisplayName;/// Task registry name. +@JsonKey(name: 'task_registry_name') String? get taskRegistryName;/// Attributes of the @task decorator. +@JsonKey(name: 'task_attribs', defaultValue: {}) Map get taskAttribs;/// Arguments used for invoking the task (including defaults). +@JsonKey(name: 'task_args', defaultValue: {}) Map get taskArgs;/// Arguments explicitly passed by caller for invoking the task. 
+@JsonKey(name: 'task_args_passed', defaultValue: {}) Map get taskArgsPassed;/// Solver name. + String? get solver;/// Arguments used for invoking the solver. +@JsonKey(name: 'solver_args', defaultValue: {}) Map get solverArgs;/// Arguments explicitly passed by caller for invoking the solver. +@JsonKey(name: 'solver_args_passed', defaultValue: {}) Map get solverArgsPassed;/// Tags associated with evaluation run. + List get tags;/// Dataset used for eval. + EvalDataset? get dataset;/// Sandbox environment type and optional config file. + Object? get sandbox;/// Model used for eval. +@JsonKey(name: 'model') String get model;/// Generate config specified for model instance. +@JsonKey(name: 'model_generate_config') GenerateConfig? get modelGenerateConfig;/// Optional override of model base url. +@JsonKey(name: 'model_base_url') String? get modelBaseUrl;/// Model specific arguments. +@JsonKey(name: 'model_args', defaultValue: {}) Map get modelArgs;/// Model roles. +@JsonKey(name: 'model_roles') Map? get modelRoles;/// Configuration values for eval. + EvalConfig get config;/// Source revision of eval. + EvalRevision? get revision;/// Package versions for eval. +@JsonKey(name: 'packages', defaultValue: {}) Map get packages;/// Additional eval metadata. +@JsonKey(name: 'metadata') Map? get metadata;/// Scorers and args for this eval. + List get scorers;/// Metrics and args for this eval. + List get metrics; +/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSpecCopyWith get copyWith => _$EvalSpecCopyWithImpl(this as EvalSpec, _$identity); + + /// Serializes this EvalSpec to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSpec&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)&&(identical(other.evalId, evalId) || other.evalId == evalId)&&(identical(other.runId, runId) || other.runId == runId)&&(identical(other.created, created) || other.created == created)&&(identical(other.task, task) || other.task == task)&&(identical(other.taskId, taskId) || other.taskId == taskId)&&const DeepCollectionEquality().equals(other.taskVersion, taskVersion)&&(identical(other.taskFile, taskFile) || other.taskFile == taskFile)&&(identical(other.taskDisplayName, taskDisplayName) || other.taskDisplayName == taskDisplayName)&&(identical(other.taskRegistryName, taskRegistryName) || other.taskRegistryName == taskRegistryName)&&const DeepCollectionEquality().equals(other.taskAttribs, taskAttribs)&&const DeepCollectionEquality().equals(other.taskArgs, taskArgs)&&const DeepCollectionEquality().equals(other.taskArgsPassed, taskArgsPassed)&&(identical(other.solver, solver) || other.solver == solver)&&const DeepCollectionEquality().equals(other.solverArgs, solverArgs)&&const DeepCollectionEquality().equals(other.solverArgsPassed, solverArgsPassed)&&const DeepCollectionEquality().equals(other.tags, tags)&&(identical(other.dataset, dataset) || other.dataset == dataset)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&(identical(other.model, model) || other.model == model)&&(identical(other.modelGenerateConfig, modelGenerateConfig) || other.modelGenerateConfig == modelGenerateConfig)&&(identical(other.modelBaseUrl, modelBaseUrl) || other.modelBaseUrl == modelBaseUrl)&&const DeepCollectionEquality().equals(other.modelArgs, modelArgs)&&const DeepCollectionEquality().equals(other.modelRoles, modelRoles)&&(identical(other.config, config) || other.config == config)&&(identical(other.revision, revision) || other.revision == revision)&&const 
DeepCollectionEquality().equals(other.packages, packages)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&const DeepCollectionEquality().equals(other.scorers, scorers)&&const DeepCollectionEquality().equals(other.metrics, metrics)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,evalSetId,evalId,runId,created,task,taskId,const DeepCollectionEquality().hash(taskVersion),taskFile,taskDisplayName,taskRegistryName,const DeepCollectionEquality().hash(taskAttribs),const DeepCollectionEquality().hash(taskArgs),const DeepCollectionEquality().hash(taskArgsPassed),solver,const DeepCollectionEquality().hash(solverArgs),const DeepCollectionEquality().hash(solverArgsPassed),const DeepCollectionEquality().hash(tags),dataset,const DeepCollectionEquality().hash(sandbox),model,modelGenerateConfig,modelBaseUrl,const DeepCollectionEquality().hash(modelArgs),const DeepCollectionEquality().hash(modelRoles),config,revision,const DeepCollectionEquality().hash(packages),const DeepCollectionEquality().hash(metadata),const DeepCollectionEquality().hash(scorers),const DeepCollectionEquality().hash(metrics)]); + +@override +String toString() { + return 'EvalSpec(evalSetId: $evalSetId, evalId: $evalId, runId: $runId, created: $created, task: $task, taskId: $taskId, taskVersion: $taskVersion, taskFile: $taskFile, taskDisplayName: $taskDisplayName, taskRegistryName: $taskRegistryName, taskAttribs: $taskAttribs, taskArgs: $taskArgs, taskArgsPassed: $taskArgsPassed, solver: $solver, solverArgs: $solverArgs, solverArgsPassed: $solverArgsPassed, tags: $tags, dataset: $dataset, sandbox: $sandbox, model: $model, modelGenerateConfig: $modelGenerateConfig, modelBaseUrl: $modelBaseUrl, modelArgs: $modelArgs, modelRoles: $modelRoles, config: $config, revision: $revision, packages: $packages, metadata: $metadata, scorers: $scorers, metrics: $metrics)'; +} + + +} + +/// @nodoc +abstract mixin class 
$EvalSpecCopyWith<$Res> { + factory $EvalSpecCopyWith(EvalSpec value, $Res Function(EvalSpec) _then) = _$EvalSpecCopyWithImpl; +@useResult +$Res call({ +@JsonKey(name: 'eval_set_id') String? evalSetId,@JsonKey(name: 'eval_id') String evalId,@JsonKey(name: 'run_id') String runId, String created, String task,@JsonKey(name: 'task_id') String taskId,@JsonKey(name: 'task_version', defaultValue: 0) Object taskVersion,@JsonKey(name: 'task_file') String? taskFile,@JsonKey(name: 'task_display_name') String? taskDisplayName,@JsonKey(name: 'task_registry_name') String? taskRegistryName,@JsonKey(name: 'task_attribs', defaultValue: {}) Map taskAttribs,@JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs,@JsonKey(name: 'task_args_passed', defaultValue: {}) Map taskArgsPassed, String? solver,@JsonKey(name: 'solver_args', defaultValue: {}) Map solverArgs,@JsonKey(name: 'solver_args_passed', defaultValue: {}) Map solverArgsPassed, List tags, EvalDataset? dataset, Object? sandbox,@JsonKey(name: 'model') String model,@JsonKey(name: 'model_generate_config') GenerateConfig? modelGenerateConfig,@JsonKey(name: 'model_base_url') String? modelBaseUrl,@JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles, EvalConfig config, EvalRevision? revision,@JsonKey(name: 'packages', defaultValue: {}) Map packages,@JsonKey(name: 'metadata') Map? metadata, List scorers, List metrics +}); + + +$EvalDatasetCopyWith<$Res>? get dataset;$GenerateConfigCopyWith<$Res>? get modelGenerateConfig;$EvalConfigCopyWith<$Res> get config;$EvalRevisionCopyWith<$Res>? get revision; + +} +/// @nodoc +class _$EvalSpecCopyWithImpl<$Res> + implements $EvalSpecCopyWith<$Res> { + _$EvalSpecCopyWithImpl(this._self, this._then); + + final EvalSpec _self; + final $Res Function(EvalSpec) _then; + +/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? 
evalSetId = freezed,Object? evalId = null,Object? runId = null,Object? created = null,Object? task = null,Object? taskId = null,Object? taskVersion = null,Object? taskFile = freezed,Object? taskDisplayName = freezed,Object? taskRegistryName = freezed,Object? taskAttribs = null,Object? taskArgs = null,Object? taskArgsPassed = null,Object? solver = freezed,Object? solverArgs = null,Object? solverArgsPassed = null,Object? tags = null,Object? dataset = freezed,Object? sandbox = freezed,Object? model = null,Object? modelGenerateConfig = freezed,Object? modelBaseUrl = freezed,Object? modelArgs = null,Object? modelRoles = freezed,Object? config = null,Object? revision = freezed,Object? packages = null,Object? metadata = freezed,Object? scorers = null,Object? metrics = null,}) { + return _then(_self.copyWith( +evalSetId: freezed == evalSetId ? _self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String?,evalId: null == evalId ? _self.evalId : evalId // ignore: cast_nullable_to_non_nullable +as String,runId: null == runId ? _self.runId : runId // ignore: cast_nullable_to_non_nullable +as String,created: null == created ? _self.created : created // ignore: cast_nullable_to_non_nullable +as String,task: null == task ? _self.task : task // ignore: cast_nullable_to_non_nullable +as String,taskId: null == taskId ? _self.taskId : taskId // ignore: cast_nullable_to_non_nullable +as String,taskVersion: null == taskVersion ? _self.taskVersion : taskVersion ,taskFile: freezed == taskFile ? _self.taskFile : taskFile // ignore: cast_nullable_to_non_nullable +as String?,taskDisplayName: freezed == taskDisplayName ? _self.taskDisplayName : taskDisplayName // ignore: cast_nullable_to_non_nullable +as String?,taskRegistryName: freezed == taskRegistryName ? _self.taskRegistryName : taskRegistryName // ignore: cast_nullable_to_non_nullable +as String?,taskAttribs: null == taskAttribs ? 
_self.taskAttribs : taskAttribs // ignore: cast_nullable_to_non_nullable +as Map,taskArgs: null == taskArgs ? _self.taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map,taskArgsPassed: null == taskArgsPassed ? _self.taskArgsPassed : taskArgsPassed // ignore: cast_nullable_to_non_nullable +as Map,solver: freezed == solver ? _self.solver : solver // ignore: cast_nullable_to_non_nullable +as String?,solverArgs: null == solverArgs ? _self.solverArgs : solverArgs // ignore: cast_nullable_to_non_nullable +as Map,solverArgsPassed: null == solverArgsPassed ? _self.solverArgsPassed : solverArgsPassed // ignore: cast_nullable_to_non_nullable +as Map,tags: null == tags ? _self.tags : tags // ignore: cast_nullable_to_non_nullable +as List,dataset: freezed == dataset ? _self.dataset : dataset // ignore: cast_nullable_to_non_nullable +as EvalDataset?,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,model: null == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String,modelGenerateConfig: freezed == modelGenerateConfig ? _self.modelGenerateConfig : modelGenerateConfig // ignore: cast_nullable_to_non_nullable +as GenerateConfig?,modelBaseUrl: freezed == modelBaseUrl ? _self.modelBaseUrl : modelBaseUrl // ignore: cast_nullable_to_non_nullable +as String?,modelArgs: null == modelArgs ? _self.modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map,modelRoles: freezed == modelRoles ? _self.modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,config: null == config ? _self.config : config // ignore: cast_nullable_to_non_nullable +as EvalConfig,revision: freezed == revision ? _self.revision : revision // ignore: cast_nullable_to_non_nullable +as EvalRevision?,packages: null == packages ? _self.packages : packages // ignore: cast_nullable_to_non_nullable +as Map,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,scorers: null == scorers ? 
_self.scorers : scorers // ignore: cast_nullable_to_non_nullable +as List,metrics: null == metrics ? _self.metrics : metrics // ignore: cast_nullable_to_non_nullable +as List, + )); +} +/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalDatasetCopyWith<$Res>? get dataset { + if (_self.dataset == null) { + return null; + } + + return $EvalDatasetCopyWith<$Res>(_self.dataset!, (value) { + return _then(_self.copyWith(dataset: value)); + }); +}/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$GenerateConfigCopyWith<$Res>? get modelGenerateConfig { + if (_self.modelGenerateConfig == null) { + return null; + } + + return $GenerateConfigCopyWith<$Res>(_self.modelGenerateConfig!, (value) { + return _then(_self.copyWith(modelGenerateConfig: value)); + }); +}/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalConfigCopyWith<$Res> get config { + + return $EvalConfigCopyWith<$Res>(_self.config, (value) { + return _then(_self.copyWith(config: value)); + }); +}/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalRevisionCopyWith<$Res>? get revision { + if (_self.revision == null) { + return null; + } + + return $EvalRevisionCopyWith<$Res>(_self.revision!, (value) { + return _then(_self.copyWith(revision: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [EvalSpec]. +extension EvalSpecPatterns on EvalSpec { +/// A variant of `map` that fallback to returning `orElse`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSpec value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSpec() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSpec value) $default,){ +final _that = this; +switch (_that) { +case _EvalSpec(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSpec value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSpec() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'eval_set_id') String? 
evalSetId, @JsonKey(name: 'eval_id') String evalId, @JsonKey(name: 'run_id') String runId, String created, String task, @JsonKey(name: 'task_id') String taskId, @JsonKey(name: 'task_version', defaultValue: 0) Object taskVersion, @JsonKey(name: 'task_file') String? taskFile, @JsonKey(name: 'task_display_name') String? taskDisplayName, @JsonKey(name: 'task_registry_name') String? taskRegistryName, @JsonKey(name: 'task_attribs', defaultValue: {}) Map taskAttribs, @JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, @JsonKey(name: 'task_args_passed', defaultValue: {}) Map taskArgsPassed, String? solver, @JsonKey(name: 'solver_args', defaultValue: {}) Map solverArgs, @JsonKey(name: 'solver_args_passed', defaultValue: {}) Map solverArgsPassed, List tags, EvalDataset? dataset, Object? sandbox, @JsonKey(name: 'model') String model, @JsonKey(name: 'model_generate_config') GenerateConfig? modelGenerateConfig, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, EvalConfig config, EvalRevision? revision, @JsonKey(name: 'packages', defaultValue: {}) Map packages, @JsonKey(name: 'metadata') Map? metadata, List scorers, List metrics)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSpec() when $default != null: +return $default(_that.evalSetId,_that.evalId,_that.runId,_that.created,_that.task,_that.taskId,_that.taskVersion,_that.taskFile,_that.taskDisplayName,_that.taskRegistryName,_that.taskAttribs,_that.taskArgs,_that.taskArgsPassed,_that.solver,_that.solverArgs,_that.solverArgsPassed,_that.tags,_that.dataset,_that.sandbox,_that.model,_that.modelGenerateConfig,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.config,_that.revision,_that.packages,_that.metadata,_that.scorers,_that.metrics);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. 
+/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'eval_set_id') String? evalSetId, @JsonKey(name: 'eval_id') String evalId, @JsonKey(name: 'run_id') String runId, String created, String task, @JsonKey(name: 'task_id') String taskId, @JsonKey(name: 'task_version', defaultValue: 0) Object taskVersion, @JsonKey(name: 'task_file') String? taskFile, @JsonKey(name: 'task_display_name') String? taskDisplayName, @JsonKey(name: 'task_registry_name') String? taskRegistryName, @JsonKey(name: 'task_attribs', defaultValue: {}) Map taskAttribs, @JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, @JsonKey(name: 'task_args_passed', defaultValue: {}) Map taskArgsPassed, String? solver, @JsonKey(name: 'solver_args', defaultValue: {}) Map solverArgs, @JsonKey(name: 'solver_args_passed', defaultValue: {}) Map solverArgsPassed, List tags, EvalDataset? dataset, Object? sandbox, @JsonKey(name: 'model') String model, @JsonKey(name: 'model_generate_config') GenerateConfig? modelGenerateConfig, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, EvalConfig config, EvalRevision? revision, @JsonKey(name: 'packages', defaultValue: {}) Map packages, @JsonKey(name: 'metadata') Map? 
metadata, List scorers, List metrics) $default,) {final _that = this; +switch (_that) { +case _EvalSpec(): +return $default(_that.evalSetId,_that.evalId,_that.runId,_that.created,_that.task,_that.taskId,_that.taskVersion,_that.taskFile,_that.taskDisplayName,_that.taskRegistryName,_that.taskAttribs,_that.taskArgs,_that.taskArgsPassed,_that.solver,_that.solverArgs,_that.solverArgsPassed,_that.tags,_that.dataset,_that.sandbox,_that.model,_that.modelGenerateConfig,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.config,_that.revision,_that.packages,_that.metadata,_that.scorers,_that.metrics);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'eval_set_id') String? evalSetId, @JsonKey(name: 'eval_id') String evalId, @JsonKey(name: 'run_id') String runId, String created, String task, @JsonKey(name: 'task_id') String taskId, @JsonKey(name: 'task_version', defaultValue: 0) Object taskVersion, @JsonKey(name: 'task_file') String? taskFile, @JsonKey(name: 'task_display_name') String? taskDisplayName, @JsonKey(name: 'task_registry_name') String? taskRegistryName, @JsonKey(name: 'task_attribs', defaultValue: {}) Map taskAttribs, @JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, @JsonKey(name: 'task_args_passed', defaultValue: {}) Map taskArgsPassed, String? solver, @JsonKey(name: 'solver_args', defaultValue: {}) Map solverArgs, @JsonKey(name: 'solver_args_passed', defaultValue: {}) Map solverArgsPassed, List tags, EvalDataset? dataset, Object? sandbox, @JsonKey(name: 'model') String model, @JsonKey(name: 'model_generate_config') GenerateConfig? modelGenerateConfig, @JsonKey(name: 'model_base_url') String? 
modelBaseUrl, @JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, EvalConfig config, EvalRevision? revision, @JsonKey(name: 'packages', defaultValue: {}) Map packages, @JsonKey(name: 'metadata') Map? metadata, List scorers, List metrics)? $default,) {final _that = this; +switch (_that) { +case _EvalSpec() when $default != null: +return $default(_that.evalSetId,_that.evalId,_that.runId,_that.created,_that.task,_that.taskId,_that.taskVersion,_that.taskFile,_that.taskDisplayName,_that.taskRegistryName,_that.taskAttribs,_that.taskArgs,_that.taskArgsPassed,_that.solver,_that.solverArgs,_that.solverArgsPassed,_that.tags,_that.dataset,_that.sandbox,_that.model,_that.modelGenerateConfig,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.config,_that.revision,_that.packages,_that.metadata,_that.scorers,_that.metrics);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSpec extends EvalSpec { + const _EvalSpec({@JsonKey(name: 'eval_set_id') this.evalSetId, @JsonKey(name: 'eval_id') required this.evalId, @JsonKey(name: 'run_id') required this.runId, required this.created, required this.task, @JsonKey(name: 'task_id') required this.taskId, @JsonKey(name: 'task_version', defaultValue: 0) this.taskVersion = 0, @JsonKey(name: 'task_file') this.taskFile, @JsonKey(name: 'task_display_name') this.taskDisplayName, @JsonKey(name: 'task_registry_name') this.taskRegistryName, @JsonKey(name: 'task_attribs', defaultValue: {}) final Map taskAttribs = const {}, @JsonKey(name: 'task_args', defaultValue: {}) final Map taskArgs = const {}, @JsonKey(name: 'task_args_passed', defaultValue: {}) final Map taskArgsPassed = const {}, this.solver, @JsonKey(name: 'solver_args', defaultValue: {}) final Map solverArgs = const {}, @JsonKey(name: 'solver_args_passed', defaultValue: {}) final Map solverArgsPassed = const {}, final List tags = const [], this.dataset, this.sandbox, @JsonKey(name: 'model') 
required this.model, @JsonKey(name: 'model_generate_config') this.modelGenerateConfig, @JsonKey(name: 'model_base_url') this.modelBaseUrl, @JsonKey(name: 'model_args', defaultValue: {}) final Map modelArgs = const {}, @JsonKey(name: 'model_roles') final Map? modelRoles, this.config = const EvalConfig(), this.revision, @JsonKey(name: 'packages', defaultValue: {}) final Map packages = const {}, @JsonKey(name: 'metadata') final Map? metadata, final List scorers = const [], final List metrics = const []}): _taskAttribs = taskAttribs,_taskArgs = taskArgs,_taskArgsPassed = taskArgsPassed,_solverArgs = solverArgs,_solverArgsPassed = solverArgsPassed,_tags = tags,_modelArgs = modelArgs,_modelRoles = modelRoles,_packages = packages,_metadata = metadata,_scorers = scorers,_metrics = metrics,super._(); + factory _EvalSpec.fromJson(Map json) => _$EvalSpecFromJson(json); + +/// Globally unique id for eval set (if any). +@override@JsonKey(name: 'eval_set_id') final String? evalSetId; +/// Globally unique id for eval. +@override@JsonKey(name: 'eval_id') final String evalId; +/// Unique run id. +@override@JsonKey(name: 'run_id') final String runId; +/// Time created. +@override final String created; +/// Task name. +@override final String task; +/// Unique task id. +@override@JsonKey(name: 'task_id') final String taskId; +/// Task version. +@override@JsonKey(name: 'task_version', defaultValue: 0) final Object taskVersion; +/// Task source file. +@override@JsonKey(name: 'task_file') final String? taskFile; +/// Task display name. +@override@JsonKey(name: 'task_display_name') final String? taskDisplayName; +/// Task registry name. +@override@JsonKey(name: 'task_registry_name') final String? taskRegistryName; +/// Attributes of the @task decorator. + final Map _taskAttribs; +/// Attributes of the @task decorator. 
+@override@JsonKey(name: 'task_attribs', defaultValue: {}) Map get taskAttribs { + if (_taskAttribs is EqualUnmodifiableMapView) return _taskAttribs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_taskAttribs); +} + +/// Arguments used for invoking the task (including defaults). + final Map _taskArgs; +/// Arguments used for invoking the task (including defaults). +@override@JsonKey(name: 'task_args', defaultValue: {}) Map get taskArgs { + if (_taskArgs is EqualUnmodifiableMapView) return _taskArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_taskArgs); +} + +/// Arguments explicitly passed by caller for invoking the task. + final Map _taskArgsPassed; +/// Arguments explicitly passed by caller for invoking the task. +@override@JsonKey(name: 'task_args_passed', defaultValue: {}) Map get taskArgsPassed { + if (_taskArgsPassed is EqualUnmodifiableMapView) return _taskArgsPassed; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_taskArgsPassed); +} + +/// Solver name. +@override final String? solver; +/// Arguments used for invoking the solver. + final Map _solverArgs; +/// Arguments used for invoking the solver. +@override@JsonKey(name: 'solver_args', defaultValue: {}) Map get solverArgs { + if (_solverArgs is EqualUnmodifiableMapView) return _solverArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_solverArgs); +} + +/// Arguments explicitly passed by caller for invoking the solver. + final Map _solverArgsPassed; +/// Arguments explicitly passed by caller for invoking the solver. +@override@JsonKey(name: 'solver_args_passed', defaultValue: {}) Map get solverArgsPassed { + if (_solverArgsPassed is EqualUnmodifiableMapView) return _solverArgsPassed; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_solverArgsPassed); +} + +/// Tags associated with evaluation run. + final List _tags; +/// Tags associated with evaluation run. 
+@override@JsonKey() List get tags { + if (_tags is EqualUnmodifiableListView) return _tags; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_tags); +} + +/// Dataset used for eval. +@override final EvalDataset? dataset; +/// Sandbox environment type and optional config file. +@override final Object? sandbox; +/// Model used for eval. +@override@JsonKey(name: 'model') final String model; +/// Generate config specified for model instance. +@override@JsonKey(name: 'model_generate_config') final GenerateConfig? modelGenerateConfig; +/// Optional override of model base url. +@override@JsonKey(name: 'model_base_url') final String? modelBaseUrl; +/// Model specific arguments. + final Map _modelArgs; +/// Model specific arguments. +@override@JsonKey(name: 'model_args', defaultValue: {}) Map get modelArgs { + if (_modelArgs is EqualUnmodifiableMapView) return _modelArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_modelArgs); +} + +/// Model roles. + final Map? _modelRoles; +/// Model roles. +@override@JsonKey(name: 'model_roles') Map? get modelRoles { + final value = _modelRoles; + if (value == null) return null; + if (_modelRoles is EqualUnmodifiableMapView) return _modelRoles; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Configuration values for eval. +@override@JsonKey() final EvalConfig config; +/// Source revision of eval. +@override final EvalRevision? revision; +/// Package versions for eval. + final Map _packages; +/// Package versions for eval. +@override@JsonKey(name: 'packages', defaultValue: {}) Map get packages { + if (_packages is EqualUnmodifiableMapView) return _packages; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_packages); +} + +/// Additional eval metadata. + final Map? _metadata; +/// Additional eval metadata. +@override@JsonKey(name: 'metadata') Map? 
get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Scorers and args for this eval. + final List _scorers; +/// Scorers and args for this eval. +@override@JsonKey() List get scorers { + if (_scorers is EqualUnmodifiableListView) return _scorers; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_scorers); +} + +/// Metrics and args for this eval. + final List _metrics; +/// Metrics and args for this eval. +@override@JsonKey() List get metrics { + if (_metrics is EqualUnmodifiableListView) return _metrics; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_metrics); +} + + +/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSpecCopyWith<_EvalSpec> get copyWith => __$EvalSpecCopyWithImpl<_EvalSpec>(this, _$identity); + +@override +Map toJson() { + return _$EvalSpecToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSpec&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)&&(identical(other.evalId, evalId) || other.evalId == evalId)&&(identical(other.runId, runId) || other.runId == runId)&&(identical(other.created, created) || other.created == created)&&(identical(other.task, task) || other.task == task)&&(identical(other.taskId, taskId) || other.taskId == taskId)&&const DeepCollectionEquality().equals(other.taskVersion, taskVersion)&&(identical(other.taskFile, taskFile) || other.taskFile == taskFile)&&(identical(other.taskDisplayName, taskDisplayName) || other.taskDisplayName == taskDisplayName)&&(identical(other.taskRegistryName, taskRegistryName) || other.taskRegistryName == 
taskRegistryName)&&const DeepCollectionEquality().equals(other._taskAttribs, _taskAttribs)&&const DeepCollectionEquality().equals(other._taskArgs, _taskArgs)&&const DeepCollectionEquality().equals(other._taskArgsPassed, _taskArgsPassed)&&(identical(other.solver, solver) || other.solver == solver)&&const DeepCollectionEquality().equals(other._solverArgs, _solverArgs)&&const DeepCollectionEquality().equals(other._solverArgsPassed, _solverArgsPassed)&&const DeepCollectionEquality().equals(other._tags, _tags)&&(identical(other.dataset, dataset) || other.dataset == dataset)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&(identical(other.model, model) || other.model == model)&&(identical(other.modelGenerateConfig, modelGenerateConfig) || other.modelGenerateConfig == modelGenerateConfig)&&(identical(other.modelBaseUrl, modelBaseUrl) || other.modelBaseUrl == modelBaseUrl)&&const DeepCollectionEquality().equals(other._modelArgs, _modelArgs)&&const DeepCollectionEquality().equals(other._modelRoles, _modelRoles)&&(identical(other.config, config) || other.config == config)&&(identical(other.revision, revision) || other.revision == revision)&&const DeepCollectionEquality().equals(other._packages, _packages)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&const DeepCollectionEquality().equals(other._scorers, _scorers)&&const DeepCollectionEquality().equals(other._metrics, _metrics)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,evalSetId,evalId,runId,created,task,taskId,const DeepCollectionEquality().hash(taskVersion),taskFile,taskDisplayName,taskRegistryName,const DeepCollectionEquality().hash(_taskAttribs),const DeepCollectionEquality().hash(_taskArgs),const DeepCollectionEquality().hash(_taskArgsPassed),solver,const DeepCollectionEquality().hash(_solverArgs),const DeepCollectionEquality().hash(_solverArgsPassed),const 
DeepCollectionEquality().hash(_tags),dataset,const DeepCollectionEquality().hash(sandbox),model,modelGenerateConfig,modelBaseUrl,const DeepCollectionEquality().hash(_modelArgs),const DeepCollectionEquality().hash(_modelRoles),config,revision,const DeepCollectionEquality().hash(_packages),const DeepCollectionEquality().hash(_metadata),const DeepCollectionEquality().hash(_scorers),const DeepCollectionEquality().hash(_metrics)]); + +@override +String toString() { + return 'EvalSpec(evalSetId: $evalSetId, evalId: $evalId, runId: $runId, created: $created, task: $task, taskId: $taskId, taskVersion: $taskVersion, taskFile: $taskFile, taskDisplayName: $taskDisplayName, taskRegistryName: $taskRegistryName, taskAttribs: $taskAttribs, taskArgs: $taskArgs, taskArgsPassed: $taskArgsPassed, solver: $solver, solverArgs: $solverArgs, solverArgsPassed: $solverArgsPassed, tags: $tags, dataset: $dataset, sandbox: $sandbox, model: $model, modelGenerateConfig: $modelGenerateConfig, modelBaseUrl: $modelBaseUrl, modelArgs: $modelArgs, modelRoles: $modelRoles, config: $config, revision: $revision, packages: $packages, metadata: $metadata, scorers: $scorers, metrics: $metrics)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSpecCopyWith<$Res> implements $EvalSpecCopyWith<$Res> { + factory _$EvalSpecCopyWith(_EvalSpec value, $Res Function(_EvalSpec) _then) = __$EvalSpecCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'eval_set_id') String? evalSetId,@JsonKey(name: 'eval_id') String evalId,@JsonKey(name: 'run_id') String runId, String created, String task,@JsonKey(name: 'task_id') String taskId,@JsonKey(name: 'task_version', defaultValue: 0) Object taskVersion,@JsonKey(name: 'task_file') String? taskFile,@JsonKey(name: 'task_display_name') String? taskDisplayName,@JsonKey(name: 'task_registry_name') String? 
taskRegistryName,@JsonKey(name: 'task_attribs', defaultValue: {}) Map taskAttribs,@JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs,@JsonKey(name: 'task_args_passed', defaultValue: {}) Map taskArgsPassed, String? solver,@JsonKey(name: 'solver_args', defaultValue: {}) Map solverArgs,@JsonKey(name: 'solver_args_passed', defaultValue: {}) Map solverArgsPassed, List tags, EvalDataset? dataset, Object? sandbox,@JsonKey(name: 'model') String model,@JsonKey(name: 'model_generate_config') GenerateConfig? modelGenerateConfig,@JsonKey(name: 'model_base_url') String? modelBaseUrl,@JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles, EvalConfig config, EvalRevision? revision,@JsonKey(name: 'packages', defaultValue: {}) Map packages,@JsonKey(name: 'metadata') Map? metadata, List scorers, List metrics +}); + + +@override $EvalDatasetCopyWith<$Res>? get dataset;@override $GenerateConfigCopyWith<$Res>? get modelGenerateConfig;@override $EvalConfigCopyWith<$Res> get config;@override $EvalRevisionCopyWith<$Res>? get revision; + +} +/// @nodoc +class __$EvalSpecCopyWithImpl<$Res> + implements _$EvalSpecCopyWith<$Res> { + __$EvalSpecCopyWithImpl(this._self, this._then); + + final _EvalSpec _self; + final $Res Function(_EvalSpec) _then; + +/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? evalSetId = freezed,Object? evalId = null,Object? runId = null,Object? created = null,Object? task = null,Object? taskId = null,Object? taskVersion = null,Object? taskFile = freezed,Object? taskDisplayName = freezed,Object? taskRegistryName = freezed,Object? taskAttribs = null,Object? taskArgs = null,Object? taskArgsPassed = null,Object? solver = freezed,Object? solverArgs = null,Object? solverArgsPassed = null,Object? tags = null,Object? dataset = freezed,Object? sandbox = freezed,Object? model = null,Object? 
modelGenerateConfig = freezed,Object? modelBaseUrl = freezed,Object? modelArgs = null,Object? modelRoles = freezed,Object? config = null,Object? revision = freezed,Object? packages = null,Object? metadata = freezed,Object? scorers = null,Object? metrics = null,}) { + return _then(_EvalSpec( +evalSetId: freezed == evalSetId ? _self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String?,evalId: null == evalId ? _self.evalId : evalId // ignore: cast_nullable_to_non_nullable +as String,runId: null == runId ? _self.runId : runId // ignore: cast_nullable_to_non_nullable +as String,created: null == created ? _self.created : created // ignore: cast_nullable_to_non_nullable +as String,task: null == task ? _self.task : task // ignore: cast_nullable_to_non_nullable +as String,taskId: null == taskId ? _self.taskId : taskId // ignore: cast_nullable_to_non_nullable +as String,taskVersion: null == taskVersion ? _self.taskVersion : taskVersion ,taskFile: freezed == taskFile ? _self.taskFile : taskFile // ignore: cast_nullable_to_non_nullable +as String?,taskDisplayName: freezed == taskDisplayName ? _self.taskDisplayName : taskDisplayName // ignore: cast_nullable_to_non_nullable +as String?,taskRegistryName: freezed == taskRegistryName ? _self.taskRegistryName : taskRegistryName // ignore: cast_nullable_to_non_nullable +as String?,taskAttribs: null == taskAttribs ? _self._taskAttribs : taskAttribs // ignore: cast_nullable_to_non_nullable +as Map,taskArgs: null == taskArgs ? _self._taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map,taskArgsPassed: null == taskArgsPassed ? _self._taskArgsPassed : taskArgsPassed // ignore: cast_nullable_to_non_nullable +as Map,solver: freezed == solver ? _self.solver : solver // ignore: cast_nullable_to_non_nullable +as String?,solverArgs: null == solverArgs ? _self._solverArgs : solverArgs // ignore: cast_nullable_to_non_nullable +as Map,solverArgsPassed: null == solverArgsPassed ? 
_self._solverArgsPassed : solverArgsPassed // ignore: cast_nullable_to_non_nullable +as Map,tags: null == tags ? _self._tags : tags // ignore: cast_nullable_to_non_nullable +as List,dataset: freezed == dataset ? _self.dataset : dataset // ignore: cast_nullable_to_non_nullable +as EvalDataset?,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,model: null == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String,modelGenerateConfig: freezed == modelGenerateConfig ? _self.modelGenerateConfig : modelGenerateConfig // ignore: cast_nullable_to_non_nullable +as GenerateConfig?,modelBaseUrl: freezed == modelBaseUrl ? _self.modelBaseUrl : modelBaseUrl // ignore: cast_nullable_to_non_nullable +as String?,modelArgs: null == modelArgs ? _self._modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map,modelRoles: freezed == modelRoles ? _self._modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,config: null == config ? _self.config : config // ignore: cast_nullable_to_non_nullable +as EvalConfig,revision: freezed == revision ? _self.revision : revision // ignore: cast_nullable_to_non_nullable +as EvalRevision?,packages: null == packages ? _self._packages : packages // ignore: cast_nullable_to_non_nullable +as Map,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,scorers: null == scorers ? _self._scorers : scorers // ignore: cast_nullable_to_non_nullable +as List,metrics: null == metrics ? _self._metrics : metrics // ignore: cast_nullable_to_non_nullable +as List, + )); +} + +/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalDatasetCopyWith<$Res>? 
get dataset { + if (_self.dataset == null) { + return null; + } + + return $EvalDatasetCopyWith<$Res>(_self.dataset!, (value) { + return _then(_self.copyWith(dataset: value)); + }); +}/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$GenerateConfigCopyWith<$Res>? get modelGenerateConfig { + if (_self.modelGenerateConfig == null) { + return null; + } + + return $GenerateConfigCopyWith<$Res>(_self.modelGenerateConfig!, (value) { + return _then(_self.copyWith(modelGenerateConfig: value)); + }); +}/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalConfigCopyWith<$Res> get config { + + return $EvalConfigCopyWith<$Res>(_self.config, (value) { + return _then(_self.copyWith(config: value)); + }); +}/// Create a copy of EvalSpec +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalRevisionCopyWith<$Res>? get revision { + if (_self.revision == null) { + return null; + } + + return $EvalRevisionCopyWith<$Res>(_self.revision!, (value) { + return _then(_self.copyWith(revision: value)); + }); +} +} + + +/// @nodoc +mixin _$EvalDataset { + +/// Dataset name. + String? get name;/// Dataset location (file path or remote URL). + String? get location;/// Number of samples in the dataset. + int get samples;/// IDs of samples in the dataset. +@JsonKey(name: 'sample_ids') List? get sampleIds;/// Was the dataset shuffled after reading. + bool get shuffled; +/// Create a copy of EvalDataset +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalDatasetCopyWith get copyWith => _$EvalDatasetCopyWithImpl(this as EvalDataset, _$identity); + + /// Serializes this EvalDataset to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalDataset&&(identical(other.name, name) || other.name == name)&&(identical(other.location, location) || other.location == location)&&(identical(other.samples, samples) || other.samples == samples)&&const DeepCollectionEquality().equals(other.sampleIds, sampleIds)&&(identical(other.shuffled, shuffled) || other.shuffled == shuffled)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,location,samples,const DeepCollectionEquality().hash(sampleIds),shuffled); + +@override +String toString() { + return 'EvalDataset(name: $name, location: $location, samples: $samples, sampleIds: $sampleIds, shuffled: $shuffled)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalDatasetCopyWith<$Res> { + factory $EvalDatasetCopyWith(EvalDataset value, $Res Function(EvalDataset) _then) = _$EvalDatasetCopyWithImpl; +@useResult +$Res call({ + String? name, String? location, int samples,@JsonKey(name: 'sample_ids') List? sampleIds, bool shuffled +}); + + + + +} +/// @nodoc +class _$EvalDatasetCopyWithImpl<$Res> + implements $EvalDatasetCopyWith<$Res> { + _$EvalDatasetCopyWithImpl(this._self, this._then); + + final EvalDataset _self; + final $Res Function(EvalDataset) _then; + +/// Create a copy of EvalDataset +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? name = freezed,Object? location = freezed,Object? samples = null,Object? sampleIds = freezed,Object? shuffled = null,}) { + return _then(_self.copyWith( +name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,location: freezed == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String?,samples: null == samples ? 
_self.samples : samples // ignore: cast_nullable_to_non_nullable +as int,sampleIds: freezed == sampleIds ? _self.sampleIds : sampleIds // ignore: cast_nullable_to_non_nullable +as List?,shuffled: null == shuffled ? _self.shuffled : shuffled // ignore: cast_nullable_to_non_nullable +as bool, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalDataset]. +extension EvalDatasetPatterns on EvalDataset { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalDataset value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalDataset() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalDataset value) $default,){ +final _that = this; +switch (_that) { +case _EvalDataset(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalDataset value)? $default,){ +final _that = this; +switch (_that) { +case _EvalDataset() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String? name, String? location, int samples, @JsonKey(name: 'sample_ids') List? sampleIds, bool shuffled)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalDataset() when $default != null: +return $default(_that.name,_that.location,_that.samples,_that.sampleIds,_that.shuffled);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String? name, String? location, int samples, @JsonKey(name: 'sample_ids') List? sampleIds, bool shuffled) $default,) {final _that = this; +switch (_that) { +case _EvalDataset(): +return $default(_that.name,_that.location,_that.samples,_that.sampleIds,_that.shuffled);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String? name, String? location, int samples, @JsonKey(name: 'sample_ids') List? sampleIds, bool shuffled)? 
$default,) {final _that = this; +switch (_that) { +case _EvalDataset() when $default != null: +return $default(_that.name,_that.location,_that.samples,_that.sampleIds,_that.shuffled);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalDataset extends EvalDataset { + const _EvalDataset({this.name, this.location, required this.samples, @JsonKey(name: 'sample_ids') final List? sampleIds, this.shuffled = false}): _sampleIds = sampleIds,super._(); + factory _EvalDataset.fromJson(Map json) => _$EvalDatasetFromJson(json); + +/// Dataset name. +@override final String? name; +/// Dataset location (file path or remote URL). +@override final String? location; +/// Number of samples in the dataset. +@override final int samples; +/// IDs of samples in the dataset. + final List? _sampleIds; +/// IDs of samples in the dataset. +@override@JsonKey(name: 'sample_ids') List? get sampleIds { + final value = _sampleIds; + if (value == null) return null; + if (_sampleIds is EqualUnmodifiableListView) return _sampleIds; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Was the dataset shuffled after reading. +@override@JsonKey() final bool shuffled; + +/// Create a copy of EvalDataset +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalDatasetCopyWith<_EvalDataset> get copyWith => __$EvalDatasetCopyWithImpl<_EvalDataset>(this, _$identity); + +@override +Map toJson() { + return _$EvalDatasetToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalDataset&&(identical(other.name, name) || other.name == name)&&(identical(other.location, location) || other.location == location)&&(identical(other.samples, samples) || other.samples == samples)&&const DeepCollectionEquality().equals(other._sampleIds, _sampleIds)&&(identical(other.shuffled, shuffled) || other.shuffled == shuffled)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,location,samples,const DeepCollectionEquality().hash(_sampleIds),shuffled); + +@override +String toString() { + return 'EvalDataset(name: $name, location: $location, samples: $samples, sampleIds: $sampleIds, shuffled: $shuffled)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalDatasetCopyWith<$Res> implements $EvalDatasetCopyWith<$Res> { + factory _$EvalDatasetCopyWith(_EvalDataset value, $Res Function(_EvalDataset) _then) = __$EvalDatasetCopyWithImpl; +@override @useResult +$Res call({ + String? name, String? location, int samples,@JsonKey(name: 'sample_ids') List? sampleIds, bool shuffled +}); + + + + +} +/// @nodoc +class __$EvalDatasetCopyWithImpl<$Res> + implements _$EvalDatasetCopyWith<$Res> { + __$EvalDatasetCopyWithImpl(this._self, this._then); + + final _EvalDataset _self; + final $Res Function(_EvalDataset) _then; + +/// Create a copy of EvalDataset +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? name = freezed,Object? location = freezed,Object? samples = null,Object? sampleIds = freezed,Object? 
shuffled = null,}) { + return _then(_EvalDataset( +name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,location: freezed == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String?,samples: null == samples ? _self.samples : samples // ignore: cast_nullable_to_non_nullable +as int,sampleIds: freezed == sampleIds ? _self._sampleIds : sampleIds // ignore: cast_nullable_to_non_nullable +as List?,shuffled: null == shuffled ? _self.shuffled : shuffled // ignore: cast_nullable_to_non_nullable +as bool, + )); +} + + +} + + +/// @nodoc +mixin _$EvalConfig { + +/// Sample limit (number of samples or range of samples). + Object? get limit;/// Evaluate specific sample(s). +@JsonKey(name: 'sample_id') Object? get sampleId;/// Shuffle order of samples. +@JsonKey(name: 'sample_shuffle') bool? get sampleShuffle;/// Number of epochs to run samples over. + int? get epochs;/// Reducers for aggregating per-sample scores. +@JsonKey(name: 'epochs_reducer') List? get epochsReducer;/// Approval policy for tool use. + String? get approval;/// Fail eval when sample errors occur. +/// True to fail on first sample error (default); False to never fail on sample errors; +/// Value between 0 and 1 to fail if a proportion of total samples fails. +/// Value greater than 1 to fail eval if a count of samples fails. +@JsonKey(name: 'fail_on_error') Object? get failOnError;/// Continue eval even if the fail_on_error condition is met. +@JsonKey(name: 'continue_on_fail') bool? get continueOnFail;/// Number of times to retry samples if they encounter errors. +@JsonKey(name: 'retry_on_error') int? get retryOnError;/// Maximum messages to allow per sample. +@JsonKey(name: 'message_limit') int? get messageLimit;/// Maximum tokens usage per sample. +@JsonKey(name: 'token_limit') int? get tokenLimit;/// Maximum clock time per sample. +@JsonKey(name: 'time_limit') int? get timeLimit;/// Maximum working time per sample. 
+@JsonKey(name: 'working_limit') int? get workingLimit;/// Maximum number of samples to run in parallel. +@JsonKey(name: 'max_samples') int? get maxSamples;/// Maximum number of tasks to run in parallel. +@JsonKey(name: 'max_tasks') int? get maxTasks;/// Maximum number of subprocesses to run concurrently. +@JsonKey(name: 'max_subprocesses') int? get maxSubprocesses;/// Maximum number of sandboxes to run concurrently. +@JsonKey(name: 'max_sandboxes') int? get maxSandboxes;/// Cleanup sandbox environments after task completes. +@JsonKey(name: 'sandbox_cleanup') bool? get sandboxCleanup;/// Log detailed information on each sample. +@JsonKey(name: 'log_samples') bool? get logSamples;/// Log events in realtime (enables live viewing of samples in inspect view). +@JsonKey(name: 'log_realtime') bool? get logRealtime;/// Log base64 encoded versions of images. +@JsonKey(name: 'log_images') bool? get logImages;/// Number of samples to buffer before writing log file. +@JsonKey(name: 'log_buffer') int? get logBuffer;/// Interval (in seconds) for syncing sample events to log directory. +@JsonKey(name: 'log_shared') int? get logShared;/// Display scoring metrics realtime. +@JsonKey(name: 'score_display') bool? get scoreDisplay; +/// Create a copy of EvalConfig +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalConfigCopyWith get copyWith => _$EvalConfigCopyWithImpl(this as EvalConfig, _$identity); + + /// Serializes this EvalConfig to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalConfig&&const DeepCollectionEquality().equals(other.limit, limit)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)&&(identical(other.sampleShuffle, sampleShuffle) || other.sampleShuffle == sampleShuffle)&&(identical(other.epochs, epochs) || other.epochs == epochs)&&const DeepCollectionEquality().equals(other.epochsReducer, epochsReducer)&&(identical(other.approval, approval) || other.approval == approval)&&const DeepCollectionEquality().equals(other.failOnError, failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.retryOnError, retryOnError) || other.retryOnError == retryOnError)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.maxSamples, maxSamples) || other.maxSamples == maxSamples)&&(identical(other.maxTasks, maxTasks) || other.maxTasks == maxTasks)&&(identical(other.maxSubprocesses, maxSubprocesses) || other.maxSubprocesses == maxSubprocesses)&&(identical(other.maxSandboxes, maxSandboxes) || other.maxSandboxes == maxSandboxes)&&(identical(other.sandboxCleanup, sandboxCleanup) || other.sandboxCleanup == sandboxCleanup)&&(identical(other.logSamples, logSamples) || other.logSamples == logSamples)&&(identical(other.logRealtime, logRealtime) || other.logRealtime == logRealtime)&&(identical(other.logImages, logImages) || other.logImages == logImages)&&(identical(other.logBuffer, logBuffer) || other.logBuffer == logBuffer)&&(identical(other.logShared, logShared) || other.logShared == logShared)&&(identical(other.scoreDisplay, scoreDisplay) || 
other.scoreDisplay == scoreDisplay)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,const DeepCollectionEquality().hash(limit),const DeepCollectionEquality().hash(sampleId),sampleShuffle,epochs,const DeepCollectionEquality().hash(epochsReducer),approval,const DeepCollectionEquality().hash(failOnError),continueOnFail,retryOnError,messageLimit,tokenLimit,timeLimit,workingLimit,maxSamples,maxTasks,maxSubprocesses,maxSandboxes,sandboxCleanup,logSamples,logRealtime,logImages,logBuffer,logShared,scoreDisplay]); + +@override +String toString() { + return 'EvalConfig(limit: $limit, sampleId: $sampleId, sampleShuffle: $sampleShuffle, epochs: $epochs, epochsReducer: $epochsReducer, approval: $approval, failOnError: $failOnError, continueOnFail: $continueOnFail, retryOnError: $retryOnError, messageLimit: $messageLimit, tokenLimit: $tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, maxSamples: $maxSamples, maxTasks: $maxTasks, maxSubprocesses: $maxSubprocesses, maxSandboxes: $maxSandboxes, sandboxCleanup: $sandboxCleanup, logSamples: $logSamples, logRealtime: $logRealtime, logImages: $logImages, logBuffer: $logBuffer, logShared: $logShared, scoreDisplay: $scoreDisplay)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalConfigCopyWith<$Res> { + factory $EvalConfigCopyWith(EvalConfig value, $Res Function(EvalConfig) _then) = _$EvalConfigCopyWithImpl; +@useResult +$Res call({ + Object? limit,@JsonKey(name: 'sample_id') Object? sampleId,@JsonKey(name: 'sample_shuffle') bool? sampleShuffle, int? epochs,@JsonKey(name: 'epochs_reducer') List? epochsReducer, String? approval,@JsonKey(name: 'fail_on_error') Object? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'retry_on_error') int? retryOnError,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? 
timeLimit,@JsonKey(name: 'working_limit') int? workingLimit,@JsonKey(name: 'max_samples') int? maxSamples,@JsonKey(name: 'max_tasks') int? maxTasks,@JsonKey(name: 'max_subprocesses') int? maxSubprocesses,@JsonKey(name: 'max_sandboxes') int? maxSandboxes,@JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup,@JsonKey(name: 'log_samples') bool? logSamples,@JsonKey(name: 'log_realtime') bool? logRealtime,@JsonKey(name: 'log_images') bool? logImages,@JsonKey(name: 'log_buffer') int? logBuffer,@JsonKey(name: 'log_shared') int? logShared,@JsonKey(name: 'score_display') bool? scoreDisplay +}); + + + + +} +/// @nodoc +class _$EvalConfigCopyWithImpl<$Res> + implements $EvalConfigCopyWith<$Res> { + _$EvalConfigCopyWithImpl(this._self, this._then); + + final EvalConfig _self; + final $Res Function(EvalConfig) _then; + +/// Create a copy of EvalConfig +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? limit = freezed,Object? sampleId = freezed,Object? sampleShuffle = freezed,Object? epochs = freezed,Object? epochsReducer = freezed,Object? approval = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? retryOnError = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? workingLimit = freezed,Object? maxSamples = freezed,Object? maxTasks = freezed,Object? maxSubprocesses = freezed,Object? maxSandboxes = freezed,Object? sandboxCleanup = freezed,Object? logSamples = freezed,Object? logRealtime = freezed,Object? logImages = freezed,Object? logBuffer = freezed,Object? logShared = freezed,Object? scoreDisplay = freezed,}) { + return _then(_self.copyWith( +limit: freezed == limit ? _self.limit : limit ,sampleId: freezed == sampleId ? _self.sampleId : sampleId ,sampleShuffle: freezed == sampleShuffle ? _self.sampleShuffle : sampleShuffle // ignore: cast_nullable_to_non_nullable +as bool?,epochs: freezed == epochs ? 
_self.epochs : epochs // ignore: cast_nullable_to_non_nullable +as int?,epochsReducer: freezed == epochsReducer ? _self.epochsReducer : epochsReducer // ignore: cast_nullable_to_non_nullable +as List?,approval: freezed == approval ? _self.approval : approval // ignore: cast_nullable_to_non_nullable +as String?,failOnError: freezed == failOnError ? _self.failOnError : failOnError ,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,retryOnError: freezed == retryOnError ? _self.retryOnError : retryOnError // ignore: cast_nullable_to_non_nullable +as int?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,maxSamples: freezed == maxSamples ? _self.maxSamples : maxSamples // ignore: cast_nullable_to_non_nullable +as int?,maxTasks: freezed == maxTasks ? _self.maxTasks : maxTasks // ignore: cast_nullable_to_non_nullable +as int?,maxSubprocesses: freezed == maxSubprocesses ? _self.maxSubprocesses : maxSubprocesses // ignore: cast_nullable_to_non_nullable +as int?,maxSandboxes: freezed == maxSandboxes ? _self.maxSandboxes : maxSandboxes // ignore: cast_nullable_to_non_nullable +as int?,sandboxCleanup: freezed == sandboxCleanup ? _self.sandboxCleanup : sandboxCleanup // ignore: cast_nullable_to_non_nullable +as bool?,logSamples: freezed == logSamples ? _self.logSamples : logSamples // ignore: cast_nullable_to_non_nullable +as bool?,logRealtime: freezed == logRealtime ? 
_self.logRealtime : logRealtime // ignore: cast_nullable_to_non_nullable +as bool?,logImages: freezed == logImages ? _self.logImages : logImages // ignore: cast_nullable_to_non_nullable +as bool?,logBuffer: freezed == logBuffer ? _self.logBuffer : logBuffer // ignore: cast_nullable_to_non_nullable +as int?,logShared: freezed == logShared ? _self.logShared : logShared // ignore: cast_nullable_to_non_nullable +as int?,scoreDisplay: freezed == scoreDisplay ? _self.scoreDisplay : scoreDisplay // ignore: cast_nullable_to_non_nullable +as bool?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalConfig]. +extension EvalConfigPatterns on EvalConfig { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalConfig value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalConfig() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalConfig value) $default,){ +final _that = this; +switch (_that) { +case _EvalConfig(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalConfig value)? 
$default,){ +final _that = this; +switch (_that) { +case _EvalConfig() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') bool? sampleShuffle, int? epochs, @JsonKey(name: 'epochs_reducer') List? epochsReducer, String? approval, @JsonKey(name: 'fail_on_error') Object? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'score_display') bool? scoreDisplay)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalConfig() when $default != null: +return $default(_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.epochsReducer,_that.approval,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.sandboxCleanup,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.scoreDisplay);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') bool? sampleShuffle, int? epochs, @JsonKey(name: 'epochs_reducer') List? epochsReducer, String? approval, @JsonKey(name: 'fail_on_error') Object? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? 
logShared, @JsonKey(name: 'score_display') bool? scoreDisplay) $default,) {final _that = this; +switch (_that) { +case _EvalConfig(): +return $default(_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.epochsReducer,_that.approval,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.sandboxCleanup,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.scoreDisplay);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') bool? sampleShuffle, int? epochs, @JsonKey(name: 'epochs_reducer') List? epochsReducer, String? approval, @JsonKey(name: 'fail_on_error') Object? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? 
logShared, @JsonKey(name: 'score_display') bool? scoreDisplay)? $default,) {final _that = this; +switch (_that) { +case _EvalConfig() when $default != null: +return $default(_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.epochsReducer,_that.approval,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.sandboxCleanup,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.scoreDisplay);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalConfig extends EvalConfig { + const _EvalConfig({this.limit, @JsonKey(name: 'sample_id') this.sampleId, @JsonKey(name: 'sample_shuffle') this.sampleShuffle, this.epochs, @JsonKey(name: 'epochs_reducer') final List? epochsReducer, this.approval, @JsonKey(name: 'fail_on_error') this.failOnError, @JsonKey(name: 'continue_on_fail') this.continueOnFail, @JsonKey(name: 'retry_on_error') this.retryOnError, @JsonKey(name: 'message_limit') this.messageLimit, @JsonKey(name: 'token_limit') this.tokenLimit, @JsonKey(name: 'time_limit') this.timeLimit, @JsonKey(name: 'working_limit') this.workingLimit, @JsonKey(name: 'max_samples') this.maxSamples, @JsonKey(name: 'max_tasks') this.maxTasks, @JsonKey(name: 'max_subprocesses') this.maxSubprocesses, @JsonKey(name: 'max_sandboxes') this.maxSandboxes, @JsonKey(name: 'sandbox_cleanup') this.sandboxCleanup, @JsonKey(name: 'log_samples') this.logSamples, @JsonKey(name: 'log_realtime') this.logRealtime, @JsonKey(name: 'log_images') this.logImages, @JsonKey(name: 'log_buffer') this.logBuffer, @JsonKey(name: 'log_shared') this.logShared, @JsonKey(name: 'score_display') this.scoreDisplay}): _epochsReducer = epochsReducer,super._(); + factory _EvalConfig.fromJson(Map json) => _$EvalConfigFromJson(json); + +/// Sample limit (number of samples or range of samples). 
+@override final Object? limit; +/// Evaluate specific sample(s). +@override@JsonKey(name: 'sample_id') final Object? sampleId; +/// Shuffle order of samples. +@override@JsonKey(name: 'sample_shuffle') final bool? sampleShuffle; +/// Number of epochs to run samples over. +@override final int? epochs; +/// Reducers for aggregating per-sample scores. + final List? _epochsReducer; +/// Reducers for aggregating per-sample scores. +@override@JsonKey(name: 'epochs_reducer') List? get epochsReducer { + final value = _epochsReducer; + if (value == null) return null; + if (_epochsReducer is EqualUnmodifiableListView) return _epochsReducer; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Approval policy for tool use. +@override final String? approval; +/// Fail eval when sample errors occur. +/// True to fail on first sample error (default); False to never fail on sample errors; +/// Value between 0 and 1 to fail if a proportion of total samples fails. +/// Value greater than 1 to fail eval if a count of samples fails. +@override@JsonKey(name: 'fail_on_error') final Object? failOnError; +/// Continue eval even if the fail_on_error condition is met. +@override@JsonKey(name: 'continue_on_fail') final bool? continueOnFail; +/// Number of times to retry samples if they encounter errors. +@override@JsonKey(name: 'retry_on_error') final int? retryOnError; +/// Maximum messages to allow per sample. +@override@JsonKey(name: 'message_limit') final int? messageLimit; +/// Maximum tokens usage per sample. +@override@JsonKey(name: 'token_limit') final int? tokenLimit; +/// Maximum clock time per sample. +@override@JsonKey(name: 'time_limit') final int? timeLimit; +/// Maximum working time per sample. +@override@JsonKey(name: 'working_limit') final int? workingLimit; +/// Maximum number of samples to run in parallel. +@override@JsonKey(name: 'max_samples') final int? maxSamples; +/// Maximum number of tasks to run in parallel. 
+@override@JsonKey(name: 'max_tasks') final int? maxTasks; +/// Maximum number of subprocesses to run concurrently. +@override@JsonKey(name: 'max_subprocesses') final int? maxSubprocesses; +/// Maximum number of sandboxes to run concurrently. +@override@JsonKey(name: 'max_sandboxes') final int? maxSandboxes; +/// Cleanup sandbox environments after task completes. +@override@JsonKey(name: 'sandbox_cleanup') final bool? sandboxCleanup; +/// Log detailed information on each sample. +@override@JsonKey(name: 'log_samples') final bool? logSamples; +/// Log events in realtime (enables live viewing of samples in inspect view). +@override@JsonKey(name: 'log_realtime') final bool? logRealtime; +/// Log base64 encoded versions of images. +@override@JsonKey(name: 'log_images') final bool? logImages; +/// Number of samples to buffer before writing log file. +@override@JsonKey(name: 'log_buffer') final int? logBuffer; +/// Interval (in seconds) for syncing sample events to log directory. +@override@JsonKey(name: 'log_shared') final int? logShared; +/// Display scoring metrics realtime. +@override@JsonKey(name: 'score_display') final bool? scoreDisplay; + +/// Create a copy of EvalConfig +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalConfigCopyWith<_EvalConfig> get copyWith => __$EvalConfigCopyWithImpl<_EvalConfig>(this, _$identity); + +@override +Map toJson() { + return _$EvalConfigToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalConfig&&const DeepCollectionEquality().equals(other.limit, limit)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)&&(identical(other.sampleShuffle, sampleShuffle) || other.sampleShuffle == sampleShuffle)&&(identical(other.epochs, epochs) || other.epochs == epochs)&&const DeepCollectionEquality().equals(other._epochsReducer, _epochsReducer)&&(identical(other.approval, approval) || other.approval == approval)&&const DeepCollectionEquality().equals(other.failOnError, failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.retryOnError, retryOnError) || other.retryOnError == retryOnError)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.maxSamples, maxSamples) || other.maxSamples == maxSamples)&&(identical(other.maxTasks, maxTasks) || other.maxTasks == maxTasks)&&(identical(other.maxSubprocesses, maxSubprocesses) || other.maxSubprocesses == maxSubprocesses)&&(identical(other.maxSandboxes, maxSandboxes) || other.maxSandboxes == maxSandboxes)&&(identical(other.sandboxCleanup, sandboxCleanup) || other.sandboxCleanup == sandboxCleanup)&&(identical(other.logSamples, logSamples) || other.logSamples == logSamples)&&(identical(other.logRealtime, logRealtime) || other.logRealtime == 
logRealtime)&&(identical(other.logImages, logImages) || other.logImages == logImages)&&(identical(other.logBuffer, logBuffer) || other.logBuffer == logBuffer)&&(identical(other.logShared, logShared) || other.logShared == logShared)&&(identical(other.scoreDisplay, scoreDisplay) || other.scoreDisplay == scoreDisplay)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,const DeepCollectionEquality().hash(limit),const DeepCollectionEquality().hash(sampleId),sampleShuffle,epochs,const DeepCollectionEquality().hash(_epochsReducer),approval,const DeepCollectionEquality().hash(failOnError),continueOnFail,retryOnError,messageLimit,tokenLimit,timeLimit,workingLimit,maxSamples,maxTasks,maxSubprocesses,maxSandboxes,sandboxCleanup,logSamples,logRealtime,logImages,logBuffer,logShared,scoreDisplay]); + +@override +String toString() { + return 'EvalConfig(limit: $limit, sampleId: $sampleId, sampleShuffle: $sampleShuffle, epochs: $epochs, epochsReducer: $epochsReducer, approval: $approval, failOnError: $failOnError, continueOnFail: $continueOnFail, retryOnError: $retryOnError, messageLimit: $messageLimit, tokenLimit: $tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, maxSamples: $maxSamples, maxTasks: $maxTasks, maxSubprocesses: $maxSubprocesses, maxSandboxes: $maxSandboxes, sandboxCleanup: $sandboxCleanup, logSamples: $logSamples, logRealtime: $logRealtime, logImages: $logImages, logBuffer: $logBuffer, logShared: $logShared, scoreDisplay: $scoreDisplay)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalConfigCopyWith<$Res> implements $EvalConfigCopyWith<$Res> { + factory _$EvalConfigCopyWith(_EvalConfig value, $Res Function(_EvalConfig) _then) = __$EvalConfigCopyWithImpl; +@override @useResult +$Res call({ + Object? limit,@JsonKey(name: 'sample_id') Object? sampleId,@JsonKey(name: 'sample_shuffle') bool? sampleShuffle, int? epochs,@JsonKey(name: 'epochs_reducer') List? epochsReducer, String? 
approval,@JsonKey(name: 'fail_on_error') Object? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'retry_on_error') int? retryOnError,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? workingLimit,@JsonKey(name: 'max_samples') int? maxSamples,@JsonKey(name: 'max_tasks') int? maxTasks,@JsonKey(name: 'max_subprocesses') int? maxSubprocesses,@JsonKey(name: 'max_sandboxes') int? maxSandboxes,@JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup,@JsonKey(name: 'log_samples') bool? logSamples,@JsonKey(name: 'log_realtime') bool? logRealtime,@JsonKey(name: 'log_images') bool? logImages,@JsonKey(name: 'log_buffer') int? logBuffer,@JsonKey(name: 'log_shared') int? logShared,@JsonKey(name: 'score_display') bool? scoreDisplay +}); + + + + +} +/// @nodoc +class __$EvalConfigCopyWithImpl<$Res> + implements _$EvalConfigCopyWith<$Res> { + __$EvalConfigCopyWithImpl(this._self, this._then); + + final _EvalConfig _self; + final $Res Function(_EvalConfig) _then; + +/// Create a copy of EvalConfig +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? limit = freezed,Object? sampleId = freezed,Object? sampleShuffle = freezed,Object? epochs = freezed,Object? epochsReducer = freezed,Object? approval = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? retryOnError = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? workingLimit = freezed,Object? maxSamples = freezed,Object? maxTasks = freezed,Object? maxSubprocesses = freezed,Object? maxSandboxes = freezed,Object? sandboxCleanup = freezed,Object? logSamples = freezed,Object? logRealtime = freezed,Object? logImages = freezed,Object? logBuffer = freezed,Object? logShared = freezed,Object? 
scoreDisplay = freezed,}) { + return _then(_EvalConfig( +limit: freezed == limit ? _self.limit : limit ,sampleId: freezed == sampleId ? _self.sampleId : sampleId ,sampleShuffle: freezed == sampleShuffle ? _self.sampleShuffle : sampleShuffle // ignore: cast_nullable_to_non_nullable +as bool?,epochs: freezed == epochs ? _self.epochs : epochs // ignore: cast_nullable_to_non_nullable +as int?,epochsReducer: freezed == epochsReducer ? _self._epochsReducer : epochsReducer // ignore: cast_nullable_to_non_nullable +as List?,approval: freezed == approval ? _self.approval : approval // ignore: cast_nullable_to_non_nullable +as String?,failOnError: freezed == failOnError ? _self.failOnError : failOnError ,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,retryOnError: freezed == retryOnError ? _self.retryOnError : retryOnError // ignore: cast_nullable_to_non_nullable +as int?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,maxSamples: freezed == maxSamples ? _self.maxSamples : maxSamples // ignore: cast_nullable_to_non_nullable +as int?,maxTasks: freezed == maxTasks ? _self.maxTasks : maxTasks // ignore: cast_nullable_to_non_nullable +as int?,maxSubprocesses: freezed == maxSubprocesses ? _self.maxSubprocesses : maxSubprocesses // ignore: cast_nullable_to_non_nullable +as int?,maxSandboxes: freezed == maxSandboxes ? _self.maxSandboxes : maxSandboxes // ignore: cast_nullable_to_non_nullable +as int?,sandboxCleanup: freezed == sandboxCleanup ? 
_self.sandboxCleanup : sandboxCleanup // ignore: cast_nullable_to_non_nullable +as bool?,logSamples: freezed == logSamples ? _self.logSamples : logSamples // ignore: cast_nullable_to_non_nullable +as bool?,logRealtime: freezed == logRealtime ? _self.logRealtime : logRealtime // ignore: cast_nullable_to_non_nullable +as bool?,logImages: freezed == logImages ? _self.logImages : logImages // ignore: cast_nullable_to_non_nullable +as bool?,logBuffer: freezed == logBuffer ? _self.logBuffer : logBuffer // ignore: cast_nullable_to_non_nullable +as int?,logShared: freezed == logShared ? _self.logShared : logShared // ignore: cast_nullable_to_non_nullable +as int?,scoreDisplay: freezed == scoreDisplay ? _self.scoreDisplay : scoreDisplay // ignore: cast_nullable_to_non_nullable +as bool?, + )); +} + + +} + + +/// @nodoc +mixin _$EvalRevision { + +/// Type of revision (currently only “git”). + String get type;/// Revision origin server. + String get origin;/// Revision commit. + String get commit;/// Working tree has uncommitted changes or untracked files. + bool get dirty; +/// Create a copy of EvalRevision +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalRevisionCopyWith get copyWith => _$EvalRevisionCopyWithImpl(this as EvalRevision, _$identity); + + /// Serializes this EvalRevision to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalRevision&&(identical(other.type, type) || other.type == type)&&(identical(other.origin, origin) || other.origin == origin)&&(identical(other.commit, commit) || other.commit == commit)&&(identical(other.dirty, dirty) || other.dirty == dirty)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,type,origin,commit,dirty); + +@override +String toString() { + return 'EvalRevision(type: $type, origin: $origin, commit: $commit, dirty: $dirty)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalRevisionCopyWith<$Res> { + factory $EvalRevisionCopyWith(EvalRevision value, $Res Function(EvalRevision) _then) = _$EvalRevisionCopyWithImpl; +@useResult +$Res call({ + String type, String origin, String commit, bool dirty +}); + + + + +} +/// @nodoc +class _$EvalRevisionCopyWithImpl<$Res> + implements $EvalRevisionCopyWith<$Res> { + _$EvalRevisionCopyWithImpl(this._self, this._then); + + final EvalRevision _self; + final $Res Function(EvalRevision) _then; + +/// Create a copy of EvalRevision +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? type = null,Object? origin = null,Object? commit = null,Object? dirty = null,}) { + return _then(_self.copyWith( +type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String,origin: null == origin ? _self.origin : origin // ignore: cast_nullable_to_non_nullable +as String,commit: null == commit ? _self.commit : commit // ignore: cast_nullable_to_non_nullable +as String,dirty: null == dirty ? _self.dirty : dirty // ignore: cast_nullable_to_non_nullable +as bool, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalRevision]. 
+extension EvalRevisionPatterns on EvalRevision { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalRevision value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalRevision() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalRevision value) $default,){ +final _that = this; +switch (_that) { +case _EvalRevision(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalRevision value)? $default,){ +final _that = this; +switch (_that) { +case _EvalRevision() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String type, String origin, String commit, bool dirty)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalRevision() when $default != null: +return $default(_that.type,_that.origin,_that.commit,_that.dirty);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String type, String origin, String commit, bool dirty) $default,) {final _that = this; +switch (_that) { +case _EvalRevision(): +return $default(_that.type,_that.origin,_that.commit,_that.dirty);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String type, String origin, String commit, bool dirty)? $default,) {final _that = this; +switch (_that) { +case _EvalRevision() when $default != null: +return $default(_that.type,_that.origin,_that.commit,_that.dirty);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalRevision extends EvalRevision { + const _EvalRevision({required this.type, required this.origin, required this.commit, this.dirty = false}): super._(); + factory _EvalRevision.fromJson(Map json) => _$EvalRevisionFromJson(json); + +/// Type of revision (currently only “git”). +@override final String type; +/// Revision origin server. +@override final String origin; +/// Revision commit. +@override final String commit; +/// Working tree has uncommitted changes or untracked files. 
+@override@JsonKey() final bool dirty; + +/// Create a copy of EvalRevision +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalRevisionCopyWith<_EvalRevision> get copyWith => __$EvalRevisionCopyWithImpl<_EvalRevision>(this, _$identity); + +@override +Map toJson() { + return _$EvalRevisionToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalRevision&&(identical(other.type, type) || other.type == type)&&(identical(other.origin, origin) || other.origin == origin)&&(identical(other.commit, commit) || other.commit == commit)&&(identical(other.dirty, dirty) || other.dirty == dirty)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,type,origin,commit,dirty); + +@override +String toString() { + return 'EvalRevision(type: $type, origin: $origin, commit: $commit, dirty: $dirty)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalRevisionCopyWith<$Res> implements $EvalRevisionCopyWith<$Res> { + factory _$EvalRevisionCopyWith(_EvalRevision value, $Res Function(_EvalRevision) _then) = __$EvalRevisionCopyWithImpl; +@override @useResult +$Res call({ + String type, String origin, String commit, bool dirty +}); + + + + +} +/// @nodoc +class __$EvalRevisionCopyWithImpl<$Res> + implements _$EvalRevisionCopyWith<$Res> { + __$EvalRevisionCopyWithImpl(this._self, this._then); + + final _EvalRevision _self; + final $Res Function(_EvalRevision) _then; + +/// Create a copy of EvalRevision +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? type = null,Object? origin = null,Object? commit = null,Object? dirty = null,}) { + return _then(_EvalRevision( +type: null == type ? 
_self.type : type // ignore: cast_nullable_to_non_nullable +as String,origin: null == origin ? _self.origin : origin // ignore: cast_nullable_to_non_nullable +as String,commit: null == commit ? _self.commit : commit // ignore: cast_nullable_to_non_nullable +as String,dirty: null == dirty ? _self.dirty : dirty // ignore: cast_nullable_to_non_nullable +as bool, + )); +} + + +} + + +/// @nodoc +mixin _$EvalPlan { + +/// Plan name. + String get name;/// Steps in plan. + List get steps;/// Step to always run at the end. + EvalPlanStep? get finish;/// Generation config. + GenerateConfig get config; +/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalPlanCopyWith get copyWith => _$EvalPlanCopyWithImpl(this as EvalPlan, _$identity); + + /// Serializes this EvalPlan to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalPlan&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.steps, steps)&&(identical(other.finish, finish) || other.finish == finish)&&(identical(other.config, config) || other.config == config)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(steps),finish,config); + +@override +String toString() { + return 'EvalPlan(name: $name, steps: $steps, finish: $finish, config: $config)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalPlanCopyWith<$Res> { + factory $EvalPlanCopyWith(EvalPlan value, $Res Function(EvalPlan) _then) = _$EvalPlanCopyWithImpl; +@useResult +$Res call({ + String name, List steps, EvalPlanStep? finish, GenerateConfig config +}); + + +$EvalPlanStepCopyWith<$Res>? 
get finish;$GenerateConfigCopyWith<$Res> get config; + +} +/// @nodoc +class _$EvalPlanCopyWithImpl<$Res> + implements $EvalPlanCopyWith<$Res> { + _$EvalPlanCopyWithImpl(this._self, this._then); + + final EvalPlan _self; + final $Res Function(EvalPlan) _then; + +/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? name = null,Object? steps = null,Object? finish = freezed,Object? config = null,}) { + return _then(_self.copyWith( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,steps: null == steps ? _self.steps : steps // ignore: cast_nullable_to_non_nullable +as List,finish: freezed == finish ? _self.finish : finish // ignore: cast_nullable_to_non_nullable +as EvalPlanStep?,config: null == config ? _self.config : config // ignore: cast_nullable_to_non_nullable +as GenerateConfig, + )); +} +/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalPlanStepCopyWith<$Res>? get finish { + if (_self.finish == null) { + return null; + } + + return $EvalPlanStepCopyWith<$Res>(_self.finish!, (value) { + return _then(_self.copyWith(finish: value)); + }); +}/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$GenerateConfigCopyWith<$Res> get config { + + return $GenerateConfigCopyWith<$Res>(_self.config, (value) { + return _then(_self.copyWith(config: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [EvalPlan]. +extension EvalPlanPatterns on EvalPlan { +/// A variant of `map` that fallback to returning `orElse`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalPlan value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalPlan() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalPlan value) $default,){ +final _that = this; +switch (_that) { +case _EvalPlan(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalPlan value)? $default,){ +final _that = this; +switch (_that) { +case _EvalPlan() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String name, List steps, EvalPlanStep? finish, GenerateConfig config)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalPlan() when $default != null: +return $default(_that.name,_that.steps,_that.finish,_that.config);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String name, List steps, EvalPlanStep? finish, GenerateConfig config) $default,) {final _that = this; +switch (_that) { +case _EvalPlan(): +return $default(_that.name,_that.steps,_that.finish,_that.config);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String name, List steps, EvalPlanStep? finish, GenerateConfig config)? $default,) {final _that = this; +switch (_that) { +case _EvalPlan() when $default != null: +return $default(_that.name,_that.steps,_that.finish,_that.config);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalPlan extends EvalPlan { + const _EvalPlan({this.name = 'plan', final List steps = const [], this.finish, this.config = const GenerateConfig()}): _steps = steps,super._(); + factory _EvalPlan.fromJson(Map json) => _$EvalPlanFromJson(json); + +/// Plan name. +@override@JsonKey() final String name; +/// Steps in plan. + final List _steps; +/// Steps in plan. 
+@override@JsonKey() List get steps { + if (_steps is EqualUnmodifiableListView) return _steps; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_steps); +} + +/// Step to always run at the end. +@override final EvalPlanStep? finish; +/// Generation config. +@override@JsonKey() final GenerateConfig config; + +/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalPlanCopyWith<_EvalPlan> get copyWith => __$EvalPlanCopyWithImpl<_EvalPlan>(this, _$identity); + +@override +Map toJson() { + return _$EvalPlanToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalPlan&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other._steps, _steps)&&(identical(other.finish, finish) || other.finish == finish)&&(identical(other.config, config) || other.config == config)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(_steps),finish,config); + +@override +String toString() { + return 'EvalPlan(name: $name, steps: $steps, finish: $finish, config: $config)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalPlanCopyWith<$Res> implements $EvalPlanCopyWith<$Res> { + factory _$EvalPlanCopyWith(_EvalPlan value, $Res Function(_EvalPlan) _then) = __$EvalPlanCopyWithImpl; +@override @useResult +$Res call({ + String name, List steps, EvalPlanStep? finish, GenerateConfig config +}); + + +@override $EvalPlanStepCopyWith<$Res>? 
get finish;@override $GenerateConfigCopyWith<$Res> get config; + +} +/// @nodoc +class __$EvalPlanCopyWithImpl<$Res> + implements _$EvalPlanCopyWith<$Res> { + __$EvalPlanCopyWithImpl(this._self, this._then); + + final _EvalPlan _self; + final $Res Function(_EvalPlan) _then; + +/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? name = null,Object? steps = null,Object? finish = freezed,Object? config = null,}) { + return _then(_EvalPlan( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,steps: null == steps ? _self._steps : steps // ignore: cast_nullable_to_non_nullable +as List,finish: freezed == finish ? _self.finish : finish // ignore: cast_nullable_to_non_nullable +as EvalPlanStep?,config: null == config ? _self.config : config // ignore: cast_nullable_to_non_nullable +as GenerateConfig, + )); +} + +/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalPlanStepCopyWith<$Res>? get finish { + if (_self.finish == null) { + return null; + } + + return $EvalPlanStepCopyWith<$Res>(_self.finish!, (value) { + return _then(_self.copyWith(finish: value)); + }); +}/// Create a copy of EvalPlan +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$GenerateConfigCopyWith<$Res> get config { + + return $GenerateConfigCopyWith<$Res>(_self.config, (value) { + return _then(_self.copyWith(config: value)); + }); +} +} + + +/// @nodoc +mixin _$EvalPlanStep { + +/// Name of solver. + String get solver;/// Parameters used to instantiate solver. + Map get params;/// Parameters explicitly passed to the eval plan. +@JsonKey(name: 'params_passed') Map? get paramsPassed; +/// Create a copy of EvalPlanStep +/// with the given fields replaced by the non-null parameter values. 
+@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalPlanStepCopyWith get copyWith => _$EvalPlanStepCopyWithImpl(this as EvalPlanStep, _$identity); + + /// Serializes this EvalPlanStep to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalPlanStep&&(identical(other.solver, solver) || other.solver == solver)&&const DeepCollectionEquality().equals(other.params, params)&&const DeepCollectionEquality().equals(other.paramsPassed, paramsPassed)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,solver,const DeepCollectionEquality().hash(params),const DeepCollectionEquality().hash(paramsPassed)); + +@override +String toString() { + return 'EvalPlanStep(solver: $solver, params: $params, paramsPassed: $paramsPassed)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalPlanStepCopyWith<$Res> { + factory $EvalPlanStepCopyWith(EvalPlanStep value, $Res Function(EvalPlanStep) _then) = _$EvalPlanStepCopyWithImpl; +@useResult +$Res call({ + String solver, Map params,@JsonKey(name: 'params_passed') Map? paramsPassed +}); + + + + +} +/// @nodoc +class _$EvalPlanStepCopyWithImpl<$Res> + implements $EvalPlanStepCopyWith<$Res> { + _$EvalPlanStepCopyWithImpl(this._self, this._then); + + final EvalPlanStep _self; + final $Res Function(EvalPlanStep) _then; + +/// Create a copy of EvalPlanStep +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? solver = null,Object? params = null,Object? paramsPassed = freezed,}) { + return _then(_self.copyWith( +solver: null == solver ? _self.solver : solver // ignore: cast_nullable_to_non_nullable +as String,params: null == params ? _self.params : params // ignore: cast_nullable_to_non_nullable +as Map,paramsPassed: freezed == paramsPassed ? 
_self.paramsPassed : paramsPassed // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalPlanStep]. +extension EvalPlanStepPatterns on EvalPlanStep { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalPlanStep value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalPlanStep() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalPlanStep value) $default,){ +final _that = this; +switch (_that) { +case _EvalPlanStep(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalPlanStep value)? $default,){ +final _that = this; +switch (_that) { +case _EvalPlanStep() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String solver, Map params, @JsonKey(name: 'params_passed') Map? paramsPassed)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalPlanStep() when $default != null: +return $default(_that.solver,_that.params,_that.paramsPassed);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String solver, Map params, @JsonKey(name: 'params_passed') Map? paramsPassed) $default,) {final _that = this; +switch (_that) { +case _EvalPlanStep(): +return $default(_that.solver,_that.params,_that.paramsPassed);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String solver, Map params, @JsonKey(name: 'params_passed') Map? paramsPassed)? $default,) {final _that = this; +switch (_that) { +case _EvalPlanStep() when $default != null: +return $default(_that.solver,_that.params,_that.paramsPassed);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalPlanStep extends EvalPlanStep { + const _EvalPlanStep({required this.solver, final Map params = const {}, @JsonKey(name: 'params_passed') final Map? 
paramsPassed}): _params = params,_paramsPassed = paramsPassed,super._(); + factory _EvalPlanStep.fromJson(Map json) => _$EvalPlanStepFromJson(json); + +/// Name of solver. +@override final String solver; +/// Parameters used to instantiate solver. + final Map _params; +/// Parameters used to instantiate solver. +@override@JsonKey() Map get params { + if (_params is EqualUnmodifiableMapView) return _params; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_params); +} + +/// Parameters explicitly passed to the eval plan. + final Map? _paramsPassed; +/// Parameters explicitly passed to the eval plan. +@override@JsonKey(name: 'params_passed') Map? get paramsPassed { + final value = _paramsPassed; + if (value == null) return null; + if (_paramsPassed is EqualUnmodifiableMapView) return _paramsPassed; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of EvalPlanStep +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalPlanStepCopyWith<_EvalPlanStep> get copyWith => __$EvalPlanStepCopyWithImpl<_EvalPlanStep>(this, _$identity); + +@override +Map toJson() { + return _$EvalPlanStepToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalPlanStep&&(identical(other.solver, solver) || other.solver == solver)&&const DeepCollectionEquality().equals(other._params, _params)&&const DeepCollectionEquality().equals(other._paramsPassed, _paramsPassed)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,solver,const DeepCollectionEquality().hash(_params),const DeepCollectionEquality().hash(_paramsPassed)); + +@override +String toString() { + return 'EvalPlanStep(solver: $solver, params: $params, paramsPassed: $paramsPassed)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalPlanStepCopyWith<$Res> implements $EvalPlanStepCopyWith<$Res> { + factory _$EvalPlanStepCopyWith(_EvalPlanStep value, $Res Function(_EvalPlanStep) _then) = __$EvalPlanStepCopyWithImpl; +@override @useResult +$Res call({ + String solver, Map params,@JsonKey(name: 'params_passed') Map? paramsPassed +}); + + + + +} +/// @nodoc +class __$EvalPlanStepCopyWithImpl<$Res> + implements _$EvalPlanStepCopyWith<$Res> { + __$EvalPlanStepCopyWithImpl(this._self, this._then); + + final _EvalPlanStep _self; + final $Res Function(_EvalPlanStep) _then; + +/// Create a copy of EvalPlanStep +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? solver = null,Object? params = null,Object? paramsPassed = freezed,}) { + return _then(_EvalPlanStep( +solver: null == solver ? _self.solver : solver // ignore: cast_nullable_to_non_nullable +as String,params: null == params ? 
_self._params : params // ignore: cast_nullable_to_non_nullable +as Map,paramsPassed: freezed == paramsPassed ? _self._paramsPassed : paramsPassed // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + + +} + + +/// @nodoc +mixin _$EvalResults { + +/// Total samples in eval (dataset samples * epochs). +@JsonKey(name: 'total_samples', defaultValue: 0) int get totalSamples;/// Samples completed without error. +@JsonKey(name: 'completed_samples', defaultValue: 0) int get completedSamples;/// Early stopping summary (if an early stopping manager was present). +@JsonKey(name: 'early_stopping') EarlyStoppingSummary? get earlyStopping;/// Scorers used to compute results. + List get scores;/// Additional results metadata. + Map get metadata;/// List of per sample scores reduced across epochs. +@JsonKey(name: 'sample_reductions') List? get sampleReductions; +/// Create a copy of EvalResults +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalResultsCopyWith get copyWith => _$EvalResultsCopyWithImpl(this as EvalResults, _$identity); + + /// Serializes this EvalResults to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalResults&&(identical(other.totalSamples, totalSamples) || other.totalSamples == totalSamples)&&(identical(other.completedSamples, completedSamples) || other.completedSamples == completedSamples)&&(identical(other.earlyStopping, earlyStopping) || other.earlyStopping == earlyStopping)&&const DeepCollectionEquality().equals(other.scores, scores)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&const DeepCollectionEquality().equals(other.sampleReductions, sampleReductions)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,totalSamples,completedSamples,earlyStopping,const DeepCollectionEquality().hash(scores),const DeepCollectionEquality().hash(metadata),const DeepCollectionEquality().hash(sampleReductions)); + +@override +String toString() { + return 'EvalResults(totalSamples: $totalSamples, completedSamples: $completedSamples, earlyStopping: $earlyStopping, scores: $scores, metadata: $metadata, sampleReductions: $sampleReductions)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalResultsCopyWith<$Res> { + factory $EvalResultsCopyWith(EvalResults value, $Res Function(EvalResults) _then) = _$EvalResultsCopyWithImpl; +@useResult +$Res call({ +@JsonKey(name: 'total_samples', defaultValue: 0) int totalSamples,@JsonKey(name: 'completed_samples', defaultValue: 0) int completedSamples,@JsonKey(name: 'early_stopping') EarlyStoppingSummary? earlyStopping, List scores, Map metadata,@JsonKey(name: 'sample_reductions') List? sampleReductions +}); + + +$EarlyStoppingSummaryCopyWith<$Res>? 
get earlyStopping; + +} +/// @nodoc +class _$EvalResultsCopyWithImpl<$Res> + implements $EvalResultsCopyWith<$Res> { + _$EvalResultsCopyWithImpl(this._self, this._then); + + final EvalResults _self; + final $Res Function(EvalResults) _then; + +/// Create a copy of EvalResults +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? totalSamples = null,Object? completedSamples = null,Object? earlyStopping = freezed,Object? scores = null,Object? metadata = null,Object? sampleReductions = freezed,}) { + return _then(_self.copyWith( +totalSamples: null == totalSamples ? _self.totalSamples : totalSamples // ignore: cast_nullable_to_non_nullable +as int,completedSamples: null == completedSamples ? _self.completedSamples : completedSamples // ignore: cast_nullable_to_non_nullable +as int,earlyStopping: freezed == earlyStopping ? _self.earlyStopping : earlyStopping // ignore: cast_nullable_to_non_nullable +as EarlyStoppingSummary?,scores: null == scores ? _self.scores : scores // ignore: cast_nullable_to_non_nullable +as List,metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,sampleReductions: freezed == sampleReductions ? _self.sampleReductions : sampleReductions // ignore: cast_nullable_to_non_nullable +as List?, + )); +} +/// Create a copy of EvalResults +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EarlyStoppingSummaryCopyWith<$Res>? get earlyStopping { + if (_self.earlyStopping == null) { + return null; + } + + return $EarlyStoppingSummaryCopyWith<$Res>(_self.earlyStopping!, (value) { + return _then(_self.copyWith(earlyStopping: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [EvalResults]. +extension EvalResultsPatterns on EvalResults { +/// A variant of `map` that fallback to returning `orElse`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalResults value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalResults() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalResults value) $default,){ +final _that = this; +switch (_that) { +case _EvalResults(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalResults value)? $default,){ +final _that = this; +switch (_that) { +case _EvalResults() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'total_samples', defaultValue: 0) int totalSamples, @JsonKey(name: 'completed_samples', defaultValue: 0) int completedSamples, @JsonKey(name: 'early_stopping') EarlyStoppingSummary? earlyStopping, List scores, Map metadata, @JsonKey(name: 'sample_reductions') List? 
sampleReductions)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalResults() when $default != null: +return $default(_that.totalSamples,_that.completedSamples,_that.earlyStopping,_that.scores,_that.metadata,_that.sampleReductions);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'total_samples', defaultValue: 0) int totalSamples, @JsonKey(name: 'completed_samples', defaultValue: 0) int completedSamples, @JsonKey(name: 'early_stopping') EarlyStoppingSummary? earlyStopping, List scores, Map metadata, @JsonKey(name: 'sample_reductions') List? sampleReductions) $default,) {final _that = this; +switch (_that) { +case _EvalResults(): +return $default(_that.totalSamples,_that.completedSamples,_that.earlyStopping,_that.scores,_that.metadata,_that.sampleReductions);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'total_samples', defaultValue: 0) int totalSamples, @JsonKey(name: 'completed_samples', defaultValue: 0) int completedSamples, @JsonKey(name: 'early_stopping') EarlyStoppingSummary? earlyStopping, List scores, Map metadata, @JsonKey(name: 'sample_reductions') List? sampleReductions)? 
$default,) {final _that = this; +switch (_that) { +case _EvalResults() when $default != null: +return $default(_that.totalSamples,_that.completedSamples,_that.earlyStopping,_that.scores,_that.metadata,_that.sampleReductions);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalResults extends EvalResults { + const _EvalResults({@JsonKey(name: 'total_samples', defaultValue: 0) this.totalSamples = 0, @JsonKey(name: 'completed_samples', defaultValue: 0) this.completedSamples = 0, @JsonKey(name: 'early_stopping') this.earlyStopping, final List scores = const [], final Map metadata = const {}, @JsonKey(name: 'sample_reductions') final List? sampleReductions}): _scores = scores,_metadata = metadata,_sampleReductions = sampleReductions,super._(); + factory _EvalResults.fromJson(Map json) => _$EvalResultsFromJson(json); + +/// Total samples in eval (dataset samples * epochs). +@override@JsonKey(name: 'total_samples', defaultValue: 0) final int totalSamples; +/// Samples completed without error. +@override@JsonKey(name: 'completed_samples', defaultValue: 0) final int completedSamples; +/// Early stopping summary (if an early stopping manager was present). +@override@JsonKey(name: 'early_stopping') final EarlyStoppingSummary? earlyStopping; +/// Scorers used to compute results. + final List _scores; +/// Scorers used to compute results. +@override@JsonKey() List get scores { + if (_scores is EqualUnmodifiableListView) return _scores; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_scores); +} + +/// Additional results metadata. + final Map _metadata; +/// Additional results metadata. +@override@JsonKey() Map get metadata { + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_metadata); +} + +/// List of per sample scores reduced across epochs. + final List? _sampleReductions; +/// List of per sample scores reduced across epochs. 
+@override@JsonKey(name: 'sample_reductions') List? get sampleReductions { + final value = _sampleReductions; + if (value == null) return null; + if (_sampleReductions is EqualUnmodifiableListView) return _sampleReductions; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + + +/// Create a copy of EvalResults +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalResultsCopyWith<_EvalResults> get copyWith => __$EvalResultsCopyWithImpl<_EvalResults>(this, _$identity); + +@override +Map toJson() { + return _$EvalResultsToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalResults&&(identical(other.totalSamples, totalSamples) || other.totalSamples == totalSamples)&&(identical(other.completedSamples, completedSamples) || other.completedSamples == completedSamples)&&(identical(other.earlyStopping, earlyStopping) || other.earlyStopping == earlyStopping)&&const DeepCollectionEquality().equals(other._scores, _scores)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&const DeepCollectionEquality().equals(other._sampleReductions, _sampleReductions)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,totalSamples,completedSamples,earlyStopping,const DeepCollectionEquality().hash(_scores),const DeepCollectionEquality().hash(_metadata),const DeepCollectionEquality().hash(_sampleReductions)); + +@override +String toString() { + return 'EvalResults(totalSamples: $totalSamples, completedSamples: $completedSamples, earlyStopping: $earlyStopping, scores: $scores, metadata: $metadata, sampleReductions: $sampleReductions)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalResultsCopyWith<$Res> implements $EvalResultsCopyWith<$Res> { + factory 
_$EvalResultsCopyWith(_EvalResults value, $Res Function(_EvalResults) _then) = __$EvalResultsCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'total_samples', defaultValue: 0) int totalSamples,@JsonKey(name: 'completed_samples', defaultValue: 0) int completedSamples,@JsonKey(name: 'early_stopping') EarlyStoppingSummary? earlyStopping, List scores, Map metadata,@JsonKey(name: 'sample_reductions') List? sampleReductions +}); + + +@override $EarlyStoppingSummaryCopyWith<$Res>? get earlyStopping; + +} +/// @nodoc +class __$EvalResultsCopyWithImpl<$Res> + implements _$EvalResultsCopyWith<$Res> { + __$EvalResultsCopyWithImpl(this._self, this._then); + + final _EvalResults _self; + final $Res Function(_EvalResults) _then; + +/// Create a copy of EvalResults +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? totalSamples = null,Object? completedSamples = null,Object? earlyStopping = freezed,Object? scores = null,Object? metadata = null,Object? sampleReductions = freezed,}) { + return _then(_EvalResults( +totalSamples: null == totalSamples ? _self.totalSamples : totalSamples // ignore: cast_nullable_to_non_nullable +as int,completedSamples: null == completedSamples ? _self.completedSamples : completedSamples // ignore: cast_nullable_to_non_nullable +as int,earlyStopping: freezed == earlyStopping ? _self.earlyStopping : earlyStopping // ignore: cast_nullable_to_non_nullable +as EarlyStoppingSummary?,scores: null == scores ? _self._scores : scores // ignore: cast_nullable_to_non_nullable +as List,metadata: null == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,sampleReductions: freezed == sampleReductions ? _self._sampleReductions : sampleReductions // ignore: cast_nullable_to_non_nullable +as List?, + )); +} + +/// Create a copy of EvalResults +/// with the given fields replaced by the non-null parameter values. 
+@override +@pragma('vm:prefer-inline') +$EarlyStoppingSummaryCopyWith<$Res>? get earlyStopping { + if (_self.earlyStopping == null) { + return null; + } + + return $EarlyStoppingSummaryCopyWith<$Res>(_self.earlyStopping!, (value) { + return _then(_self.copyWith(earlyStopping: value)); + }); +} +} + + +/// @nodoc +mixin _$EarlyStoppingSummary { + +/// Type of early stopping. + String get type;/// Limit that triggered early stopping. + double? get limit;/// Score that triggered early stopping. + double? get score;/// Additional metadata. + Map get metadata; +/// Create a copy of EarlyStoppingSummary +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EarlyStoppingSummaryCopyWith get copyWith => _$EarlyStoppingSummaryCopyWithImpl(this as EarlyStoppingSummary, _$identity); + + /// Serializes this EarlyStoppingSummary to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EarlyStoppingSummary&&(identical(other.type, type) || other.type == type)&&(identical(other.limit, limit) || other.limit == limit)&&(identical(other.score, score) || other.score == score)&&const DeepCollectionEquality().equals(other.metadata, metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,type,limit,score,const DeepCollectionEquality().hash(metadata)); + +@override +String toString() { + return 'EarlyStoppingSummary(type: $type, limit: $limit, score: $score, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class $EarlyStoppingSummaryCopyWith<$Res> { + factory $EarlyStoppingSummaryCopyWith(EarlyStoppingSummary value, $Res Function(EarlyStoppingSummary) _then) = _$EarlyStoppingSummaryCopyWithImpl; +@useResult +$Res call({ + String type, double? limit, double? 
score, Map metadata +}); + + + + +} +/// @nodoc +class _$EarlyStoppingSummaryCopyWithImpl<$Res> + implements $EarlyStoppingSummaryCopyWith<$Res> { + _$EarlyStoppingSummaryCopyWithImpl(this._self, this._then); + + final EarlyStoppingSummary _self; + final $Res Function(EarlyStoppingSummary) _then; + +/// Create a copy of EarlyStoppingSummary +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? type = null,Object? limit = freezed,Object? score = freezed,Object? metadata = null,}) { + return _then(_self.copyWith( +type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String,limit: freezed == limit ? _self.limit : limit // ignore: cast_nullable_to_non_nullable +as double?,score: freezed == score ? _self.score : score // ignore: cast_nullable_to_non_nullable +as double?,metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EarlyStoppingSummary]. +extension EarlyStoppingSummaryPatterns on EarlyStoppingSummary { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EarlyStoppingSummary value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EarlyStoppingSummary() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EarlyStoppingSummary value) $default,){ +final _that = this; +switch (_that) { +case _EarlyStoppingSummary(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EarlyStoppingSummary value)? $default,){ +final _that = this; +switch (_that) { +case _EarlyStoppingSummary() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String type, double? limit, double? score, Map metadata)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EarlyStoppingSummary() when $default != null: +return $default(_that.type,_that.limit,_that.score,_that.metadata);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String type, double? limit, double? 
score, Map metadata) $default,) {final _that = this; +switch (_that) { +case _EarlyStoppingSummary(): +return $default(_that.type,_that.limit,_that.score,_that.metadata);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String type, double? limit, double? score, Map metadata)? $default,) {final _that = this; +switch (_that) { +case _EarlyStoppingSummary() when $default != null: +return $default(_that.type,_that.limit,_that.score,_that.metadata);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EarlyStoppingSummary extends EarlyStoppingSummary { + const _EarlyStoppingSummary({required this.type, this.limit, this.score, final Map metadata = const {}}): _metadata = metadata,super._(); + factory _EarlyStoppingSummary.fromJson(Map json) => _$EarlyStoppingSummaryFromJson(json); + +/// Type of early stopping. +@override final String type; +/// Limit that triggered early stopping. +@override final double? limit; +/// Score that triggered early stopping. +@override final double? score; +/// Additional metadata. + final Map _metadata; +/// Additional metadata. +@override@JsonKey() Map get metadata { + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_metadata); +} + + +/// Create a copy of EarlyStoppingSummary +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EarlyStoppingSummaryCopyWith<_EarlyStoppingSummary> get copyWith => __$EarlyStoppingSummaryCopyWithImpl<_EarlyStoppingSummary>(this, _$identity); + +@override +Map toJson() { + return _$EarlyStoppingSummaryToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EarlyStoppingSummary&&(identical(other.type, type) || other.type == type)&&(identical(other.limit, limit) || other.limit == limit)&&(identical(other.score, score) || other.score == score)&&const DeepCollectionEquality().equals(other._metadata, _metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,type,limit,score,const DeepCollectionEquality().hash(_metadata)); + +@override +String toString() { + return 'EarlyStoppingSummary(type: $type, limit: $limit, score: $score, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class _$EarlyStoppingSummaryCopyWith<$Res> implements $EarlyStoppingSummaryCopyWith<$Res> { + factory _$EarlyStoppingSummaryCopyWith(_EarlyStoppingSummary value, $Res Function(_EarlyStoppingSummary) _then) = __$EarlyStoppingSummaryCopyWithImpl; +@override @useResult +$Res call({ + String type, double? limit, double? score, Map metadata +}); + + + + +} +/// @nodoc +class __$EarlyStoppingSummaryCopyWithImpl<$Res> + implements _$EarlyStoppingSummaryCopyWith<$Res> { + __$EarlyStoppingSummaryCopyWithImpl(this._self, this._then); + + final _EarlyStoppingSummary _self; + final $Res Function(_EarlyStoppingSummary) _then; + +/// Create a copy of EarlyStoppingSummary +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? type = null,Object? limit = freezed,Object? score = freezed,Object? 
metadata = null,}) { + return _then(_EarlyStoppingSummary( +type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String,limit: freezed == limit ? _self.limit : limit // ignore: cast_nullable_to_non_nullable +as double?,score: freezed == score ? _self.score : score // ignore: cast_nullable_to_non_nullable +as double?,metadata: null == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map, + )); +} + + +} + + +/// @nodoc +mixin _$EvalScore { + +/// Score name. + String get name;/// Scorer name. + String get scorer;/// Reducer name. + String? get reducer;/// Number of samples scored by this scorer. +@JsonKey(name: 'scored_samples') int? get scoredSamples;/// Number of samples not scored by this scorer. +@JsonKey(name: 'unscored_samples') int? get unscoredSamples;/// Parameters specified when creating scorer. + Map get params;/// Metrics computed for this scorer. +@JsonKey(fromJson: _metricsFromJson) List get metrics;/// Additional scorer metadata. +@JsonKey(name: 'metadata') Map? get metadata; +/// Create a copy of EvalScore +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalScoreCopyWith get copyWith => _$EvalScoreCopyWithImpl(this as EvalScore, _$identity); + + /// Serializes this EvalScore to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalScore&&(identical(other.name, name) || other.name == name)&&(identical(other.scorer, scorer) || other.scorer == scorer)&&(identical(other.reducer, reducer) || other.reducer == reducer)&&(identical(other.scoredSamples, scoredSamples) || other.scoredSamples == scoredSamples)&&(identical(other.unscoredSamples, unscoredSamples) || other.unscoredSamples == unscoredSamples)&&const DeepCollectionEquality().equals(other.params, params)&&const DeepCollectionEquality().equals(other.metrics, metrics)&&const DeepCollectionEquality().equals(other.metadata, metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,scorer,reducer,scoredSamples,unscoredSamples,const DeepCollectionEquality().hash(params),const DeepCollectionEquality().hash(metrics),const DeepCollectionEquality().hash(metadata)); + +@override +String toString() { + return 'EvalScore(name: $name, scorer: $scorer, reducer: $reducer, scoredSamples: $scoredSamples, unscoredSamples: $unscoredSamples, params: $params, metrics: $metrics, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalScoreCopyWith<$Res> { + factory $EvalScoreCopyWith(EvalScore value, $Res Function(EvalScore) _then) = _$EvalScoreCopyWithImpl; +@useResult +$Res call({ + String name, String scorer, String? reducer,@JsonKey(name: 'scored_samples') int? scoredSamples,@JsonKey(name: 'unscored_samples') int? unscoredSamples, Map params,@JsonKey(fromJson: _metricsFromJson) List metrics,@JsonKey(name: 'metadata') Map? 
metadata +}); + + + + +} +/// @nodoc +class _$EvalScoreCopyWithImpl<$Res> + implements $EvalScoreCopyWith<$Res> { + _$EvalScoreCopyWithImpl(this._self, this._then); + + final EvalScore _self; + final $Res Function(EvalScore) _then; + +/// Create a copy of EvalScore +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? name = null,Object? scorer = null,Object? reducer = freezed,Object? scoredSamples = freezed,Object? unscoredSamples = freezed,Object? params = null,Object? metrics = null,Object? metadata = freezed,}) { + return _then(_self.copyWith( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,scorer: null == scorer ? _self.scorer : scorer // ignore: cast_nullable_to_non_nullable +as String,reducer: freezed == reducer ? _self.reducer : reducer // ignore: cast_nullable_to_non_nullable +as String?,scoredSamples: freezed == scoredSamples ? _self.scoredSamples : scoredSamples // ignore: cast_nullable_to_non_nullable +as int?,unscoredSamples: freezed == unscoredSamples ? _self.unscoredSamples : unscoredSamples // ignore: cast_nullable_to_non_nullable +as int?,params: null == params ? _self.params : params // ignore: cast_nullable_to_non_nullable +as Map,metrics: null == metrics ? _self.metrics : metrics // ignore: cast_nullable_to_non_nullable +as List,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalScore]. +extension EvalScorePatterns on EvalScore { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalScore value)? 
$default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalScore() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalScore value) $default,){ +final _that = this; +switch (_that) { +case _EvalScore(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalScore value)? $default,){ +final _that = this; +switch (_that) { +case _EvalScore() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String name, String scorer, String? reducer, @JsonKey(name: 'scored_samples') int? scoredSamples, @JsonKey(name: 'unscored_samples') int? unscoredSamples, Map params, @JsonKey(fromJson: _metricsFromJson) List metrics, @JsonKey(name: 'metadata') Map? metadata)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalScore() when $default != null: +return $default(_that.name,_that.scorer,_that.reducer,_that.scoredSamples,_that.unscoredSamples,_that.params,_that.metrics,_that.metadata);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String name, String scorer, String? reducer, @JsonKey(name: 'scored_samples') int? scoredSamples, @JsonKey(name: 'unscored_samples') int? unscoredSamples, Map params, @JsonKey(fromJson: _metricsFromJson) List metrics, @JsonKey(name: 'metadata') Map? metadata) $default,) {final _that = this; +switch (_that) { +case _EvalScore(): +return $default(_that.name,_that.scorer,_that.reducer,_that.scoredSamples,_that.unscoredSamples,_that.params,_that.metrics,_that.metadata);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String name, String scorer, String? reducer, @JsonKey(name: 'scored_samples') int? scoredSamples, @JsonKey(name: 'unscored_samples') int? unscoredSamples, Map params, @JsonKey(fromJson: _metricsFromJson) List metrics, @JsonKey(name: 'metadata') Map? metadata)? 
$default,) {final _that = this; +switch (_that) { +case _EvalScore() when $default != null: +return $default(_that.name,_that.scorer,_that.reducer,_that.scoredSamples,_that.unscoredSamples,_that.params,_that.metrics,_that.metadata);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalScore extends EvalScore { + const _EvalScore({required this.name, required this.scorer, this.reducer, @JsonKey(name: 'scored_samples') this.scoredSamples, @JsonKey(name: 'unscored_samples') this.unscoredSamples, final Map params = const {}, @JsonKey(fromJson: _metricsFromJson) final List metrics = const [], @JsonKey(name: 'metadata') final Map? metadata}): _params = params,_metrics = metrics,_metadata = metadata,super._(); + factory _EvalScore.fromJson(Map json) => _$EvalScoreFromJson(json); + +/// Score name. +@override final String name; +/// Scorer name. +@override final String scorer; +/// Reducer name. +@override final String? reducer; +/// Number of samples scored by this scorer. +@override@JsonKey(name: 'scored_samples') final int? scoredSamples; +/// Number of samples not scored by this scorer. +@override@JsonKey(name: 'unscored_samples') final int? unscoredSamples; +/// Parameters specified when creating scorer. + final Map _params; +/// Parameters specified when creating scorer. +@override@JsonKey() Map get params { + if (_params is EqualUnmodifiableMapView) return _params; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_params); +} + +/// Metrics computed for this scorer. + final List _metrics; +/// Metrics computed for this scorer. +@override@JsonKey(fromJson: _metricsFromJson) List get metrics { + if (_metrics is EqualUnmodifiableListView) return _metrics; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_metrics); +} + +/// Additional scorer metadata. + final Map? _metadata; +/// Additional scorer metadata. +@override@JsonKey(name: 'metadata') Map? 
get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of EvalScore +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalScoreCopyWith<_EvalScore> get copyWith => __$EvalScoreCopyWithImpl<_EvalScore>(this, _$identity); + +@override +Map toJson() { + return _$EvalScoreToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalScore&&(identical(other.name, name) || other.name == name)&&(identical(other.scorer, scorer) || other.scorer == scorer)&&(identical(other.reducer, reducer) || other.reducer == reducer)&&(identical(other.scoredSamples, scoredSamples) || other.scoredSamples == scoredSamples)&&(identical(other.unscoredSamples, unscoredSamples) || other.unscoredSamples == unscoredSamples)&&const DeepCollectionEquality().equals(other._params, _params)&&const DeepCollectionEquality().equals(other._metrics, _metrics)&&const DeepCollectionEquality().equals(other._metadata, _metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,scorer,reducer,scoredSamples,unscoredSamples,const DeepCollectionEquality().hash(_params),const DeepCollectionEquality().hash(_metrics),const DeepCollectionEquality().hash(_metadata)); + +@override +String toString() { + return 'EvalScore(name: $name, scorer: $scorer, reducer: $reducer, scoredSamples: $scoredSamples, unscoredSamples: $unscoredSamples, params: $params, metrics: $metrics, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalScoreCopyWith<$Res> implements $EvalScoreCopyWith<$Res> { + factory _$EvalScoreCopyWith(_EvalScore value, $Res 
Function(_EvalScore) _then) = __$EvalScoreCopyWithImpl; +@override @useResult +$Res call({ + String name, String scorer, String? reducer,@JsonKey(name: 'scored_samples') int? scoredSamples,@JsonKey(name: 'unscored_samples') int? unscoredSamples, Map params,@JsonKey(fromJson: _metricsFromJson) List metrics,@JsonKey(name: 'metadata') Map? metadata +}); + + + + +} +/// @nodoc +class __$EvalScoreCopyWithImpl<$Res> + implements _$EvalScoreCopyWith<$Res> { + __$EvalScoreCopyWithImpl(this._self, this._then); + + final _EvalScore _self; + final $Res Function(_EvalScore) _then; + +/// Create a copy of EvalScore +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? name = null,Object? scorer = null,Object? reducer = freezed,Object? scoredSamples = freezed,Object? unscoredSamples = freezed,Object? params = null,Object? metrics = null,Object? metadata = freezed,}) { + return _then(_EvalScore( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,scorer: null == scorer ? _self.scorer : scorer // ignore: cast_nullable_to_non_nullable +as String,reducer: freezed == reducer ? _self.reducer : reducer // ignore: cast_nullable_to_non_nullable +as String?,scoredSamples: freezed == scoredSamples ? _self.scoredSamples : scoredSamples // ignore: cast_nullable_to_non_nullable +as int?,unscoredSamples: freezed == unscoredSamples ? _self.unscoredSamples : unscoredSamples // ignore: cast_nullable_to_non_nullable +as int?,params: null == params ? _self._params : params // ignore: cast_nullable_to_non_nullable +as Map,metrics: null == metrics ? _self._metrics : metrics // ignore: cast_nullable_to_non_nullable +as List,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + + +} + + +/// @nodoc +mixin _$EvalMetric { + +/// Metric name. + String get name;/// Metric value. 
+ Object get value;/// Params specified when creating metric. + Map get params;/// Additional metadata associated with metric. + Map? get metadata; +/// Create a copy of EvalMetric +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalMetricCopyWith get copyWith => _$EvalMetricCopyWithImpl(this as EvalMetric, _$identity); + + /// Serializes this EvalMetric to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalMetric&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.value, value)&&const DeepCollectionEquality().equals(other.params, params)&&const DeepCollectionEquality().equals(other.metadata, metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(value),const DeepCollectionEquality().hash(params),const DeepCollectionEquality().hash(metadata)); + +@override +String toString() { + return 'EvalMetric(name: $name, value: $value, params: $params, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalMetricCopyWith<$Res> { + factory $EvalMetricCopyWith(EvalMetric value, $Res Function(EvalMetric) _then) = _$EvalMetricCopyWithImpl; +@useResult +$Res call({ + String name, Object value, Map params, Map? metadata +}); + + + + +} +/// @nodoc +class _$EvalMetricCopyWithImpl<$Res> + implements $EvalMetricCopyWith<$Res> { + _$EvalMetricCopyWithImpl(this._self, this._then); + + final EvalMetric _self; + final $Res Function(EvalMetric) _then; + +/// Create a copy of EvalMetric +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? name = null,Object? value = null,Object? params = null,Object? 
metadata = freezed,}) { + return _then(_self.copyWith( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,value: null == value ? _self.value : value ,params: null == params ? _self.params : params // ignore: cast_nullable_to_non_nullable +as Map,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalMetric]. +extension EvalMetricPatterns on EvalMetric { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalMetric value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalMetric() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalMetric value) $default,){ +final _that = this; +switch (_that) { +case _EvalMetric(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalMetric value)? 
$default,){ +final _that = this; +switch (_that) { +case _EvalMetric() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String name, Object value, Map params, Map? metadata)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalMetric() when $default != null: +return $default(_that.name,_that.value,_that.params,_that.metadata);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String name, Object value, Map params, Map? metadata) $default,) {final _that = this; +switch (_that) { +case _EvalMetric(): +return $default(_that.name,_that.value,_that.params,_that.metadata);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String name, Object value, Map params, Map? metadata)? 
$default,) {final _that = this; +switch (_that) { +case _EvalMetric() when $default != null: +return $default(_that.name,_that.value,_that.params,_that.metadata);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalMetric extends EvalMetric { + const _EvalMetric({required this.name, required this.value, final Map params = const {}, final Map? metadata}): _params = params,_metadata = metadata,super._(); + factory _EvalMetric.fromJson(Map json) => _$EvalMetricFromJson(json); + +/// Metric name. +@override final String name; +/// Metric value. +@override final Object value; +/// Params specified when creating metric. + final Map _params; +/// Params specified when creating metric. +@override@JsonKey() Map get params { + if (_params is EqualUnmodifiableMapView) return _params; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_params); +} + +/// Additional metadata associated with metric. + final Map? _metadata; +/// Additional metadata associated with metric. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of EvalMetric +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalMetricCopyWith<_EvalMetric> get copyWith => __$EvalMetricCopyWithImpl<_EvalMetric>(this, _$identity); + +@override +Map toJson() { + return _$EvalMetricToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalMetric&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.value, value)&&const DeepCollectionEquality().equals(other._params, _params)&&const DeepCollectionEquality().equals(other._metadata, _metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(value),const DeepCollectionEquality().hash(_params),const DeepCollectionEquality().hash(_metadata)); + +@override +String toString() { + return 'EvalMetric(name: $name, value: $value, params: $params, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalMetricCopyWith<$Res> implements $EvalMetricCopyWith<$Res> { + factory _$EvalMetricCopyWith(_EvalMetric value, $Res Function(_EvalMetric) _then) = __$EvalMetricCopyWithImpl; +@override @useResult +$Res call({ + String name, Object value, Map params, Map? metadata +}); + + + + +} +/// @nodoc +class __$EvalMetricCopyWithImpl<$Res> + implements _$EvalMetricCopyWith<$Res> { + __$EvalMetricCopyWithImpl(this._self, this._then); + + final _EvalMetric _self; + final $Res Function(_EvalMetric) _then; + +/// Create a copy of EvalMetric +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? name = null,Object? value = null,Object? params = null,Object? metadata = freezed,}) { + return _then(_EvalMetric( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,value: null == value ? 
_self.value : value ,params: null == params ? _self._params : params // ignore: cast_nullable_to_non_nullable +as Map,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + + +} + + +/// @nodoc +mixin _$EvalSampleReductions { + +/// Name the of scorer. + String get scorer;/// Name the of reducer. + String? get reducer;/// List of reduced scores. + List get samples; +/// Create a copy of EvalSampleReductions +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSampleReductionsCopyWith get copyWith => _$EvalSampleReductionsCopyWithImpl(this as EvalSampleReductions, _$identity); + + /// Serializes this EvalSampleReductions to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSampleReductions&&(identical(other.scorer, scorer) || other.scorer == scorer)&&(identical(other.reducer, reducer) || other.reducer == reducer)&&const DeepCollectionEquality().equals(other.samples, samples)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,scorer,reducer,const DeepCollectionEquality().hash(samples)); + +@override +String toString() { + return 'EvalSampleReductions(scorer: $scorer, reducer: $reducer, samples: $samples)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalSampleReductionsCopyWith<$Res> { + factory $EvalSampleReductionsCopyWith(EvalSampleReductions value, $Res Function(EvalSampleReductions) _then) = _$EvalSampleReductionsCopyWithImpl; +@useResult +$Res call({ + String scorer, String? 
reducer, List samples +}); + + + + +} +/// @nodoc +class _$EvalSampleReductionsCopyWithImpl<$Res> + implements $EvalSampleReductionsCopyWith<$Res> { + _$EvalSampleReductionsCopyWithImpl(this._self, this._then); + + final EvalSampleReductions _self; + final $Res Function(EvalSampleReductions) _then; + +/// Create a copy of EvalSampleReductions +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? scorer = null,Object? reducer = freezed,Object? samples = null,}) { + return _then(_self.copyWith( +scorer: null == scorer ? _self.scorer : scorer // ignore: cast_nullable_to_non_nullable +as String,reducer: freezed == reducer ? _self.reducer : reducer // ignore: cast_nullable_to_non_nullable +as String?,samples: null == samples ? _self.samples : samples // ignore: cast_nullable_to_non_nullable +as List, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalSampleReductions]. +extension EvalSampleReductionsPatterns on EvalSampleReductions { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSampleReductions value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSampleReductions() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSampleReductions value) $default,){ +final _that = this; +switch (_that) { +case _EvalSampleReductions(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSampleReductions value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSampleReductions() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String scorer, String? reducer, List samples)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSampleReductions() when $default != null: +return $default(_that.scorer,_that.reducer,_that.samples);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String scorer, String? 
reducer, List samples) $default,) {final _that = this; +switch (_that) { +case _EvalSampleReductions(): +return $default(_that.scorer,_that.reducer,_that.samples);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String scorer, String? reducer, List samples)? $default,) {final _that = this; +switch (_that) { +case _EvalSampleReductions() when $default != null: +return $default(_that.scorer,_that.reducer,_that.samples);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSampleReductions extends EvalSampleReductions { + const _EvalSampleReductions({required this.scorer, this.reducer, required final List samples}): _samples = samples,super._(); + factory _EvalSampleReductions.fromJson(Map json) => _$EvalSampleReductionsFromJson(json); + +/// Name the of scorer. +@override final String scorer; +/// Name the of reducer. +@override final String? reducer; +/// List of reduced scores. + final List _samples; +/// List of reduced scores. +@override List get samples { + if (_samples is EqualUnmodifiableListView) return _samples; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_samples); +} + + +/// Create a copy of EvalSampleReductions +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSampleReductionsCopyWith<_EvalSampleReductions> get copyWith => __$EvalSampleReductionsCopyWithImpl<_EvalSampleReductions>(this, _$identity); + +@override +Map toJson() { + return _$EvalSampleReductionsToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSampleReductions&&(identical(other.scorer, scorer) || other.scorer == scorer)&&(identical(other.reducer, reducer) || other.reducer == reducer)&&const DeepCollectionEquality().equals(other._samples, _samples)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,scorer,reducer,const DeepCollectionEquality().hash(_samples)); + +@override +String toString() { + return 'EvalSampleReductions(scorer: $scorer, reducer: $reducer, samples: $samples)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSampleReductionsCopyWith<$Res> implements $EvalSampleReductionsCopyWith<$Res> { + factory _$EvalSampleReductionsCopyWith(_EvalSampleReductions value, $Res Function(_EvalSampleReductions) _then) = __$EvalSampleReductionsCopyWithImpl; +@override @useResult +$Res call({ + String scorer, String? reducer, List samples +}); + + + + +} +/// @nodoc +class __$EvalSampleReductionsCopyWithImpl<$Res> + implements _$EvalSampleReductionsCopyWith<$Res> { + __$EvalSampleReductionsCopyWithImpl(this._self, this._then); + + final _EvalSampleReductions _self; + final $Res Function(_EvalSampleReductions) _then; + +/// Create a copy of EvalSampleReductions +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? scorer = null,Object? reducer = freezed,Object? samples = null,}) { + return _then(_EvalSampleReductions( +scorer: null == scorer ? 
_self.scorer : scorer // ignore: cast_nullable_to_non_nullable +as String,reducer: freezed == reducer ? _self.reducer : reducer // ignore: cast_nullable_to_non_nullable +as String?,samples: null == samples ? _self._samples : samples // ignore: cast_nullable_to_non_nullable +as List, + )); +} + + +} + + +/// @nodoc +mixin _$EvalStats { + +/// Evaluation start time. Empty string if eval interrupted before start time set. +@JsonKey(name: 'started_at') String get startedAt;/// Evaluation completion time. Empty string if eval interrupted before completion. +@JsonKey(name: 'completed_at') String get completedAt;/// Model token usage for evaluation. +@JsonKey(name: 'model_usage', defaultValue: {}) Map get modelUsage; +/// Create a copy of EvalStats +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalStatsCopyWith get copyWith => _$EvalStatsCopyWithImpl(this as EvalStats, _$identity); + + /// Serializes this EvalStats to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalStats&&(identical(other.startedAt, startedAt) || other.startedAt == startedAt)&&(identical(other.completedAt, completedAt) || other.completedAt == completedAt)&&const DeepCollectionEquality().equals(other.modelUsage, modelUsage)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,startedAt,completedAt,const DeepCollectionEquality().hash(modelUsage)); + +@override +String toString() { + return 'EvalStats(startedAt: $startedAt, completedAt: $completedAt, modelUsage: $modelUsage)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalStatsCopyWith<$Res> { + factory $EvalStatsCopyWith(EvalStats value, $Res Function(EvalStats) _then) = _$EvalStatsCopyWithImpl; +@useResult +$Res call({ +@JsonKey(name: 'started_at') String startedAt,@JsonKey(name: 'completed_at') String completedAt,@JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage +}); + + + + +} +/// @nodoc +class _$EvalStatsCopyWithImpl<$Res> + implements $EvalStatsCopyWith<$Res> { + _$EvalStatsCopyWithImpl(this._self, this._then); + + final EvalStats _self; + final $Res Function(EvalStats) _then; + +/// Create a copy of EvalStats +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? startedAt = null,Object? completedAt = null,Object? modelUsage = null,}) { + return _then(_self.copyWith( +startedAt: null == startedAt ? _self.startedAt : startedAt // ignore: cast_nullable_to_non_nullable +as String,completedAt: null == completedAt ? _self.completedAt : completedAt // ignore: cast_nullable_to_non_nullable +as String,modelUsage: null == modelUsage ? _self.modelUsage : modelUsage // ignore: cast_nullable_to_non_nullable +as Map, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalStats]. 
+extension EvalStatsPatterns on EvalStats { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalStats value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalStats() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalStats value) $default,){ +final _that = this; +switch (_that) { +case _EvalStats(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalStats value)? $default,){ +final _that = this; +switch (_that) { +case _EvalStats() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'started_at') String startedAt, @JsonKey(name: 'completed_at') String completedAt, @JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalStats() when $default != null: +return $default(_that.startedAt,_that.completedAt,_that.modelUsage);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'started_at') String startedAt, @JsonKey(name: 'completed_at') String completedAt, @JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage) $default,) {final _that = this; +switch (_that) { +case _EvalStats(): +return $default(_that.startedAt,_that.completedAt,_that.modelUsage);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'started_at') String startedAt, @JsonKey(name: 'completed_at') String completedAt, @JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage)? 
$default,) {final _that = this; +switch (_that) { +case _EvalStats() when $default != null: +return $default(_that.startedAt,_that.completedAt,_that.modelUsage);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalStats extends EvalStats { + const _EvalStats({@JsonKey(name: 'started_at') required this.startedAt, @JsonKey(name: 'completed_at') required this.completedAt, @JsonKey(name: 'model_usage', defaultValue: {}) final Map modelUsage = const {}}): _modelUsage = modelUsage,super._(); + factory _EvalStats.fromJson(Map json) => _$EvalStatsFromJson(json); + +/// Evaluation start time. Empty string if eval interrupted before start time set. +@override@JsonKey(name: 'started_at') final String startedAt; +/// Evaluation completion time. Empty string if eval interrupted before completion. +@override@JsonKey(name: 'completed_at') final String completedAt; +/// Model token usage for evaluation. + final Map _modelUsage; +/// Model token usage for evaluation. +@override@JsonKey(name: 'model_usage', defaultValue: {}) Map get modelUsage { + if (_modelUsage is EqualUnmodifiableMapView) return _modelUsage; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_modelUsage); +} + + +/// Create a copy of EvalStats +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalStatsCopyWith<_EvalStats> get copyWith => __$EvalStatsCopyWithImpl<_EvalStats>(this, _$identity); + +@override +Map toJson() { + return _$EvalStatsToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalStats&&(identical(other.startedAt, startedAt) || other.startedAt == startedAt)&&(identical(other.completedAt, completedAt) || other.completedAt == completedAt)&&const DeepCollectionEquality().equals(other._modelUsage, _modelUsage)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,startedAt,completedAt,const DeepCollectionEquality().hash(_modelUsage)); + +@override +String toString() { + return 'EvalStats(startedAt: $startedAt, completedAt: $completedAt, modelUsage: $modelUsage)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalStatsCopyWith<$Res> implements $EvalStatsCopyWith<$Res> { + factory _$EvalStatsCopyWith(_EvalStats value, $Res Function(_EvalStats) _then) = __$EvalStatsCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'started_at') String startedAt,@JsonKey(name: 'completed_at') String completedAt,@JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage +}); + + + + +} +/// @nodoc +class __$EvalStatsCopyWithImpl<$Res> + implements _$EvalStatsCopyWith<$Res> { + __$EvalStatsCopyWithImpl(this._self, this._then); + + final _EvalStats _self; + final $Res Function(_EvalStats) _then; + +/// Create a copy of EvalStats +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? startedAt = null,Object? completedAt = null,Object? modelUsage = null,}) { + return _then(_EvalStats( +startedAt: null == startedAt ? 
_self.startedAt : startedAt // ignore: cast_nullable_to_non_nullable +as String,completedAt: null == completedAt ? _self.completedAt : completedAt // ignore: cast_nullable_to_non_nullable +as String,modelUsage: null == modelUsage ? _self._modelUsage : modelUsage // ignore: cast_nullable_to_non_nullable +as Map, + )); +} + + +} + + +/// @nodoc +mixin _$EvalError { + +/// Error message. + String get message;/// Error traceback. + String get traceback;/// Error traceback with ANSI color codes. +@JsonKey(name: 'traceback_ansi') String get tracebackAnsi; +/// Create a copy of EvalError +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalErrorCopyWith get copyWith => _$EvalErrorCopyWithImpl(this as EvalError, _$identity); + + /// Serializes this EvalError to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalError&&(identical(other.message, message) || other.message == message)&&(identical(other.traceback, traceback) || other.traceback == traceback)&&(identical(other.tracebackAnsi, tracebackAnsi) || other.tracebackAnsi == tracebackAnsi)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,message,traceback,tracebackAnsi); + +@override +String toString() { + return 'EvalError(message: $message, traceback: $traceback, tracebackAnsi: $tracebackAnsi)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalErrorCopyWith<$Res> { + factory $EvalErrorCopyWith(EvalError value, $Res Function(EvalError) _then) = _$EvalErrorCopyWithImpl; +@useResult +$Res call({ + String message, String traceback,@JsonKey(name: 'traceback_ansi') String tracebackAnsi +}); + + + + +} +/// @nodoc +class _$EvalErrorCopyWithImpl<$Res> + implements $EvalErrorCopyWith<$Res> { + _$EvalErrorCopyWithImpl(this._self, this._then); + + 
final EvalError _self; + final $Res Function(EvalError) _then; + +/// Create a copy of EvalError +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? message = null,Object? traceback = null,Object? tracebackAnsi = null,}) { + return _then(_self.copyWith( +message: null == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as String,traceback: null == traceback ? _self.traceback : traceback // ignore: cast_nullable_to_non_nullable +as String,tracebackAnsi: null == tracebackAnsi ? _self.tracebackAnsi : tracebackAnsi // ignore: cast_nullable_to_non_nullable +as String, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalError]. +extension EvalErrorPatterns on EvalError { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalError value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalError() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalError value) $default,){ +final _that = this; +switch (_that) { +case _EvalError(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalError value)? $default,){ +final _that = this; +switch (_that) { +case _EvalError() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String message, String traceback, @JsonKey(name: 'traceback_ansi') String tracebackAnsi)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalError() when $default != null: +return $default(_that.message,_that.traceback,_that.tracebackAnsi);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String message, String traceback, @JsonKey(name: 'traceback_ansi') String tracebackAnsi) $default,) {final _that = this; +switch (_that) { +case _EvalError(): +return $default(_that.message,_that.traceback,_that.tracebackAnsi);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? 
Function( String message, String traceback, @JsonKey(name: 'traceback_ansi') String tracebackAnsi)? $default,) {final _that = this; +switch (_that) { +case _EvalError() when $default != null: +return $default(_that.message,_that.traceback,_that.tracebackAnsi);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalError extends EvalError { + const _EvalError({required this.message, required this.traceback, @JsonKey(name: 'traceback_ansi') required this.tracebackAnsi}): super._(); + factory _EvalError.fromJson(Map json) => _$EvalErrorFromJson(json); + +/// Error message. +@override final String message; +/// Error traceback. +@override final String traceback; +/// Error traceback with ANSI color codes. +@override@JsonKey(name: 'traceback_ansi') final String tracebackAnsi; + +/// Create a copy of EvalError +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalErrorCopyWith<_EvalError> get copyWith => __$EvalErrorCopyWithImpl<_EvalError>(this, _$identity); + +@override +Map toJson() { + return _$EvalErrorToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalError&&(identical(other.message, message) || other.message == message)&&(identical(other.traceback, traceback) || other.traceback == traceback)&&(identical(other.tracebackAnsi, tracebackAnsi) || other.tracebackAnsi == tracebackAnsi)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,message,traceback,tracebackAnsi); + +@override +String toString() { + return 'EvalError(message: $message, traceback: $traceback, tracebackAnsi: $tracebackAnsi)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalErrorCopyWith<$Res> implements $EvalErrorCopyWith<$Res> { + factory _$EvalErrorCopyWith(_EvalError value, $Res 
Function(_EvalError) _then) = __$EvalErrorCopyWithImpl; +@override @useResult +$Res call({ + String message, String traceback,@JsonKey(name: 'traceback_ansi') String tracebackAnsi +}); + + + + +} +/// @nodoc +class __$EvalErrorCopyWithImpl<$Res> + implements _$EvalErrorCopyWith<$Res> { + __$EvalErrorCopyWithImpl(this._self, this._then); + + final _EvalError _self; + final $Res Function(_EvalError) _then; + +/// Create a copy of EvalError +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? message = null,Object? traceback = null,Object? tracebackAnsi = null,}) { + return _then(_EvalError( +message: null == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as String,traceback: null == traceback ? _self.traceback : traceback // ignore: cast_nullable_to_non_nullable +as String,tracebackAnsi: null == tracebackAnsi ? _self.tracebackAnsi : tracebackAnsi // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + + +/// @nodoc +mixin _$EvalSample { + +/// Unique id for sample. + Object get id;/// Epoch number for sample. + int get epoch;/// Sample input. + Object get input;/// Sample choices. + List? get choices;/// Sample target value(s). + Object? get target;/// Additional sample metadata. + Map get metadata;/// Sandbox environment type and optional config file. + Object? get sandbox;/// Files that go along with the sample (copied to SandboxEnvironment). + List? get files;/// Setup script to run for sample (run within default SandboxEnvironment). + String? get setup;/// Chat conversation history for sample. + List get messages;/// Model output from sample. + ModelOutput get output;/// Scores for sample. + Map? get scores;/// State at end of sample execution. + Map get store;/// Events that occurred during sample execution. + List get events;/// Model token usage for sample. 
+@JsonKey(name: 'model_usage', defaultValue: {}) Map get modelUsage;/// Time sample started. +@JsonKey(name: 'started_at') String? get startedAt;/// Time sample completed. +@JsonKey(name: 'completed_at') String? get completedAt;/// Total time that the sample was running. +@JsonKey(name: 'total_time') double? get totalTime;/// Time spent working (model generation, sandbox calls, etc.). +@JsonKey(name: 'working_time') double? get workingTime;/// Globally unique identifier for sample run. + String? get uuid;/// Provenance data for invalidation. + ProvenanceData? get invalidation;/// Error that halted sample. + EvalError? get error;/// Errors that were retried for this sample. +@JsonKey(name: 'error_retries') List? get errorRetries;/// Attachments referenced from messages and events. + Map get attachments;/// The limit that halted the sample. + EvalSampleLimit? get limit; +/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSampleCopyWith get copyWith => _$EvalSampleCopyWithImpl(this as EvalSample, _$identity); + + /// Serializes this EvalSample to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSample&&const DeepCollectionEquality().equals(other.id, id)&&(identical(other.epoch, epoch) || other.epoch == epoch)&&const DeepCollectionEquality().equals(other.input, input)&&const DeepCollectionEquality().equals(other.choices, choices)&&const DeepCollectionEquality().equals(other.target, target)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&const DeepCollectionEquality().equals(other.files, files)&&(identical(other.setup, setup) || other.setup == setup)&&const DeepCollectionEquality().equals(other.messages, messages)&&(identical(other.output, output) || other.output == output)&&const DeepCollectionEquality().equals(other.scores, scores)&&const DeepCollectionEquality().equals(other.store, store)&&const DeepCollectionEquality().equals(other.events, events)&&const DeepCollectionEquality().equals(other.modelUsage, modelUsage)&&(identical(other.startedAt, startedAt) || other.startedAt == startedAt)&&(identical(other.completedAt, completedAt) || other.completedAt == completedAt)&&(identical(other.totalTime, totalTime) || other.totalTime == totalTime)&&(identical(other.workingTime, workingTime) || other.workingTime == workingTime)&&(identical(other.uuid, uuid) || other.uuid == uuid)&&(identical(other.invalidation, invalidation) || other.invalidation == invalidation)&&(identical(other.error, error) || other.error == error)&&const DeepCollectionEquality().equals(other.errorRetries, errorRetries)&&const DeepCollectionEquality().equals(other.attachments, attachments)&&(identical(other.limit, limit) || other.limit == limit)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,const DeepCollectionEquality().hash(id),epoch,const DeepCollectionEquality().hash(input),const 
DeepCollectionEquality().hash(choices),const DeepCollectionEquality().hash(target),const DeepCollectionEquality().hash(metadata),const DeepCollectionEquality().hash(sandbox),const DeepCollectionEquality().hash(files),setup,const DeepCollectionEquality().hash(messages),output,const DeepCollectionEquality().hash(scores),const DeepCollectionEquality().hash(store),const DeepCollectionEquality().hash(events),const DeepCollectionEquality().hash(modelUsage),startedAt,completedAt,totalTime,workingTime,uuid,invalidation,error,const DeepCollectionEquality().hash(errorRetries),const DeepCollectionEquality().hash(attachments),limit]); + +@override +String toString() { + return 'EvalSample(id: $id, epoch: $epoch, input: $input, choices: $choices, target: $target, metadata: $metadata, sandbox: $sandbox, files: $files, setup: $setup, messages: $messages, output: $output, scores: $scores, store: $store, events: $events, modelUsage: $modelUsage, startedAt: $startedAt, completedAt: $completedAt, totalTime: $totalTime, workingTime: $workingTime, uuid: $uuid, invalidation: $invalidation, error: $error, errorRetries: $errorRetries, attachments: $attachments, limit: $limit)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalSampleCopyWith<$Res> { + factory $EvalSampleCopyWith(EvalSample value, $Res Function(EvalSample) _then) = _$EvalSampleCopyWithImpl; +@useResult +$Res call({ + Object id, int epoch, Object input, List? choices, Object? target, Map metadata, Object? sandbox, List? files, String? setup, List messages, ModelOutput output, Map? scores, Map store, List events,@JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage,@JsonKey(name: 'started_at') String? startedAt,@JsonKey(name: 'completed_at') String? completedAt,@JsonKey(name: 'total_time') double? totalTime,@JsonKey(name: 'working_time') double? workingTime, String? uuid, ProvenanceData? invalidation, EvalError? error,@JsonKey(name: 'error_retries') List? errorRetries, Map attachments, EvalSampleLimit? 
limit +}); + + +$ModelOutputCopyWith<$Res> get output;$ProvenanceDataCopyWith<$Res>? get invalidation;$EvalErrorCopyWith<$Res>? get error;$EvalSampleLimitCopyWith<$Res>? get limit; + +} +/// @nodoc +class _$EvalSampleCopyWithImpl<$Res> + implements $EvalSampleCopyWith<$Res> { + _$EvalSampleCopyWithImpl(this._self, this._then); + + final EvalSample _self; + final $Res Function(EvalSample) _then; + +/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? id = null,Object? epoch = null,Object? input = null,Object? choices = freezed,Object? target = freezed,Object? metadata = null,Object? sandbox = freezed,Object? files = freezed,Object? setup = freezed,Object? messages = null,Object? output = null,Object? scores = freezed,Object? store = null,Object? events = null,Object? modelUsage = null,Object? startedAt = freezed,Object? completedAt = freezed,Object? totalTime = freezed,Object? workingTime = freezed,Object? uuid = freezed,Object? invalidation = freezed,Object? error = freezed,Object? errorRetries = freezed,Object? attachments = null,Object? limit = freezed,}) { + return _then(_self.copyWith( +id: null == id ? _self.id : id ,epoch: null == epoch ? _self.epoch : epoch // ignore: cast_nullable_to_non_nullable +as int,input: null == input ? _self.input : input ,choices: freezed == choices ? _self.choices : choices // ignore: cast_nullable_to_non_nullable +as List?,target: freezed == target ? _self.target : target ,metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,files: freezed == files ? _self.files : files // ignore: cast_nullable_to_non_nullable +as List?,setup: freezed == setup ? _self.setup : setup // ignore: cast_nullable_to_non_nullable +as String?,messages: null == messages ? 
_self.messages : messages // ignore: cast_nullable_to_non_nullable +as List,output: null == output ? _self.output : output // ignore: cast_nullable_to_non_nullable +as ModelOutput,scores: freezed == scores ? _self.scores : scores // ignore: cast_nullable_to_non_nullable +as Map?,store: null == store ? _self.store : store // ignore: cast_nullable_to_non_nullable +as Map,events: null == events ? _self.events : events // ignore: cast_nullable_to_non_nullable +as List,modelUsage: null == modelUsage ? _self.modelUsage : modelUsage // ignore: cast_nullable_to_non_nullable +as Map,startedAt: freezed == startedAt ? _self.startedAt : startedAt // ignore: cast_nullable_to_non_nullable +as String?,completedAt: freezed == completedAt ? _self.completedAt : completedAt // ignore: cast_nullable_to_non_nullable +as String?,totalTime: freezed == totalTime ? _self.totalTime : totalTime // ignore: cast_nullable_to_non_nullable +as double?,workingTime: freezed == workingTime ? _self.workingTime : workingTime // ignore: cast_nullable_to_non_nullable +as double?,uuid: freezed == uuid ? _self.uuid : uuid // ignore: cast_nullable_to_non_nullable +as String?,invalidation: freezed == invalidation ? _self.invalidation : invalidation // ignore: cast_nullable_to_non_nullable +as ProvenanceData?,error: freezed == error ? _self.error : error // ignore: cast_nullable_to_non_nullable +as EvalError?,errorRetries: freezed == errorRetries ? _self.errorRetries : errorRetries // ignore: cast_nullable_to_non_nullable +as List?,attachments: null == attachments ? _self.attachments : attachments // ignore: cast_nullable_to_non_nullable +as Map,limit: freezed == limit ? _self.limit : limit // ignore: cast_nullable_to_non_nullable +as EvalSampleLimit?, + )); +} +/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. 
+@override +@pragma('vm:prefer-inline') +$ModelOutputCopyWith<$Res> get output { + + return $ModelOutputCopyWith<$Res>(_self.output, (value) { + return _then(_self.copyWith(output: value)); + }); +}/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$ProvenanceDataCopyWith<$Res>? get invalidation { + if (_self.invalidation == null) { + return null; + } + + return $ProvenanceDataCopyWith<$Res>(_self.invalidation!, (value) { + return _then(_self.copyWith(invalidation: value)); + }); +}/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalErrorCopyWith<$Res>? get error { + if (_self.error == null) { + return null; + } + + return $EvalErrorCopyWith<$Res>(_self.error!, (value) { + return _then(_self.copyWith(error: value)); + }); +}/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalSampleLimitCopyWith<$Res>? get limit { + if (_self.limit == null) { + return null; + } + + return $EvalSampleLimitCopyWith<$Res>(_self.limit!, (value) { + return _then(_self.copyWith(limit: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [EvalSample]. +extension EvalSamplePatterns on EvalSample { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSample value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSample() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSample value) $default,){ +final _that = this; +switch (_that) { +case _EvalSample(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSample value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSample() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( Object id, int epoch, Object input, List? choices, Object? target, Map metadata, Object? sandbox, List? files, String? setup, List messages, ModelOutput output, Map? scores, Map store, List events, @JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage, @JsonKey(name: 'started_at') String? startedAt, @JsonKey(name: 'completed_at') String? completedAt, @JsonKey(name: 'total_time') double? totalTime, @JsonKey(name: 'working_time') double? workingTime, String? uuid, ProvenanceData? invalidation, EvalError? error, @JsonKey(name: 'error_retries') List? errorRetries, Map attachments, EvalSampleLimit? limit)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSample() when $default != null: +return $default(_that.id,_that.epoch,_that.input,_that.choices,_that.target,_that.metadata,_that.sandbox,_that.files,_that.setup,_that.messages,_that.output,_that.scores,_that.store,_that.events,_that.modelUsage,_that.startedAt,_that.completedAt,_that.totalTime,_that.workingTime,_that.uuid,_that.invalidation,_that.error,_that.errorRetries,_that.attachments,_that.limit);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( Object id, int epoch, Object input, List? choices, Object? target, Map metadata, Object? sandbox, List? files, String? setup, List messages, ModelOutput output, Map? scores, Map store, List events, @JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage, @JsonKey(name: 'started_at') String? startedAt, @JsonKey(name: 'completed_at') String? completedAt, @JsonKey(name: 'total_time') double? totalTime, @JsonKey(name: 'working_time') double? workingTime, String? uuid, ProvenanceData? invalidation, EvalError? error, @JsonKey(name: 'error_retries') List? errorRetries, Map attachments, EvalSampleLimit? 
limit) $default,) {final _that = this; +switch (_that) { +case _EvalSample(): +return $default(_that.id,_that.epoch,_that.input,_that.choices,_that.target,_that.metadata,_that.sandbox,_that.files,_that.setup,_that.messages,_that.output,_that.scores,_that.store,_that.events,_that.modelUsage,_that.startedAt,_that.completedAt,_that.totalTime,_that.workingTime,_that.uuid,_that.invalidation,_that.error,_that.errorRetries,_that.attachments,_that.limit);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( Object id, int epoch, Object input, List? choices, Object? target, Map metadata, Object? sandbox, List? files, String? setup, List messages, ModelOutput output, Map? scores, Map store, List events, @JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage, @JsonKey(name: 'started_at') String? startedAt, @JsonKey(name: 'completed_at') String? completedAt, @JsonKey(name: 'total_time') double? totalTime, @JsonKey(name: 'working_time') double? workingTime, String? uuid, ProvenanceData? invalidation, EvalError? error, @JsonKey(name: 'error_retries') List? errorRetries, Map attachments, EvalSampleLimit? limit)? 
$default,) {final _that = this; +switch (_that) { +case _EvalSample() when $default != null: +return $default(_that.id,_that.epoch,_that.input,_that.choices,_that.target,_that.metadata,_that.sandbox,_that.files,_that.setup,_that.messages,_that.output,_that.scores,_that.store,_that.events,_that.modelUsage,_that.startedAt,_that.completedAt,_that.totalTime,_that.workingTime,_that.uuid,_that.invalidation,_that.error,_that.errorRetries,_that.attachments,_that.limit);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSample extends EvalSample { + const _EvalSample({required this.id, required this.epoch, required this.input, final List? choices, this.target, final Map metadata = const {}, this.sandbox, final List? files, this.setup, final List messages = const [], required this.output, final Map? scores, final Map store = const {}, final List events = const [], @JsonKey(name: 'model_usage', defaultValue: {}) final Map modelUsage = const {}, @JsonKey(name: 'started_at') this.startedAt, @JsonKey(name: 'completed_at') this.completedAt, @JsonKey(name: 'total_time') this.totalTime, @JsonKey(name: 'working_time') this.workingTime, this.uuid, this.invalidation, this.error, @JsonKey(name: 'error_retries') final List? errorRetries, final Map attachments = const {}, this.limit}): _choices = choices,_metadata = metadata,_files = files,_messages = messages,_scores = scores,_store = store,_events = events,_modelUsage = modelUsage,_errorRetries = errorRetries,_attachments = attachments,super._(); + factory _EvalSample.fromJson(Map json) => _$EvalSampleFromJson(json); + +/// Unique id for sample. +@override final Object id; +/// Epoch number for sample. +@override final int epoch; +/// Sample input. +@override final Object input; +/// Sample choices. + final List? _choices; +/// Sample choices. +@override List? 
get choices { + final value = _choices; + if (value == null) return null; + if (_choices is EqualUnmodifiableListView) return _choices; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Sample target value(s). +@override final Object? target; +/// Additional sample metadata. + final Map _metadata; +/// Additional sample metadata. +@override@JsonKey() Map get metadata { + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_metadata); +} + +/// Sandbox environment type and optional config file. +@override final Object? sandbox; +/// Files that go along with the sample (copied to SandboxEnvironment). + final List? _files; +/// Files that go along with the sample (copied to SandboxEnvironment). +@override List? get files { + final value = _files; + if (value == null) return null; + if (_files is EqualUnmodifiableListView) return _files; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Setup script to run for sample (run within default SandboxEnvironment). +@override final String? setup; +/// Chat conversation history for sample. + final List _messages; +/// Chat conversation history for sample. +@override@JsonKey() List get messages { + if (_messages is EqualUnmodifiableListView) return _messages; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_messages); +} + +/// Model output from sample. +@override final ModelOutput output; +/// Scores for sample. + final Map? _scores; +/// Scores for sample. +@override Map? get scores { + final value = _scores; + if (value == null) return null; + if (_scores is EqualUnmodifiableMapView) return _scores; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// State at end of sample execution. + final Map _store; +/// State at end of sample execution. 
+@override@JsonKey() Map get store { + if (_store is EqualUnmodifiableMapView) return _store; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_store); +} + +/// Events that occurred during sample execution. + final List _events; +/// Events that occurred during sample execution. +@override@JsonKey() List get events { + if (_events is EqualUnmodifiableListView) return _events; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_events); +} + +/// Model token usage for sample. + final Map _modelUsage; +/// Model token usage for sample. +@override@JsonKey(name: 'model_usage', defaultValue: {}) Map get modelUsage { + if (_modelUsage is EqualUnmodifiableMapView) return _modelUsage; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_modelUsage); +} + +/// Time sample started. +@override@JsonKey(name: 'started_at') final String? startedAt; +/// Time sample completed. +@override@JsonKey(name: 'completed_at') final String? completedAt; +/// Total time that the sample was running. +@override@JsonKey(name: 'total_time') final double? totalTime; +/// Time spent working (model generation, sandbox calls, etc.). +@override@JsonKey(name: 'working_time') final double? workingTime; +/// Globally unique identifier for sample run. +@override final String? uuid; +/// Provenance data for invalidation. +@override final ProvenanceData? invalidation; +/// Error that halted sample. +@override final EvalError? error; +/// Errors that were retried for this sample. + final List? _errorRetries; +/// Errors that were retried for this sample. +@override@JsonKey(name: 'error_retries') List? get errorRetries { + final value = _errorRetries; + if (value == null) return null; + if (_errorRetries is EqualUnmodifiableListView) return _errorRetries; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Attachments referenced from messages and events. 
+ final Map _attachments; +/// Attachments referenced from messages and events. +@override@JsonKey() Map get attachments { + if (_attachments is EqualUnmodifiableMapView) return _attachments; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_attachments); +} + +/// The limit that halted the sample. +@override final EvalSampleLimit? limit; + +/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSampleCopyWith<_EvalSample> get copyWith => __$EvalSampleCopyWithImpl<_EvalSample>(this, _$identity); + +@override +Map toJson() { + return _$EvalSampleToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSample&&const DeepCollectionEquality().equals(other.id, id)&&(identical(other.epoch, epoch) || other.epoch == epoch)&&const DeepCollectionEquality().equals(other.input, input)&&const DeepCollectionEquality().equals(other._choices, _choices)&&const DeepCollectionEquality().equals(other.target, target)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&const DeepCollectionEquality().equals(other._files, _files)&&(identical(other.setup, setup) || other.setup == setup)&&const DeepCollectionEquality().equals(other._messages, _messages)&&(identical(other.output, output) || other.output == output)&&const DeepCollectionEquality().equals(other._scores, _scores)&&const DeepCollectionEquality().equals(other._store, _store)&&const DeepCollectionEquality().equals(other._events, _events)&&const DeepCollectionEquality().equals(other._modelUsage, _modelUsage)&&(identical(other.startedAt, startedAt) || other.startedAt == startedAt)&&(identical(other.completedAt, completedAt) || other.completedAt == completedAt)&&(identical(other.totalTime, totalTime) 
|| other.totalTime == totalTime)&&(identical(other.workingTime, workingTime) || other.workingTime == workingTime)&&(identical(other.uuid, uuid) || other.uuid == uuid)&&(identical(other.invalidation, invalidation) || other.invalidation == invalidation)&&(identical(other.error, error) || other.error == error)&&const DeepCollectionEquality().equals(other._errorRetries, _errorRetries)&&const DeepCollectionEquality().equals(other._attachments, _attachments)&&(identical(other.limit, limit) || other.limit == limit)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,const DeepCollectionEquality().hash(id),epoch,const DeepCollectionEquality().hash(input),const DeepCollectionEquality().hash(_choices),const DeepCollectionEquality().hash(target),const DeepCollectionEquality().hash(_metadata),const DeepCollectionEquality().hash(sandbox),const DeepCollectionEquality().hash(_files),setup,const DeepCollectionEquality().hash(_messages),output,const DeepCollectionEquality().hash(_scores),const DeepCollectionEquality().hash(_store),const DeepCollectionEquality().hash(_events),const DeepCollectionEquality().hash(_modelUsage),startedAt,completedAt,totalTime,workingTime,uuid,invalidation,error,const DeepCollectionEquality().hash(_errorRetries),const DeepCollectionEquality().hash(_attachments),limit]); + +@override +String toString() { + return 'EvalSample(id: $id, epoch: $epoch, input: $input, choices: $choices, target: $target, metadata: $metadata, sandbox: $sandbox, files: $files, setup: $setup, messages: $messages, output: $output, scores: $scores, store: $store, events: $events, modelUsage: $modelUsage, startedAt: $startedAt, completedAt: $completedAt, totalTime: $totalTime, workingTime: $workingTime, uuid: $uuid, invalidation: $invalidation, error: $error, errorRetries: $errorRetries, attachments: $attachments, limit: $limit)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSampleCopyWith<$Res> implements 
$EvalSampleCopyWith<$Res> { + factory _$EvalSampleCopyWith(_EvalSample value, $Res Function(_EvalSample) _then) = __$EvalSampleCopyWithImpl; +@override @useResult +$Res call({ + Object id, int epoch, Object input, List? choices, Object? target, Map metadata, Object? sandbox, List? files, String? setup, List messages, ModelOutput output, Map? scores, Map store, List events,@JsonKey(name: 'model_usage', defaultValue: {}) Map modelUsage,@JsonKey(name: 'started_at') String? startedAt,@JsonKey(name: 'completed_at') String? completedAt,@JsonKey(name: 'total_time') double? totalTime,@JsonKey(name: 'working_time') double? workingTime, String? uuid, ProvenanceData? invalidation, EvalError? error,@JsonKey(name: 'error_retries') List? errorRetries, Map attachments, EvalSampleLimit? limit +}); + + +@override $ModelOutputCopyWith<$Res> get output;@override $ProvenanceDataCopyWith<$Res>? get invalidation;@override $EvalErrorCopyWith<$Res>? get error;@override $EvalSampleLimitCopyWith<$Res>? get limit; + +} +/// @nodoc +class __$EvalSampleCopyWithImpl<$Res> + implements _$EvalSampleCopyWith<$Res> { + __$EvalSampleCopyWithImpl(this._self, this._then); + + final _EvalSample _self; + final $Res Function(_EvalSample) _then; + +/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? id = null,Object? epoch = null,Object? input = null,Object? choices = freezed,Object? target = freezed,Object? metadata = null,Object? sandbox = freezed,Object? files = freezed,Object? setup = freezed,Object? messages = null,Object? output = null,Object? scores = freezed,Object? store = null,Object? events = null,Object? modelUsage = null,Object? startedAt = freezed,Object? completedAt = freezed,Object? totalTime = freezed,Object? workingTime = freezed,Object? uuid = freezed,Object? invalidation = freezed,Object? error = freezed,Object? errorRetries = freezed,Object? attachments = null,Object? 
limit = freezed,}) { + return _then(_EvalSample( +id: null == id ? _self.id : id ,epoch: null == epoch ? _self.epoch : epoch // ignore: cast_nullable_to_non_nullable +as int,input: null == input ? _self.input : input ,choices: freezed == choices ? _self._choices : choices // ignore: cast_nullable_to_non_nullable +as List?,target: freezed == target ? _self.target : target ,metadata: null == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,files: freezed == files ? _self._files : files // ignore: cast_nullable_to_non_nullable +as List?,setup: freezed == setup ? _self.setup : setup // ignore: cast_nullable_to_non_nullable +as String?,messages: null == messages ? _self._messages : messages // ignore: cast_nullable_to_non_nullable +as List,output: null == output ? _self.output : output // ignore: cast_nullable_to_non_nullable +as ModelOutput,scores: freezed == scores ? _self._scores : scores // ignore: cast_nullable_to_non_nullable +as Map?,store: null == store ? _self._store : store // ignore: cast_nullable_to_non_nullable +as Map,events: null == events ? _self._events : events // ignore: cast_nullable_to_non_nullable +as List,modelUsage: null == modelUsage ? _self._modelUsage : modelUsage // ignore: cast_nullable_to_non_nullable +as Map,startedAt: freezed == startedAt ? _self.startedAt : startedAt // ignore: cast_nullable_to_non_nullable +as String?,completedAt: freezed == completedAt ? _self.completedAt : completedAt // ignore: cast_nullable_to_non_nullable +as String?,totalTime: freezed == totalTime ? _self.totalTime : totalTime // ignore: cast_nullable_to_non_nullable +as double?,workingTime: freezed == workingTime ? _self.workingTime : workingTime // ignore: cast_nullable_to_non_nullable +as double?,uuid: freezed == uuid ? _self.uuid : uuid // ignore: cast_nullable_to_non_nullable +as String?,invalidation: freezed == invalidation ? 
_self.invalidation : invalidation // ignore: cast_nullable_to_non_nullable +as ProvenanceData?,error: freezed == error ? _self.error : error // ignore: cast_nullable_to_non_nullable +as EvalError?,errorRetries: freezed == errorRetries ? _self._errorRetries : errorRetries // ignore: cast_nullable_to_non_nullable +as List?,attachments: null == attachments ? _self._attachments : attachments // ignore: cast_nullable_to_non_nullable +as Map,limit: freezed == limit ? _self.limit : limit // ignore: cast_nullable_to_non_nullable +as EvalSampleLimit?, + )); +} + +/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$ModelOutputCopyWith<$Res> get output { + + return $ModelOutputCopyWith<$Res>(_self.output, (value) { + return _then(_self.copyWith(output: value)); + }); +}/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$ProvenanceDataCopyWith<$Res>? get invalidation { + if (_self.invalidation == null) { + return null; + } + + return $ProvenanceDataCopyWith<$Res>(_self.invalidation!, (value) { + return _then(_self.copyWith(invalidation: value)); + }); +}/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalErrorCopyWith<$Res>? get error { + if (_self.error == null) { + return null; + } + + return $EvalErrorCopyWith<$Res>(_self.error!, (value) { + return _then(_self.copyWith(error: value)); + }); +}/// Create a copy of EvalSample +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$EvalSampleLimitCopyWith<$Res>? 
get limit { + if (_self.limit == null) { + return null; + } + + return $EvalSampleLimitCopyWith<$Res>(_self.limit!, (value) { + return _then(_self.copyWith(limit: value)); + }); +} +} + + +/// @nodoc +mixin _$ModelOutput { + +/// Model used for generation. + String get model;/// Completion choices. + List get choices;/// Model token usage. + ModelUsage? get usage;/// Model completion. + String get completion;/// First message stop reason. +@JsonKey(name: 'stop_reason', defaultValue: 'unknown') String get stopReason;/// Time elapsed (in seconds) for call to generate. + double? get time;/// Additional metadata associated with model output. + Map get metadata;/// Error message in the case of content moderation refusals. + String? get error;/// First message choice. + ChatMessageAssistant? get message; +/// Create a copy of ModelOutput +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ModelOutputCopyWith get copyWith => _$ModelOutputCopyWithImpl(this as ModelOutput, _$identity); + + /// Serializes this ModelOutput to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ModelOutput&&(identical(other.model, model) || other.model == model)&&const DeepCollectionEquality().equals(other.choices, choices)&&(identical(other.usage, usage) || other.usage == usage)&&(identical(other.completion, completion) || other.completion == completion)&&(identical(other.stopReason, stopReason) || other.stopReason == stopReason)&&(identical(other.time, time) || other.time == time)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&(identical(other.error, error) || other.error == error)&&const DeepCollectionEquality().equals(other.message, message)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,model,const DeepCollectionEquality().hash(choices),usage,completion,stopReason,time,const DeepCollectionEquality().hash(metadata),error,const DeepCollectionEquality().hash(message)); + +@override +String toString() { + return 'ModelOutput(model: $model, choices: $choices, usage: $usage, completion: $completion, stopReason: $stopReason, time: $time, metadata: $metadata, error: $error, message: $message)'; +} + + +} + +/// @nodoc +abstract mixin class $ModelOutputCopyWith<$Res> { + factory $ModelOutputCopyWith(ModelOutput value, $Res Function(ModelOutput) _then) = _$ModelOutputCopyWithImpl; +@useResult +$Res call({ + String model, List choices, ModelUsage? usage, String completion,@JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, double? time, Map metadata, String? error, ChatMessageAssistant? message +}); + + +$ModelUsageCopyWith<$Res>? 
get usage; + +} +/// @nodoc +class _$ModelOutputCopyWithImpl<$Res> + implements $ModelOutputCopyWith<$Res> { + _$ModelOutputCopyWithImpl(this._self, this._then); + + final ModelOutput _self; + final $Res Function(ModelOutput) _then; + +/// Create a copy of ModelOutput +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? model = null,Object? choices = null,Object? usage = freezed,Object? completion = null,Object? stopReason = null,Object? time = freezed,Object? metadata = null,Object? error = freezed,Object? message = freezed,}) { + return _then(_self.copyWith( +model: null == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String,choices: null == choices ? _self.choices : choices // ignore: cast_nullable_to_non_nullable +as List,usage: freezed == usage ? _self.usage : usage // ignore: cast_nullable_to_non_nullable +as ModelUsage?,completion: null == completion ? _self.completion : completion // ignore: cast_nullable_to_non_nullable +as String,stopReason: null == stopReason ? _self.stopReason : stopReason // ignore: cast_nullable_to_non_nullable +as String,time: freezed == time ? _self.time : time // ignore: cast_nullable_to_non_nullable +as double?,metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,error: freezed == error ? _self.error : error // ignore: cast_nullable_to_non_nullable +as String?,message: freezed == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as ChatMessageAssistant?, + )); +} +/// Create a copy of ModelOutput +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$ModelUsageCopyWith<$Res>? 
get usage { + if (_self.usage == null) { + return null; + } + + return $ModelUsageCopyWith<$Res>(_self.usage!, (value) { + return _then(_self.copyWith(usage: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [ModelOutput]. +extension ModelOutputPatterns on ModelOutput { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ModelOutput value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ModelOutput() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ModelOutput value) $default,){ +final _that = this; +switch (_that) { +case _ModelOutput(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ModelOutput value)? $default,){ +final _that = this; +switch (_that) { +case _ModelOutput() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String model, List choices, ModelUsage? usage, String completion, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, double? time, Map metadata, String? error, ChatMessageAssistant? message)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ModelOutput() when $default != null: +return $default(_that.model,_that.choices,_that.usage,_that.completion,_that.stopReason,_that.time,_that.metadata,_that.error,_that.message);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String model, List choices, ModelUsage? usage, String completion, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, double? time, Map metadata, String? error, ChatMessageAssistant? message) $default,) {final _that = this; +switch (_that) { +case _ModelOutput(): +return $default(_that.model,_that.choices,_that.usage,_that.completion,_that.stopReason,_that.time,_that.metadata,_that.error,_that.message);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String model, List choices, ModelUsage? 
usage, String completion, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, double? time, Map metadata, String? error, ChatMessageAssistant? message)? $default,) {final _that = this; +switch (_that) { +case _ModelOutput() when $default != null: +return $default(_that.model,_that.choices,_that.usage,_that.completion,_that.stopReason,_that.time,_that.metadata,_that.error,_that.message);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ModelOutput extends ModelOutput { + const _ModelOutput({required this.model, final List choices = const [], this.usage, required this.completion, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') this.stopReason = 'unknown', this.time, final Map metadata = const {}, this.error, this.message}): _choices = choices,_metadata = metadata,super._(); + factory _ModelOutput.fromJson(Map json) => _$ModelOutputFromJson(json); + +/// Model used for generation. +@override final String model; +/// Completion choices. + final List _choices; +/// Completion choices. +@override@JsonKey() List get choices { + if (_choices is EqualUnmodifiableListView) return _choices; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_choices); +} + +/// Model token usage. +@override final ModelUsage? usage; +/// Model completion. +@override final String completion; +/// First message stop reason. +@override@JsonKey(name: 'stop_reason', defaultValue: 'unknown') final String stopReason; +/// Time elapsed (in seconds) for call to generate. +@override final double? time; +/// Additional metadata associated with model output. + final Map _metadata; +/// Additional metadata associated with model output. +@override@JsonKey() Map get metadata { + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_metadata); +} + +/// Error message in the case of content moderation refusals. +@override final String? 
error; +/// First message choice. +@override final ChatMessageAssistant? message; + +/// Create a copy of ModelOutput +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ModelOutputCopyWith<_ModelOutput> get copyWith => __$ModelOutputCopyWithImpl<_ModelOutput>(this, _$identity); + +@override +Map toJson() { + return _$ModelOutputToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ModelOutput&&(identical(other.model, model) || other.model == model)&&const DeepCollectionEquality().equals(other._choices, _choices)&&(identical(other.usage, usage) || other.usage == usage)&&(identical(other.completion, completion) || other.completion == completion)&&(identical(other.stopReason, stopReason) || other.stopReason == stopReason)&&(identical(other.time, time) || other.time == time)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.error, error) || other.error == error)&&const DeepCollectionEquality().equals(other.message, message)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,model,const DeepCollectionEquality().hash(_choices),usage,completion,stopReason,time,const DeepCollectionEquality().hash(_metadata),error,const DeepCollectionEquality().hash(message)); + +@override +String toString() { + return 'ModelOutput(model: $model, choices: $choices, usage: $usage, completion: $completion, stopReason: $stopReason, time: $time, metadata: $metadata, error: $error, message: $message)'; +} + + +} + +/// @nodoc +abstract mixin class _$ModelOutputCopyWith<$Res> implements $ModelOutputCopyWith<$Res> { + factory _$ModelOutputCopyWith(_ModelOutput value, $Res Function(_ModelOutput) _then) = __$ModelOutputCopyWithImpl; +@override @useResult +$Res call({ + String model, List 
choices, ModelUsage? usage, String completion,@JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, double? time, Map metadata, String? error, ChatMessageAssistant? message +}); + + +@override $ModelUsageCopyWith<$Res>? get usage; + +} +/// @nodoc +class __$ModelOutputCopyWithImpl<$Res> + implements _$ModelOutputCopyWith<$Res> { + __$ModelOutputCopyWithImpl(this._self, this._then); + + final _ModelOutput _self; + final $Res Function(_ModelOutput) _then; + +/// Create a copy of ModelOutput +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? model = null,Object? choices = null,Object? usage = freezed,Object? completion = null,Object? stopReason = null,Object? time = freezed,Object? metadata = null,Object? error = freezed,Object? message = freezed,}) { + return _then(_ModelOutput( +model: null == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String,choices: null == choices ? _self._choices : choices // ignore: cast_nullable_to_non_nullable +as List,usage: freezed == usage ? _self.usage : usage // ignore: cast_nullable_to_non_nullable +as ModelUsage?,completion: null == completion ? _self.completion : completion // ignore: cast_nullable_to_non_nullable +as String,stopReason: null == stopReason ? _self.stopReason : stopReason // ignore: cast_nullable_to_non_nullable +as String,time: freezed == time ? _self.time : time // ignore: cast_nullable_to_non_nullable +as double?,metadata: null == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,error: freezed == error ? _self.error : error // ignore: cast_nullable_to_non_nullable +as String?,message: freezed == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as ChatMessageAssistant?, + )); +} + +/// Create a copy of ModelOutput +/// with the given fields replaced by the non-null parameter values. 
+@override +@pragma('vm:prefer-inline') +$ModelUsageCopyWith<$Res>? get usage { + if (_self.usage == null) { + return null; + } + + return $ModelUsageCopyWith<$Res>(_self.usage!, (value) { + return _then(_self.copyWith(usage: value)); + }); +} +} + + +/// @nodoc +mixin _$ChatCompletionChoice { + +/// Assistant message. + ChatMessageAssistant get message;/// Reason that the model stopped generating. +@JsonKey(name: 'stop_reason', defaultValue: 'unknown') String get stopReason;/// Logprobs. + Logprobs? get logprobs; +/// Create a copy of ChatCompletionChoice +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ChatCompletionChoiceCopyWith get copyWith => _$ChatCompletionChoiceCopyWithImpl(this as ChatCompletionChoice, _$identity); + + /// Serializes this ChatCompletionChoice to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ChatCompletionChoice&&const DeepCollectionEquality().equals(other.message, message)&&(identical(other.stopReason, stopReason) || other.stopReason == stopReason)&&(identical(other.logprobs, logprobs) || other.logprobs == logprobs)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(message),stopReason,logprobs); + +@override +String toString() { + return 'ChatCompletionChoice(message: $message, stopReason: $stopReason, logprobs: $logprobs)'; +} + + +} + +/// @nodoc +abstract mixin class $ChatCompletionChoiceCopyWith<$Res> { + factory $ChatCompletionChoiceCopyWith(ChatCompletionChoice value, $Res Function(ChatCompletionChoice) _then) = _$ChatCompletionChoiceCopyWithImpl; +@useResult +$Res call({ + ChatMessageAssistant message,@JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, Logprobs? 
logprobs +}); + + +$LogprobsCopyWith<$Res>? get logprobs; + +} +/// @nodoc +class _$ChatCompletionChoiceCopyWithImpl<$Res> + implements $ChatCompletionChoiceCopyWith<$Res> { + _$ChatCompletionChoiceCopyWithImpl(this._self, this._then); + + final ChatCompletionChoice _self; + final $Res Function(ChatCompletionChoice) _then; + +/// Create a copy of ChatCompletionChoice +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? message = freezed,Object? stopReason = null,Object? logprobs = freezed,}) { + return _then(_self.copyWith( +message: freezed == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as ChatMessageAssistant,stopReason: null == stopReason ? _self.stopReason : stopReason // ignore: cast_nullable_to_non_nullable +as String,logprobs: freezed == logprobs ? _self.logprobs : logprobs // ignore: cast_nullable_to_non_nullable +as Logprobs?, + )); +} +/// Create a copy of ChatCompletionChoice +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$LogprobsCopyWith<$Res>? get logprobs { + if (_self.logprobs == null) { + return null; + } + + return $LogprobsCopyWith<$Res>(_self.logprobs!, (value) { + return _then(_self.copyWith(logprobs: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [ChatCompletionChoice]. +extension ChatCompletionChoicePatterns on ChatCompletionChoice { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ChatCompletionChoice value)? 
$default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ChatCompletionChoice() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ChatCompletionChoice value) $default,){ +final _that = this; +switch (_that) { +case _ChatCompletionChoice(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ChatCompletionChoice value)? $default,){ +final _that = this; +switch (_that) { +case _ChatCompletionChoice() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( ChatMessageAssistant message, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, Logprobs? logprobs)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ChatCompletionChoice() when $default != null: +return $default(_that.message,_that.stopReason,_that.logprobs);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( ChatMessageAssistant message, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, Logprobs? logprobs) $default,) {final _that = this; +switch (_that) { +case _ChatCompletionChoice(): +return $default(_that.message,_that.stopReason,_that.logprobs);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( ChatMessageAssistant message, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, Logprobs? logprobs)? $default,) {final _that = this; +switch (_that) { +case _ChatCompletionChoice() when $default != null: +return $default(_that.message,_that.stopReason,_that.logprobs);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ChatCompletionChoice extends ChatCompletionChoice { + const _ChatCompletionChoice({required this.message, @JsonKey(name: 'stop_reason', defaultValue: 'unknown') this.stopReason = 'unknown', this.logprobs}): super._(); + factory _ChatCompletionChoice.fromJson(Map json) => _$ChatCompletionChoiceFromJson(json); + +/// Assistant message. +@override final ChatMessageAssistant message; +/// Reason that the model stopped generating. +@override@JsonKey(name: 'stop_reason', defaultValue: 'unknown') final String stopReason; +/// Logprobs. +@override final Logprobs? logprobs; + +/// Create a copy of ChatCompletionChoice +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ChatCompletionChoiceCopyWith<_ChatCompletionChoice> get copyWith => __$ChatCompletionChoiceCopyWithImpl<_ChatCompletionChoice>(this, _$identity); + +@override +Map toJson() { + return _$ChatCompletionChoiceToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ChatCompletionChoice&&const DeepCollectionEquality().equals(other.message, message)&&(identical(other.stopReason, stopReason) || other.stopReason == stopReason)&&(identical(other.logprobs, logprobs) || other.logprobs == logprobs)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(message),stopReason,logprobs); + +@override +String toString() { + return 'ChatCompletionChoice(message: $message, stopReason: $stopReason, logprobs: $logprobs)'; +} + + +} + +/// @nodoc +abstract mixin class _$ChatCompletionChoiceCopyWith<$Res> implements $ChatCompletionChoiceCopyWith<$Res> { + factory _$ChatCompletionChoiceCopyWith(_ChatCompletionChoice value, $Res Function(_ChatCompletionChoice) _then) = __$ChatCompletionChoiceCopyWithImpl; +@override @useResult +$Res call({ + ChatMessageAssistant message,@JsonKey(name: 'stop_reason', defaultValue: 'unknown') String stopReason, Logprobs? logprobs +}); + + +@override $LogprobsCopyWith<$Res>? get logprobs; + +} +/// @nodoc +class __$ChatCompletionChoiceCopyWithImpl<$Res> + implements _$ChatCompletionChoiceCopyWith<$Res> { + __$ChatCompletionChoiceCopyWithImpl(this._self, this._then); + + final _ChatCompletionChoice _self; + final $Res Function(_ChatCompletionChoice) _then; + +/// Create a copy of ChatCompletionChoice +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? message = freezed,Object? stopReason = null,Object? 
logprobs = freezed,}) { + return _then(_ChatCompletionChoice( +message: freezed == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as ChatMessageAssistant,stopReason: null == stopReason ? _self.stopReason : stopReason // ignore: cast_nullable_to_non_nullable +as String,logprobs: freezed == logprobs ? _self.logprobs : logprobs // ignore: cast_nullable_to_non_nullable +as Logprobs?, + )); +} + +/// Create a copy of ChatCompletionChoice +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$LogprobsCopyWith<$Res>? get logprobs { + if (_self.logprobs == null) { + return null; + } + + return $LogprobsCopyWith<$Res>(_self.logprobs!, (value) { + return _then(_self.copyWith(logprobs: value)); + }); +} +} + + +/// @nodoc +mixin _$ModelUsage { + +/// Total input tokens used. +@JsonKey(name: 'input_tokens', defaultValue: 0) int get inputTokens;/// Total output tokens used. +@JsonKey(name: 'output_tokens', defaultValue: 0) int get outputTokens;/// Total tokens used. +@JsonKey(name: 'total_tokens', defaultValue: 0) int get totalTokens;/// Number of tokens written to the cache. +@JsonKey(name: 'input_tokens_cache_write') int? get inputTokensCacheWrite;/// Number of tokens retrieved from the cache. +@JsonKey(name: 'input_tokens_cache_read') int? get inputTokensCacheRead;/// Number of tokens used for reasoning. +@JsonKey(name: 'reasoning_tokens', defaultValue: 0) int get reasoningTokens; +/// Create a copy of ModelUsage +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ModelUsageCopyWith get copyWith => _$ModelUsageCopyWithImpl(this as ModelUsage, _$identity); + + /// Serializes this ModelUsage to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ModelUsage&&(identical(other.inputTokens, inputTokens) || other.inputTokens == inputTokens)&&(identical(other.outputTokens, outputTokens) || other.outputTokens == outputTokens)&&(identical(other.totalTokens, totalTokens) || other.totalTokens == totalTokens)&&(identical(other.inputTokensCacheWrite, inputTokensCacheWrite) || other.inputTokensCacheWrite == inputTokensCacheWrite)&&(identical(other.inputTokensCacheRead, inputTokensCacheRead) || other.inputTokensCacheRead == inputTokensCacheRead)&&(identical(other.reasoningTokens, reasoningTokens) || other.reasoningTokens == reasoningTokens)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,inputTokens,outputTokens,totalTokens,inputTokensCacheWrite,inputTokensCacheRead,reasoningTokens); + +@override +String toString() { + return 'ModelUsage(inputTokens: $inputTokens, outputTokens: $outputTokens, totalTokens: $totalTokens, inputTokensCacheWrite: $inputTokensCacheWrite, inputTokensCacheRead: $inputTokensCacheRead, reasoningTokens: $reasoningTokens)'; +} + + +} + +/// @nodoc +abstract mixin class $ModelUsageCopyWith<$Res> { + factory $ModelUsageCopyWith(ModelUsage value, $Res Function(ModelUsage) _then) = _$ModelUsageCopyWithImpl; +@useResult +$Res call({ +@JsonKey(name: 'input_tokens', defaultValue: 0) int inputTokens,@JsonKey(name: 'output_tokens', defaultValue: 0) int outputTokens,@JsonKey(name: 'total_tokens', defaultValue: 0) int totalTokens,@JsonKey(name: 'input_tokens_cache_write') int? inputTokensCacheWrite,@JsonKey(name: 'input_tokens_cache_read') int? 
inputTokensCacheRead,@JsonKey(name: 'reasoning_tokens', defaultValue: 0) int reasoningTokens +}); + + + + +} +/// @nodoc +class _$ModelUsageCopyWithImpl<$Res> + implements $ModelUsageCopyWith<$Res> { + _$ModelUsageCopyWithImpl(this._self, this._then); + + final ModelUsage _self; + final $Res Function(ModelUsage) _then; + +/// Create a copy of ModelUsage +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? inputTokens = null,Object? outputTokens = null,Object? totalTokens = null,Object? inputTokensCacheWrite = freezed,Object? inputTokensCacheRead = freezed,Object? reasoningTokens = null,}) { + return _then(_self.copyWith( +inputTokens: null == inputTokens ? _self.inputTokens : inputTokens // ignore: cast_nullable_to_non_nullable +as int,outputTokens: null == outputTokens ? _self.outputTokens : outputTokens // ignore: cast_nullable_to_non_nullable +as int,totalTokens: null == totalTokens ? _self.totalTokens : totalTokens // ignore: cast_nullable_to_non_nullable +as int,inputTokensCacheWrite: freezed == inputTokensCacheWrite ? _self.inputTokensCacheWrite : inputTokensCacheWrite // ignore: cast_nullable_to_non_nullable +as int?,inputTokensCacheRead: freezed == inputTokensCacheRead ? _self.inputTokensCacheRead : inputTokensCacheRead // ignore: cast_nullable_to_non_nullable +as int?,reasoningTokens: null == reasoningTokens ? _self.reasoningTokens : reasoningTokens // ignore: cast_nullable_to_non_nullable +as int, + )); +} + +} + + +/// Adds pattern-matching-related methods to [ModelUsage]. +extension ModelUsagePatterns on ModelUsage { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ModelUsage value)? 
$default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ModelUsage() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ModelUsage value) $default,){ +final _that = this; +switch (_that) { +case _ModelUsage(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ModelUsage value)? $default,){ +final _that = this; +switch (_that) { +case _ModelUsage() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'input_tokens', defaultValue: 0) int inputTokens, @JsonKey(name: 'output_tokens', defaultValue: 0) int outputTokens, @JsonKey(name: 'total_tokens', defaultValue: 0) int totalTokens, @JsonKey(name: 'input_tokens_cache_write') int? inputTokensCacheWrite, @JsonKey(name: 'input_tokens_cache_read') int? inputTokensCacheRead, @JsonKey(name: 'reasoning_tokens', defaultValue: 0) int reasoningTokens)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ModelUsage() when $default != null: +return $default(_that.inputTokens,_that.outputTokens,_that.totalTokens,_that.inputTokensCacheWrite,_that.inputTokensCacheRead,_that.reasoningTokens);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'input_tokens', defaultValue: 0) int inputTokens, @JsonKey(name: 'output_tokens', defaultValue: 0) int outputTokens, @JsonKey(name: 'total_tokens', defaultValue: 0) int totalTokens, @JsonKey(name: 'input_tokens_cache_write') int? inputTokensCacheWrite, @JsonKey(name: 'input_tokens_cache_read') int? inputTokensCacheRead, @JsonKey(name: 'reasoning_tokens', defaultValue: 0) int reasoningTokens) $default,) {final _that = this; +switch (_that) { +case _ModelUsage(): +return $default(_that.inputTokens,_that.outputTokens,_that.totalTokens,_that.inputTokensCacheWrite,_that.inputTokensCacheRead,_that.reasoningTokens);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'input_tokens', defaultValue: 0) int inputTokens, @JsonKey(name: 'output_tokens', defaultValue: 0) int outputTokens, @JsonKey(name: 'total_tokens', defaultValue: 0) int totalTokens, @JsonKey(name: 'input_tokens_cache_write') int? inputTokensCacheWrite, @JsonKey(name: 'input_tokens_cache_read') int? 
inputTokensCacheRead, @JsonKey(name: 'reasoning_tokens', defaultValue: 0) int reasoningTokens)? $default,) {final _that = this; +switch (_that) { +case _ModelUsage() when $default != null: +return $default(_that.inputTokens,_that.outputTokens,_that.totalTokens,_that.inputTokensCacheWrite,_that.inputTokensCacheRead,_that.reasoningTokens);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ModelUsage extends ModelUsage { + const _ModelUsage({@JsonKey(name: 'input_tokens', defaultValue: 0) this.inputTokens = 0, @JsonKey(name: 'output_tokens', defaultValue: 0) this.outputTokens = 0, @JsonKey(name: 'total_tokens', defaultValue: 0) this.totalTokens = 0, @JsonKey(name: 'input_tokens_cache_write') this.inputTokensCacheWrite, @JsonKey(name: 'input_tokens_cache_read') this.inputTokensCacheRead, @JsonKey(name: 'reasoning_tokens', defaultValue: 0) this.reasoningTokens = 0}): super._(); + factory _ModelUsage.fromJson(Map json) => _$ModelUsageFromJson(json); + +/// Total input tokens used. +@override@JsonKey(name: 'input_tokens', defaultValue: 0) final int inputTokens; +/// Total output tokens used. +@override@JsonKey(name: 'output_tokens', defaultValue: 0) final int outputTokens; +/// Total tokens used. +@override@JsonKey(name: 'total_tokens', defaultValue: 0) final int totalTokens; +/// Number of tokens written to the cache. +@override@JsonKey(name: 'input_tokens_cache_write') final int? inputTokensCacheWrite; +/// Number of tokens retrieved from the cache. +@override@JsonKey(name: 'input_tokens_cache_read') final int? inputTokensCacheRead; +/// Number of tokens used for reasoning. +@override@JsonKey(name: 'reasoning_tokens', defaultValue: 0) final int reasoningTokens; + +/// Create a copy of ModelUsage +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ModelUsageCopyWith<_ModelUsage> get copyWith => __$ModelUsageCopyWithImpl<_ModelUsage>(this, _$identity); + +@override +Map toJson() { + return _$ModelUsageToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ModelUsage&&(identical(other.inputTokens, inputTokens) || other.inputTokens == inputTokens)&&(identical(other.outputTokens, outputTokens) || other.outputTokens == outputTokens)&&(identical(other.totalTokens, totalTokens) || other.totalTokens == totalTokens)&&(identical(other.inputTokensCacheWrite, inputTokensCacheWrite) || other.inputTokensCacheWrite == inputTokensCacheWrite)&&(identical(other.inputTokensCacheRead, inputTokensCacheRead) || other.inputTokensCacheRead == inputTokensCacheRead)&&(identical(other.reasoningTokens, reasoningTokens) || other.reasoningTokens == reasoningTokens)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,inputTokens,outputTokens,totalTokens,inputTokensCacheWrite,inputTokensCacheRead,reasoningTokens); + +@override +String toString() { + return 'ModelUsage(inputTokens: $inputTokens, outputTokens: $outputTokens, totalTokens: $totalTokens, inputTokensCacheWrite: $inputTokensCacheWrite, inputTokensCacheRead: $inputTokensCacheRead, reasoningTokens: $reasoningTokens)'; +} + + +} + +/// @nodoc +abstract mixin class _$ModelUsageCopyWith<$Res> implements $ModelUsageCopyWith<$Res> { + factory _$ModelUsageCopyWith(_ModelUsage value, $Res Function(_ModelUsage) _then) = __$ModelUsageCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'input_tokens', defaultValue: 0) int inputTokens,@JsonKey(name: 'output_tokens', defaultValue: 0) int outputTokens,@JsonKey(name: 'total_tokens', defaultValue: 0) int totalTokens,@JsonKey(name: 'input_tokens_cache_write') int? 
inputTokensCacheWrite,@JsonKey(name: 'input_tokens_cache_read') int? inputTokensCacheRead,@JsonKey(name: 'reasoning_tokens', defaultValue: 0) int reasoningTokens +}); + + + + +} +/// @nodoc +class __$ModelUsageCopyWithImpl<$Res> + implements _$ModelUsageCopyWith<$Res> { + __$ModelUsageCopyWithImpl(this._self, this._then); + + final _ModelUsage _self; + final $Res Function(_ModelUsage) _then; + +/// Create a copy of ModelUsage +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? inputTokens = null,Object? outputTokens = null,Object? totalTokens = null,Object? inputTokensCacheWrite = freezed,Object? inputTokensCacheRead = freezed,Object? reasoningTokens = null,}) { + return _then(_ModelUsage( +inputTokens: null == inputTokens ? _self.inputTokens : inputTokens // ignore: cast_nullable_to_non_nullable +as int,outputTokens: null == outputTokens ? _self.outputTokens : outputTokens // ignore: cast_nullable_to_non_nullable +as int,totalTokens: null == totalTokens ? _self.totalTokens : totalTokens // ignore: cast_nullable_to_non_nullable +as int,inputTokensCacheWrite: freezed == inputTokensCacheWrite ? _self.inputTokensCacheWrite : inputTokensCacheWrite // ignore: cast_nullable_to_non_nullable +as int?,inputTokensCacheRead: freezed == inputTokensCacheRead ? _self.inputTokensCacheRead : inputTokensCacheRead // ignore: cast_nullable_to_non_nullable +as int?,reasoningTokens: null == reasoningTokens ? 
_self.reasoningTokens : reasoningTokens // ignore: cast_nullable_to_non_nullable +as int, + )); +} + + +} + +ChatMessage _$ChatMessageFromJson( + Map json +) { + switch (json['role']) { + case 'system': + return ChatMessageSystem.fromJson( + json + ); + case 'user': + return ChatMessageUser.fromJson( + json + ); + case 'assistant': + return ChatMessageAssistant.fromJson( + json + ); + case 'tool': + return ChatMessageTool.fromJson( + json + ); + + default: + throw CheckedFromJsonException( + json, + 'role', + 'ChatMessage', + 'Invalid union type "${json['role']}"!' +); + } + +} + +/// @nodoc +mixin _$ChatMessage { + +/// Unique identifer for message. + String? get id;/// Content (simple string or list of content objects). + Object get content;/// Source of message. + String? get source;/// Additional message metadata. + Map? get metadata;/// Conversation role. + String get role; +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ChatMessageCopyWith get copyWith => _$ChatMessageCopyWithImpl(this as ChatMessage, _$identity); + + /// Serializes this ChatMessage to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ChatMessage&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.content, content)&&(identical(other.source, source) || other.source == source)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&(identical(other.role, role) || other.role == role)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(content),source,const DeepCollectionEquality().hash(metadata),role); + +@override +String toString() { + return 'ChatMessage(id: $id, content: $content, source: $source, metadata: $metadata, role: $role)'; +} + + +} + +/// @nodoc +abstract mixin class $ChatMessageCopyWith<$Res> { + factory $ChatMessageCopyWith(ChatMessage value, $Res Function(ChatMessage) _then) = _$ChatMessageCopyWithImpl; +@useResult +$Res call({ + String? id, Object content, String? source, Map? metadata, String role +}); + + + + +} +/// @nodoc +class _$ChatMessageCopyWithImpl<$Res> + implements $ChatMessageCopyWith<$Res> { + _$ChatMessageCopyWithImpl(this._self, this._then); + + final ChatMessage _self; + final $Res Function(ChatMessage) _then; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? id = freezed,Object? content = null,Object? source = freezed,Object? metadata = freezed,Object? role = null,}) { + return _then(_self.copyWith( +id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,content: null == content ? _self.content : content ,source: freezed == source ? _self.source : source // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? 
_self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,role: null == role ? _self.role : role // ignore: cast_nullable_to_non_nullable +as String, + )); +} + +} + + +/// Adds pattern-matching-related methods to [ChatMessage]. +extension ChatMessagePatterns on ChatMessage { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap({TResult Function( ChatMessageSystem value)? system,TResult Function( ChatMessageUser value)? user,TResult Function( ChatMessageAssistant value)? assistant,TResult Function( ChatMessageTool value)? tool,required TResult orElse(),}){ +final _that = this; +switch (_that) { +case ChatMessageSystem() when system != null: +return system(_that);case ChatMessageUser() when user != null: +return user(_that);case ChatMessageAssistant() when assistant != null: +return assistant(_that);case ChatMessageTool() when tool != null: +return tool(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map({required TResult Function( ChatMessageSystem value) system,required TResult Function( ChatMessageUser value) user,required TResult Function( ChatMessageAssistant value) assistant,required TResult Function( ChatMessageTool value) tool,}){ +final _that = this; +switch (_that) { +case ChatMessageSystem(): +return system(_that);case ChatMessageUser(): +return user(_that);case ChatMessageAssistant(): +return assistant(_that);case ChatMessageTool(): +return tool(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull({TResult? Function( ChatMessageSystem value)? system,TResult? Function( ChatMessageUser value)? user,TResult? Function( ChatMessageAssistant value)? assistant,TResult? Function( ChatMessageTool value)? tool,}){ +final _that = this; +switch (_that) { +case ChatMessageSystem() when system != null: +return system(_that);case ChatMessageUser() when user != null: +return user(_that);case ChatMessageAssistant() when assistant != null: +return assistant(_that);case ChatMessageTool() when tool != null: +return tool(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen({TResult Function( String? id, Object content, String? source, Map? metadata, String role)? system,TResult Function( String? id, Object content, String? source, Map? 
metadata, String role, @JsonKey(name: 'tool_call_id') Object? toolCallId)? user,TResult Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_calls') List? toolCalls, String? model)? assistant,TResult Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_call_id') String? toolCallId, String? function, ToolCallError? error)? tool,required TResult orElse(),}) {final _that = this; +switch (_that) { +case ChatMessageSystem() when system != null: +return system(_that.id,_that.content,_that.source,_that.metadata,_that.role);case ChatMessageUser() when user != null: +return user(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCallId);case ChatMessageAssistant() when assistant != null: +return assistant(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCalls,_that.model);case ChatMessageTool() when tool != null: +return tool(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCallId,_that.function,_that.error);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when({required TResult Function( String? id, Object content, String? source, Map? metadata, String role) system,required TResult Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_call_id') Object? toolCallId) user,required TResult Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_calls') List? toolCalls, String? model) assistant,required TResult Function( String? id, Object content, String? source, Map? 
metadata, String role, @JsonKey(name: 'tool_call_id') String? toolCallId, String? function, ToolCallError? error) tool,}) {final _that = this; +switch (_that) { +case ChatMessageSystem(): +return system(_that.id,_that.content,_that.source,_that.metadata,_that.role);case ChatMessageUser(): +return user(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCallId);case ChatMessageAssistant(): +return assistant(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCalls,_that.model);case ChatMessageTool(): +return tool(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCallId,_that.function,_that.error);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull({TResult? Function( String? id, Object content, String? source, Map? metadata, String role)? system,TResult? Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_call_id') Object? toolCallId)? user,TResult? Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_calls') List? toolCalls, String? model)? assistant,TResult? Function( String? id, Object content, String? source, Map? metadata, String role, @JsonKey(name: 'tool_call_id') String? toolCallId, String? function, ToolCallError? error)? 
tool,}) {final _that = this; +switch (_that) { +case ChatMessageSystem() when system != null: +return system(_that.id,_that.content,_that.source,_that.metadata,_that.role);case ChatMessageUser() when user != null: +return user(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCallId);case ChatMessageAssistant() when assistant != null: +return assistant(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCalls,_that.model);case ChatMessageTool() when tool != null: +return tool(_that.id,_that.content,_that.source,_that.metadata,_that.role,_that.toolCallId,_that.function,_that.error);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class ChatMessageSystem extends ChatMessage { + const ChatMessageSystem({this.id, required this.content, this.source, final Map? metadata, this.role = 'system'}): _metadata = metadata,super._(); + factory ChatMessageSystem.fromJson(Map json) => _$ChatMessageSystemFromJson(json); + +/// Unique identifer for message. +@override final String? id; +/// Content (simple string or list of content objects). +@override final Object content; +/// Source of message. +@override final String? source; +/// Additional message metadata. + final Map? _metadata; +/// Additional message metadata. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Conversation role. +@override@JsonKey() final String role; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ChatMessageSystemCopyWith get copyWith => _$ChatMessageSystemCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ChatMessageSystemToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ChatMessageSystem&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.content, content)&&(identical(other.source, source) || other.source == source)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.role, role) || other.role == role)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(content),source,const DeepCollectionEquality().hash(_metadata),role); + +@override +String toString() { + return 'ChatMessage.system(id: $id, content: $content, source: $source, metadata: $metadata, role: $role)'; +} + + +} + +/// @nodoc +abstract mixin class $ChatMessageSystemCopyWith<$Res> implements $ChatMessageCopyWith<$Res> { + factory $ChatMessageSystemCopyWith(ChatMessageSystem value, $Res Function(ChatMessageSystem) _then) = _$ChatMessageSystemCopyWithImpl; +@override @useResult +$Res call({ + String? id, Object content, String? source, Map? metadata, String role +}); + + + + +} +/// @nodoc +class _$ChatMessageSystemCopyWithImpl<$Res> + implements $ChatMessageSystemCopyWith<$Res> { + _$ChatMessageSystemCopyWithImpl(this._self, this._then); + + final ChatMessageSystem _self; + final $Res Function(ChatMessageSystem) _then; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? id = freezed,Object? content = null,Object? source = freezed,Object? metadata = freezed,Object? 
role = null,}) { + return _then(ChatMessageSystem( +id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,content: null == content ? _self.content : content ,source: freezed == source ? _self.source : source // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,role: null == role ? _self.role : role // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ChatMessageUser extends ChatMessage { + const ChatMessageUser({this.id, required this.content, this.source, final Map? metadata, this.role = 'user', @JsonKey(name: 'tool_call_id') this.toolCallId}): _metadata = metadata,super._(); + factory ChatMessageUser.fromJson(Map json) => _$ChatMessageUserFromJson(json); + +/// Unique identifer for message. +@override final String? id; +/// Content (simple string or list of content objects). +@override final Object content; +/// Source of message. +@override final String? source; +/// Additional message metadata. + final Map? _metadata; +/// Additional message metadata. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Conversation role. +@override@JsonKey() final String role; +/// ID(s) of tool call(s) this message has the content payload for. +@JsonKey(name: 'tool_call_id') final Object? toolCallId; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ChatMessageUserCopyWith get copyWith => _$ChatMessageUserCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ChatMessageUserToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ChatMessageUser&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.content, content)&&(identical(other.source, source) || other.source == source)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.role, role) || other.role == role)&&const DeepCollectionEquality().equals(other.toolCallId, toolCallId)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(content),source,const DeepCollectionEquality().hash(_metadata),role,const DeepCollectionEquality().hash(toolCallId)); + +@override +String toString() { + return 'ChatMessage.user(id: $id, content: $content, source: $source, metadata: $metadata, role: $role, toolCallId: $toolCallId)'; +} + + +} + +/// @nodoc +abstract mixin class $ChatMessageUserCopyWith<$Res> implements $ChatMessageCopyWith<$Res> { + factory $ChatMessageUserCopyWith(ChatMessageUser value, $Res Function(ChatMessageUser) _then) = _$ChatMessageUserCopyWithImpl; +@override @useResult +$Res call({ + String? id, Object content, String? source, Map? metadata, String role,@JsonKey(name: 'tool_call_id') Object? toolCallId +}); + + + + +} +/// @nodoc +class _$ChatMessageUserCopyWithImpl<$Res> + implements $ChatMessageUserCopyWith<$Res> { + _$ChatMessageUserCopyWithImpl(this._self, this._then); + + final ChatMessageUser _self; + final $Res Function(ChatMessageUser) _then; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. 
+@override @pragma('vm:prefer-inline') $Res call({Object? id = freezed,Object? content = null,Object? source = freezed,Object? metadata = freezed,Object? role = null,Object? toolCallId = freezed,}) { + return _then(ChatMessageUser( +id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,content: null == content ? _self.content : content ,source: freezed == source ? _self.source : source // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,role: null == role ? _self.role : role // ignore: cast_nullable_to_non_nullable +as String,toolCallId: freezed == toolCallId ? _self.toolCallId : toolCallId , + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ChatMessageAssistant extends ChatMessage { + const ChatMessageAssistant({this.id, required this.content, this.source, final Map? metadata, this.role = 'assistant', @JsonKey(name: 'tool_calls') final List? toolCalls, this.model}): _metadata = metadata,_toolCalls = toolCalls,super._(); + factory ChatMessageAssistant.fromJson(Map json) => _$ChatMessageAssistantFromJson(json); + +/// Unique identifer for message. +@override final String? id; +/// Content (simple string or list of content objects). +@override final Object content; +/// Source of message. +@override final String? source; +/// Additional message metadata. + final Map? _metadata; +/// Additional message metadata. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Conversation role. +@override@JsonKey() final String role; +/// Tool calls made by the model. + final List? _toolCalls; +/// Tool calls made by the model. +@JsonKey(name: 'tool_calls') List? 
get toolCalls { + final value = _toolCalls; + if (value == null) return null; + if (_toolCalls is EqualUnmodifiableListView) return _toolCalls; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Model used to generate assistant message. + final String? model; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ChatMessageAssistantCopyWith get copyWith => _$ChatMessageAssistantCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ChatMessageAssistantToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ChatMessageAssistant&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.content, content)&&(identical(other.source, source) || other.source == source)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.role, role) || other.role == role)&&const DeepCollectionEquality().equals(other._toolCalls, _toolCalls)&&(identical(other.model, model) || other.model == model)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(content),source,const DeepCollectionEquality().hash(_metadata),role,const DeepCollectionEquality().hash(_toolCalls),model); + +@override +String toString() { + return 'ChatMessage.assistant(id: $id, content: $content, source: $source, metadata: $metadata, role: $role, toolCalls: $toolCalls, model: $model)'; +} + + +} + +/// @nodoc +abstract mixin class $ChatMessageAssistantCopyWith<$Res> implements $ChatMessageCopyWith<$Res> { + factory $ChatMessageAssistantCopyWith(ChatMessageAssistant value, $Res Function(ChatMessageAssistant) _then) = _$ChatMessageAssistantCopyWithImpl; +@override @useResult 
+$Res call({ + String? id, Object content, String? source, Map? metadata, String role,@JsonKey(name: 'tool_calls') List? toolCalls, String? model +}); + + + + +} +/// @nodoc +class _$ChatMessageAssistantCopyWithImpl<$Res> + implements $ChatMessageAssistantCopyWith<$Res> { + _$ChatMessageAssistantCopyWithImpl(this._self, this._then); + + final ChatMessageAssistant _self; + final $Res Function(ChatMessageAssistant) _then; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? id = freezed,Object? content = null,Object? source = freezed,Object? metadata = freezed,Object? role = null,Object? toolCalls = freezed,Object? model = freezed,}) { + return _then(ChatMessageAssistant( +id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,content: null == content ? _self.content : content ,source: freezed == source ? _self.source : source // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,role: null == role ? _self.role : role // ignore: cast_nullable_to_non_nullable +as String,toolCalls: freezed == toolCalls ? _self._toolCalls : toolCalls // ignore: cast_nullable_to_non_nullable +as List?,model: freezed == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ChatMessageTool extends ChatMessage { + const ChatMessageTool({this.id, required this.content, this.source, final Map? metadata, this.role = 'tool', @JsonKey(name: 'tool_call_id') this.toolCallId, this.function, this.error}): _metadata = metadata,super._(); + factory ChatMessageTool.fromJson(Map json) => _$ChatMessageToolFromJson(json); + +/// Unique identifer for message. +@override final String? id; +/// Content (simple string or list of content objects). 
+@override final Object content; +/// Source of message. +@override final String? source; +/// Additional message metadata. + final Map? _metadata; +/// Additional message metadata. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Conversation role. +@override@JsonKey() final String role; +/// ID of tool call. +@JsonKey(name: 'tool_call_id') final String? toolCallId; +/// Name of function called. + final String? function; +/// Error which occurred during tool call. + final ToolCallError? error; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ChatMessageToolCopyWith get copyWith => _$ChatMessageToolCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ChatMessageToolToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ChatMessageTool&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.content, content)&&(identical(other.source, source) || other.source == source)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.role, role) || other.role == role)&&(identical(other.toolCallId, toolCallId) || other.toolCallId == toolCallId)&&(identical(other.function, function) || other.function == function)&&(identical(other.error, error) || other.error == error)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(content),source,const DeepCollectionEquality().hash(_metadata),role,toolCallId,function,error); + +@override +String toString() { + return 
'ChatMessage.tool(id: $id, content: $content, source: $source, metadata: $metadata, role: $role, toolCallId: $toolCallId, function: $function, error: $error)'; +} + + +} + +/// @nodoc +abstract mixin class $ChatMessageToolCopyWith<$Res> implements $ChatMessageCopyWith<$Res> { + factory $ChatMessageToolCopyWith(ChatMessageTool value, $Res Function(ChatMessageTool) _then) = _$ChatMessageToolCopyWithImpl; +@override @useResult +$Res call({ + String? id, Object content, String? source, Map? metadata, String role,@JsonKey(name: 'tool_call_id') String? toolCallId, String? function, ToolCallError? error +}); + + +$ToolCallErrorCopyWith<$Res>? get error; + +} +/// @nodoc +class _$ChatMessageToolCopyWithImpl<$Res> + implements $ChatMessageToolCopyWith<$Res> { + _$ChatMessageToolCopyWithImpl(this._self, this._then); + + final ChatMessageTool _self; + final $Res Function(ChatMessageTool) _then; + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? id = freezed,Object? content = null,Object? source = freezed,Object? metadata = freezed,Object? role = null,Object? toolCallId = freezed,Object? function = freezed,Object? error = freezed,}) { + return _then(ChatMessageTool( +id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,content: null == content ? _self.content : content ,source: freezed == source ? _self.source : source // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,role: null == role ? _self.role : role // ignore: cast_nullable_to_non_nullable +as String,toolCallId: freezed == toolCallId ? _self.toolCallId : toolCallId // ignore: cast_nullable_to_non_nullable +as String?,function: freezed == function ? _self.function : function // ignore: cast_nullable_to_non_nullable +as String?,error: freezed == error ? 
_self.error : error // ignore: cast_nullable_to_non_nullable +as ToolCallError?, + )); +} + +/// Create a copy of ChatMessage +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$ToolCallErrorCopyWith<$Res>? get error { + if (_self.error == null) { + return null; + } + + return $ToolCallErrorCopyWith<$Res>(_self.error!, (value) { + return _then(_self.copyWith(error: value)); + }); +} +} + +Content _$ContentFromJson( + Map json +) { + switch (json['type']) { + case 'text': + return ContentText.fromJson( + json + ); + case 'reasoning': + return ContentReasoning.fromJson( + json + ); + case 'image': + return ContentImage.fromJson( + json + ); + case 'audio': + return ContentAudio.fromJson( + json + ); + case 'video': + return ContentVideo.fromJson( + json + ); + case 'document': + return ContentDocument.fromJson( + json + ); + case 'data': + return ContentData.fromJson( + json + ); + case 'tool_use': + return ContentToolUse.fromJson( + json + ); + + default: + throw CheckedFromJsonException( + json, + 'type', + 'Content', + 'Invalid union type "${json['type']}"!' +); + } + +} + +/// @nodoc +mixin _$Content { + +/// Content type. + String get type; +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentCopyWith get copyWith => _$ContentCopyWithImpl(this as Content, _$identity); + + /// Serializes this Content to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Content&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,type); + +@override +String toString() { + return 'Content(type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentCopyWith<$Res> { + factory $ContentCopyWith(Content value, $Res Function(Content) _then) = _$ContentCopyWithImpl; +@useResult +$Res call({ + String type +}); + + + + +} +/// @nodoc +class _$ContentCopyWithImpl<$Res> + implements $ContentCopyWith<$Res> { + _$ContentCopyWithImpl(this._self, this._then); + + final Content _self; + final $Res Function(Content) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? type = null,}) { + return _then(_self.copyWith( +type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Content]. +extension ContentPatterns on Content { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap({TResult Function( ContentText value)? text,TResult Function( ContentReasoning value)? reasoning,TResult Function( ContentImage value)? image,TResult Function( ContentAudio value)? audio,TResult Function( ContentVideo value)? video,TResult Function( ContentDocument value)? document,TResult Function( ContentData value)? data,TResult Function( ContentToolUse value)? 
toolUse,required TResult orElse(),}){ +final _that = this; +switch (_that) { +case ContentText() when text != null: +return text(_that);case ContentReasoning() when reasoning != null: +return reasoning(_that);case ContentImage() when image != null: +return image(_that);case ContentAudio() when audio != null: +return audio(_that);case ContentVideo() when video != null: +return video(_that);case ContentDocument() when document != null: +return document(_that);case ContentData() when data != null: +return data(_that);case ContentToolUse() when toolUse != null: +return toolUse(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map({required TResult Function( ContentText value) text,required TResult Function( ContentReasoning value) reasoning,required TResult Function( ContentImage value) image,required TResult Function( ContentAudio value) audio,required TResult Function( ContentVideo value) video,required TResult Function( ContentDocument value) document,required TResult Function( ContentData value) data,required TResult Function( ContentToolUse value) toolUse,}){ +final _that = this; +switch (_that) { +case ContentText(): +return text(_that);case ContentReasoning(): +return reasoning(_that);case ContentImage(): +return image(_that);case ContentAudio(): +return audio(_that);case ContentVideo(): +return video(_that);case ContentDocument(): +return document(_that);case ContentData(): +return data(_that);case ContentToolUse(): +return toolUse(_that);} +} +/// A variant of `map` that fallback to returning `null`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull({TResult? Function( ContentText value)? text,TResult? Function( ContentReasoning value)? reasoning,TResult? Function( ContentImage value)? image,TResult? Function( ContentAudio value)? audio,TResult? Function( ContentVideo value)? video,TResult? Function( ContentDocument value)? document,TResult? Function( ContentData value)? data,TResult? Function( ContentToolUse value)? toolUse,}){ +final _that = this; +switch (_that) { +case ContentText() when text != null: +return text(_that);case ContentReasoning() when reasoning != null: +return reasoning(_that);case ContentImage() when image != null: +return image(_that);case ContentAudio() when audio != null: +return audio(_that);case ContentVideo() when video != null: +return video(_that);case ContentDocument() when document != null: +return document(_that);case ContentData() when data != null: +return data(_that);case ContentToolUse() when toolUse != null: +return toolUse(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen({TResult Function( String text, bool refusal, List? citations, String type)? text,TResult Function( String reasoning, String? summary, String? signature, bool redacted, String? text, String type)? reasoning,TResult Function( String image, String detail, String type)? image,TResult Function( String audio, String format, String type)? audio,TResult Function( String video, String format, String type)? video,TResult Function( String document, String? filename, @JsonKey(name: 'mime_type') String? mimeType, String type)? 
document,TResult Function( Map data, String type)? data,TResult Function(@JsonKey(name: 'tool_type') String toolType, String id, String name, Map? context, Map arguments, Object? result, Object? error, String type)? toolUse,required TResult orElse(),}) {final _that = this; +switch (_that) { +case ContentText() when text != null: +return text(_that.text,_that.refusal,_that.citations,_that.type);case ContentReasoning() when reasoning != null: +return reasoning(_that.reasoning,_that.summary,_that.signature,_that.redacted,_that.text,_that.type);case ContentImage() when image != null: +return image(_that.image,_that.detail,_that.type);case ContentAudio() when audio != null: +return audio(_that.audio,_that.format,_that.type);case ContentVideo() when video != null: +return video(_that.video,_that.format,_that.type);case ContentDocument() when document != null: +return document(_that.document,_that.filename,_that.mimeType,_that.type);case ContentData() when data != null: +return data(_that.data,_that.type);case ContentToolUse() when toolUse != null: +return toolUse(_that.toolType,_that.id,_that.name,_that.context,_that.arguments,_that.result,_that.error,_that.type);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when({required TResult Function( String text, bool refusal, List? citations, String type) text,required TResult Function( String reasoning, String? summary, String? signature, bool redacted, String? 
text, String type) reasoning,required TResult Function( String image, String detail, String type) image,required TResult Function( String audio, String format, String type) audio,required TResult Function( String video, String format, String type) video,required TResult Function( String document, String? filename, @JsonKey(name: 'mime_type') String? mimeType, String type) document,required TResult Function( Map data, String type) data,required TResult Function(@JsonKey(name: 'tool_type') String toolType, String id, String name, Map? context, Map arguments, Object? result, Object? error, String type) toolUse,}) {final _that = this; +switch (_that) { +case ContentText(): +return text(_that.text,_that.refusal,_that.citations,_that.type);case ContentReasoning(): +return reasoning(_that.reasoning,_that.summary,_that.signature,_that.redacted,_that.text,_that.type);case ContentImage(): +return image(_that.image,_that.detail,_that.type);case ContentAudio(): +return audio(_that.audio,_that.format,_that.type);case ContentVideo(): +return video(_that.video,_that.format,_that.type);case ContentDocument(): +return document(_that.document,_that.filename,_that.mimeType,_that.type);case ContentData(): +return data(_that.data,_that.type);case ContentToolUse(): +return toolUse(_that.toolType,_that.id,_that.name,_that.context,_that.arguments,_that.result,_that.error,_that.type);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull({TResult? Function( String text, bool refusal, List? citations, String type)? text,TResult? Function( String reasoning, String? summary, String? signature, bool redacted, String? text, String type)? reasoning,TResult? Function( String image, String detail, String type)? image,TResult? 
Function( String audio, String format, String type)? audio,TResult? Function( String video, String format, String type)? video,TResult? Function( String document, String? filename, @JsonKey(name: 'mime_type') String? mimeType, String type)? document,TResult? Function( Map data, String type)? data,TResult? Function(@JsonKey(name: 'tool_type') String toolType, String id, String name, Map? context, Map arguments, Object? result, Object? error, String type)? toolUse,}) {final _that = this; +switch (_that) { +case ContentText() when text != null: +return text(_that.text,_that.refusal,_that.citations,_that.type);case ContentReasoning() when reasoning != null: +return reasoning(_that.reasoning,_that.summary,_that.signature,_that.redacted,_that.text,_that.type);case ContentImage() when image != null: +return image(_that.image,_that.detail,_that.type);case ContentAudio() when audio != null: +return audio(_that.audio,_that.format,_that.type);case ContentVideo() when video != null: +return video(_that.video,_that.format,_that.type);case ContentDocument() when document != null: +return document(_that.document,_that.filename,_that.mimeType,_that.type);case ContentData() when data != null: +return data(_that.data,_that.type);case ContentToolUse() when toolUse != null: +return toolUse(_that.toolType,_that.id,_that.name,_that.context,_that.arguments,_that.result,_that.error,_that.type);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class ContentText extends Content { + const ContentText({required this.text, this.refusal = false, final List? citations, this.type = 'text'}): _citations = citations,super._(); + factory ContentText.fromJson(Map json) => _$ContentTextFromJson(json); + +/// Text content. + final String text; +/// Was this a refusal message? +@JsonKey() final bool refusal; +/// Citations supporting the text block. + final List? _citations; +/// Citations supporting the text block. + List? 
get citations { + final value = _citations; + if (value == null) return null; + if (_citations is EqualUnmodifiableListView) return _citations; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentTextCopyWith get copyWith => _$ContentTextCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentTextToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentText&&(identical(other.text, text) || other.text == text)&&(identical(other.refusal, refusal) || other.refusal == refusal)&&const DeepCollectionEquality().equals(other._citations, _citations)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,text,refusal,const DeepCollectionEquality().hash(_citations),type); + +@override +String toString() { + return 'Content.text(text: $text, refusal: $refusal, citations: $citations, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentTextCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentTextCopyWith(ContentText value, $Res Function(ContentText) _then) = _$ContentTextCopyWithImpl; +@override @useResult +$Res call({ + String text, bool refusal, List? citations, String type +}); + + + + +} +/// @nodoc +class _$ContentTextCopyWithImpl<$Res> + implements $ContentTextCopyWith<$Res> { + _$ContentTextCopyWithImpl(this._self, this._then); + + final ContentText _self; + final $Res Function(ContentText) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. 
+@override @pragma('vm:prefer-inline') $Res call({Object? text = null,Object? refusal = null,Object? citations = freezed,Object? type = null,}) { + return _then(ContentText( +text: null == text ? _self.text : text // ignore: cast_nullable_to_non_nullable +as String,refusal: null == refusal ? _self.refusal : refusal // ignore: cast_nullable_to_non_nullable +as bool,citations: freezed == citations ? _self._citations : citations // ignore: cast_nullable_to_non_nullable +as List?,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentReasoning extends Content { + const ContentReasoning({required this.reasoning, this.summary, this.signature, this.redacted = false, this.text, this.type = 'reasoning'}): super._(); + factory ContentReasoning.fromJson(Map json) => _$ContentReasoningFromJson(json); + +/// Reasoning content. + final String reasoning; +/// Reasoning summary. + final String? summary; +/// Signature for reasoning content. + final String? signature; +/// Indicates that the explicit content of this reasoning block has been redacted. +@JsonKey() final bool redacted; +/// Pure text rendering of reasoning. + final String? text; +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentReasoningCopyWith get copyWith => _$ContentReasoningCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentReasoningToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentReasoning&&(identical(other.reasoning, reasoning) || other.reasoning == reasoning)&&(identical(other.summary, summary) || other.summary == summary)&&(identical(other.signature, signature) || other.signature == signature)&&(identical(other.redacted, redacted) || other.redacted == redacted)&&(identical(other.text, text) || other.text == text)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,reasoning,summary,signature,redacted,text,type); + +@override +String toString() { + return 'Content.reasoning(reasoning: $reasoning, summary: $summary, signature: $signature, redacted: $redacted, text: $text, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentReasoningCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentReasoningCopyWith(ContentReasoning value, $Res Function(ContentReasoning) _then) = _$ContentReasoningCopyWithImpl; +@override @useResult +$Res call({ + String reasoning, String? summary, String? signature, bool redacted, String? text, String type +}); + + + + +} +/// @nodoc +class _$ContentReasoningCopyWithImpl<$Res> + implements $ContentReasoningCopyWith<$Res> { + _$ContentReasoningCopyWithImpl(this._self, this._then); + + final ContentReasoning _self; + final $Res Function(ContentReasoning) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? reasoning = null,Object? summary = freezed,Object? 
signature = freezed,Object? redacted = null,Object? text = freezed,Object? type = null,}) { + return _then(ContentReasoning( +reasoning: null == reasoning ? _self.reasoning : reasoning // ignore: cast_nullable_to_non_nullable +as String,summary: freezed == summary ? _self.summary : summary // ignore: cast_nullable_to_non_nullable +as String?,signature: freezed == signature ? _self.signature : signature // ignore: cast_nullable_to_non_nullable +as String?,redacted: null == redacted ? _self.redacted : redacted // ignore: cast_nullable_to_non_nullable +as bool,text: freezed == text ? _self.text : text // ignore: cast_nullable_to_non_nullable +as String?,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentImage extends Content { + const ContentImage({required this.image, this.detail = 'auto', this.type = 'image'}): super._(); + factory ContentImage.fromJson(Map json) => _$ContentImageFromJson(json); + +/// Either a URL of the image or the base64 encoded image data. + final String image; +/// Specifies the detail level of the image. +@JsonKey() final String detail; +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentImageCopyWith get copyWith => _$ContentImageCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentImageToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentImage&&(identical(other.image, image) || other.image == image)&&(identical(other.detail, detail) || other.detail == detail)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,image,detail,type); + +@override +String toString() { + return 'Content.image(image: $image, detail: $detail, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentImageCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentImageCopyWith(ContentImage value, $Res Function(ContentImage) _then) = _$ContentImageCopyWithImpl; +@override @useResult +$Res call({ + String image, String detail, String type +}); + + + + +} +/// @nodoc +class _$ContentImageCopyWithImpl<$Res> + implements $ContentImageCopyWith<$Res> { + _$ContentImageCopyWithImpl(this._self, this._then); + + final ContentImage _self; + final $Res Function(ContentImage) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? image = null,Object? detail = null,Object? type = null,}) { + return _then(ContentImage( +image: null == image ? _self.image : image // ignore: cast_nullable_to_non_nullable +as String,detail: null == detail ? _self.detail : detail // ignore: cast_nullable_to_non_nullable +as String,type: null == type ? 
_self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentAudio extends Content { + const ContentAudio({required this.audio, required this.format, this.type = 'audio'}): super._(); + factory ContentAudio.fromJson(Map json) => _$ContentAudioFromJson(json); + +/// Audio file path or base64 encoded data URL. + final String audio; +/// Format of audio data (‘mp3’ or ‘wav’). + final String format; +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentAudioCopyWith get copyWith => _$ContentAudioCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentAudioToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentAudio&&(identical(other.audio, audio) || other.audio == audio)&&(identical(other.format, format) || other.format == format)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,audio,format,type); + +@override +String toString() { + return 'Content.audio(audio: $audio, format: $format, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentAudioCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentAudioCopyWith(ContentAudio value, $Res Function(ContentAudio) _then) = _$ContentAudioCopyWithImpl; +@override @useResult +$Res call({ + String audio, String format, String type +}); + + + + +} +/// @nodoc +class _$ContentAudioCopyWithImpl<$Res> + implements $ContentAudioCopyWith<$Res> { + _$ContentAudioCopyWithImpl(this._self, this._then); + + final ContentAudio _self; + final $Res Function(ContentAudio) _then; + +/// Create 
a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? audio = null,Object? format = null,Object? type = null,}) { + return _then(ContentAudio( +audio: null == audio ? _self.audio : audio // ignore: cast_nullable_to_non_nullable +as String,format: null == format ? _self.format : format // ignore: cast_nullable_to_non_nullable +as String,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentVideo extends Content { + const ContentVideo({required this.video, required this.format, this.type = 'video'}): super._(); + factory ContentVideo.fromJson(Map json) => _$ContentVideoFromJson(json); + +/// Video file path or base64 encoded data URL. + final String video; +/// Format of video data (‘mp4’, ‘mpeg’, or ‘mov’). + final String format; +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentVideoCopyWith get copyWith => _$ContentVideoCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentVideoToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentVideo&&(identical(other.video, video) || other.video == video)&&(identical(other.format, format) || other.format == format)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,video,format,type); + +@override +String toString() { + return 'Content.video(video: $video, format: $format, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentVideoCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentVideoCopyWith(ContentVideo value, $Res Function(ContentVideo) _then) = _$ContentVideoCopyWithImpl; +@override @useResult +$Res call({ + String video, String format, String type +}); + + + + +} +/// @nodoc +class _$ContentVideoCopyWithImpl<$Res> + implements $ContentVideoCopyWith<$Res> { + _$ContentVideoCopyWithImpl(this._self, this._then); + + final ContentVideo _self; + final $Res Function(ContentVideo) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? video = null,Object? format = null,Object? type = null,}) { + return _then(ContentVideo( +video: null == video ? _self.video : video // ignore: cast_nullable_to_non_nullable +as String,format: null == format ? _self.format : format // ignore: cast_nullable_to_non_nullable +as String,type: null == type ? 
_self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentDocument extends Content { + const ContentDocument({required this.document, this.filename, @JsonKey(name: 'mime_type') this.mimeType, this.type = 'document'}): super._(); + factory ContentDocument.fromJson(Map json) => _$ContentDocumentFromJson(json); + +/// Document file path or base64 encoded data URL. + final String document; +/// Document filename. + final String? filename; +/// Document mime type. +@JsonKey(name: 'mime_type') final String? mimeType; +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentDocumentCopyWith get copyWith => _$ContentDocumentCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentDocumentToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentDocument&&(identical(other.document, document) || other.document == document)&&(identical(other.filename, filename) || other.filename == filename)&&(identical(other.mimeType, mimeType) || other.mimeType == mimeType)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,document,filename,mimeType,type); + +@override +String toString() { + return 'Content.document(document: $document, filename: $filename, mimeType: $mimeType, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentDocumentCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentDocumentCopyWith(ContentDocument value, $Res Function(ContentDocument) _then) = _$ContentDocumentCopyWithImpl; +@override @useResult +$Res call({ + String 
document, String? filename,@JsonKey(name: 'mime_type') String? mimeType, String type +}); + + + + +} +/// @nodoc +class _$ContentDocumentCopyWithImpl<$Res> + implements $ContentDocumentCopyWith<$Res> { + _$ContentDocumentCopyWithImpl(this._self, this._then); + + final ContentDocument _self; + final $Res Function(ContentDocument) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? document = null,Object? filename = freezed,Object? mimeType = freezed,Object? type = null,}) { + return _then(ContentDocument( +document: null == document ? _self.document : document // ignore: cast_nullable_to_non_nullable +as String,filename: freezed == filename ? _self.filename : filename // ignore: cast_nullable_to_non_nullable +as String?,mimeType: freezed == mimeType ? _self.mimeType : mimeType // ignore: cast_nullable_to_non_nullable +as String?,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentData extends Content { + const ContentData({required final Map data, this.type = 'data'}): _data = data,super._(); + factory ContentData.fromJson(Map json) => _$ContentDataFromJson(json); + +/// Model provider specific payload. + final Map _data; +/// Model provider specific payload. + Map get data { + if (_data is EqualUnmodifiableMapView) return _data; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_data); +} + +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentDataCopyWith get copyWith => _$ContentDataCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentDataToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentData&&const DeepCollectionEquality().equals(other._data, _data)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(_data),type); + +@override +String toString() { + return 'Content.data(data: $data, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentDataCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentDataCopyWith(ContentData value, $Res Function(ContentData) _then) = _$ContentDataCopyWithImpl; +@override @useResult +$Res call({ + Map data, String type +}); + + + + +} +/// @nodoc +class _$ContentDataCopyWithImpl<$Res> + implements $ContentDataCopyWith<$Res> { + _$ContentDataCopyWithImpl(this._self, this._then); + + final ContentData _self; + final $Res Function(ContentData) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? data = null,Object? type = null,}) { + return _then(ContentData( +data: null == data ? _self._data : data // ignore: cast_nullable_to_non_nullable +as Map,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + +/// @nodoc +@JsonSerializable() + +class ContentToolUse extends Content { + const ContentToolUse({@JsonKey(name: 'tool_type') required this.toolType, required this.id, required this.name, final Map? 
context, required final Map arguments, this.result, this.error, this.type = 'tool_use'}): _context = context,_arguments = arguments,super._(); + factory ContentToolUse.fromJson(Map json) => _$ContentToolUseFromJson(json); + +/// The type of the tool call. +@JsonKey(name: 'tool_type') final String toolType; +/// The unique ID of the tool call. + final String id; +/// Name of the tool. + final String name; +/// Tool context (e.g. MCP Server). + final Map? _context; +/// Tool context (e.g. MCP Server). + Map? get context { + final value = _context; + if (value == null) return null; + if (_context is EqualUnmodifiableMapView) return _context; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Arguments passed to the tool. + final Map _arguments; +/// Arguments passed to the tool. + Map get arguments { + if (_arguments is EqualUnmodifiableMapView) return _arguments; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_arguments); +} + +/// Result from the tool call. + final Object? result; +/// The error from the tool call (if any). + final Object? error; +/// Content type. +@override@JsonKey() final String type; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ContentToolUseCopyWith get copyWith => _$ContentToolUseCopyWithImpl(this, _$identity); + +@override +Map toJson() { + return _$ContentToolUseToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ContentToolUse&&(identical(other.toolType, toolType) || other.toolType == toolType)&&(identical(other.id, id) || other.id == id)&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other._context, _context)&&const DeepCollectionEquality().equals(other._arguments, _arguments)&&const DeepCollectionEquality().equals(other.result, result)&&const DeepCollectionEquality().equals(other.error, error)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,toolType,id,name,const DeepCollectionEquality().hash(_context),const DeepCollectionEquality().hash(_arguments),const DeepCollectionEquality().hash(result),const DeepCollectionEquality().hash(error),type); + +@override +String toString() { + return 'Content.toolUse(toolType: $toolType, id: $id, name: $name, context: $context, arguments: $arguments, result: $result, error: $error, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ContentToolUseCopyWith<$Res> implements $ContentCopyWith<$Res> { + factory $ContentToolUseCopyWith(ContentToolUse value, $Res Function(ContentToolUse) _then) = _$ContentToolUseCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'tool_type') String toolType, String id, String name, Map? context, Map arguments, Object? result, Object? 
error, String type +}); + + + + +} +/// @nodoc +class _$ContentToolUseCopyWithImpl<$Res> + implements $ContentToolUseCopyWith<$Res> { + _$ContentToolUseCopyWithImpl(this._self, this._then); + + final ContentToolUse _self; + final $Res Function(ContentToolUse) _then; + +/// Create a copy of Content +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? toolType = null,Object? id = null,Object? name = null,Object? context = freezed,Object? arguments = null,Object? result = freezed,Object? error = freezed,Object? type = null,}) { + return _then(ContentToolUse( +toolType: null == toolType ? _self.toolType : toolType // ignore: cast_nullable_to_non_nullable +as String,id: null == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String,name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,context: freezed == context ? _self._context : context // ignore: cast_nullable_to_non_nullable +as Map?,arguments: null == arguments ? _self._arguments : arguments // ignore: cast_nullable_to_non_nullable +as Map,result: freezed == result ? _self.result : result ,error: freezed == error ? _self.error : error ,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + + +/// @nodoc +mixin _$EvalSampleScore { + +/// Score value. + Object get value;/// Model's answer (for logging). + String? get answer;/// Why this score was given. + String? get explanation;/// Additional metadata. + Map get metadata;/// History of scores (if applicable). + List get history;/// Sample ID. +@JsonKey(name: 'sample_id') Object? get sampleId; +/// Create a copy of EvalSampleScore +/// with the given fields replaced by the non-null parameter values. 
+@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSampleScoreCopyWith get copyWith => _$EvalSampleScoreCopyWithImpl(this as EvalSampleScore, _$identity); + + /// Serializes this EvalSampleScore to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSampleScore&&const DeepCollectionEquality().equals(other.value, value)&&(identical(other.answer, answer) || other.answer == answer)&&(identical(other.explanation, explanation) || other.explanation == explanation)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&const DeepCollectionEquality().equals(other.history, history)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(value),answer,explanation,const DeepCollectionEquality().hash(metadata),const DeepCollectionEquality().hash(history),const DeepCollectionEquality().hash(sampleId)); + +@override +String toString() { + return 'EvalSampleScore(value: $value, answer: $answer, explanation: $explanation, metadata: $metadata, history: $history, sampleId: $sampleId)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalSampleScoreCopyWith<$Res> { + factory $EvalSampleScoreCopyWith(EvalSampleScore value, $Res Function(EvalSampleScore) _then) = _$EvalSampleScoreCopyWithImpl; +@useResult +$Res call({ + Object value, String? answer, String? explanation, Map metadata, List history,@JsonKey(name: 'sample_id') Object? 
sampleId +}); + + + + +} +/// @nodoc +class _$EvalSampleScoreCopyWithImpl<$Res> + implements $EvalSampleScoreCopyWith<$Res> { + _$EvalSampleScoreCopyWithImpl(this._self, this._then); + + final EvalSampleScore _self; + final $Res Function(EvalSampleScore) _then; + +/// Create a copy of EvalSampleScore +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? value = null,Object? answer = freezed,Object? explanation = freezed,Object? metadata = null,Object? history = null,Object? sampleId = freezed,}) { + return _then(_self.copyWith( +value: null == value ? _self.value : value ,answer: freezed == answer ? _self.answer : answer // ignore: cast_nullable_to_non_nullable +as String?,explanation: freezed == explanation ? _self.explanation : explanation // ignore: cast_nullable_to_non_nullable +as String?,metadata: null == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,history: null == history ? _self.history : history // ignore: cast_nullable_to_non_nullable +as List,sampleId: freezed == sampleId ? _self.sampleId : sampleId , + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalSampleScore]. +extension EvalSampleScorePatterns on EvalSampleScore { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSampleScore value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSampleScore() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSampleScore value) $default,){ +final _that = this; +switch (_that) { +case _EvalSampleScore(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSampleScore value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSampleScore() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( Object value, String? answer, String? explanation, Map metadata, List history, @JsonKey(name: 'sample_id') Object? sampleId)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSampleScore() when $default != null: +return $default(_that.value,_that.answer,_that.explanation,_that.metadata,_that.history,_that.sampleId);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( Object value, String? answer, String? 
explanation, Map metadata, List history, @JsonKey(name: 'sample_id') Object? sampleId) $default,) {final _that = this; +switch (_that) { +case _EvalSampleScore(): +return $default(_that.value,_that.answer,_that.explanation,_that.metadata,_that.history,_that.sampleId);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( Object value, String? answer, String? explanation, Map metadata, List history, @JsonKey(name: 'sample_id') Object? sampleId)? $default,) {final _that = this; +switch (_that) { +case _EvalSampleScore() when $default != null: +return $default(_that.value,_that.answer,_that.explanation,_that.metadata,_that.history,_that.sampleId);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSampleScore extends EvalSampleScore { + const _EvalSampleScore({required this.value, this.answer, this.explanation, final Map metadata = const {}, final List history = const [], @JsonKey(name: 'sample_id') this.sampleId}): _metadata = metadata,_history = history,super._(); + factory _EvalSampleScore.fromJson(Map json) => _$EvalSampleScoreFromJson(json); + +/// Score value. +@override final Object value; +/// Model's answer (for logging). +@override final String? answer; +/// Why this score was given. +@override final String? explanation; +/// Additional metadata. + final Map _metadata; +/// Additional metadata. +@override@JsonKey() Map get metadata { + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_metadata); +} + +/// History of scores (if applicable). + final List _history; +/// History of scores (if applicable). 
+@override@JsonKey() List get history { + if (_history is EqualUnmodifiableListView) return _history; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_history); +} + +/// Sample ID. +@override@JsonKey(name: 'sample_id') final Object? sampleId; + +/// Create a copy of EvalSampleScore +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSampleScoreCopyWith<_EvalSampleScore> get copyWith => __$EvalSampleScoreCopyWithImpl<_EvalSampleScore>(this, _$identity); + +@override +Map toJson() { + return _$EvalSampleScoreToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSampleScore&&const DeepCollectionEquality().equals(other.value, value)&&(identical(other.answer, answer) || other.answer == answer)&&(identical(other.explanation, explanation) || other.explanation == explanation)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&const DeepCollectionEquality().equals(other._history, _history)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(value),answer,explanation,const DeepCollectionEquality().hash(_metadata),const DeepCollectionEquality().hash(_history),const DeepCollectionEquality().hash(sampleId)); + +@override +String toString() { + return 'EvalSampleScore(value: $value, answer: $answer, explanation: $explanation, metadata: $metadata, history: $history, sampleId: $sampleId)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSampleScoreCopyWith<$Res> implements $EvalSampleScoreCopyWith<$Res> { + factory _$EvalSampleScoreCopyWith(_EvalSampleScore value, $Res Function(_EvalSampleScore) _then) = __$EvalSampleScoreCopyWithImpl; +@override @useResult 
+$Res call({ + Object value, String? answer, String? explanation, Map metadata, List history,@JsonKey(name: 'sample_id') Object? sampleId +}); + + + + +} +/// @nodoc +class __$EvalSampleScoreCopyWithImpl<$Res> + implements _$EvalSampleScoreCopyWith<$Res> { + __$EvalSampleScoreCopyWithImpl(this._self, this._then); + + final _EvalSampleScore _self; + final $Res Function(_EvalSampleScore) _then; + +/// Create a copy of EvalSampleScore +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? value = null,Object? answer = freezed,Object? explanation = freezed,Object? metadata = null,Object? history = null,Object? sampleId = freezed,}) { + return _then(_EvalSampleScore( +value: null == value ? _self.value : value ,answer: freezed == answer ? _self.answer : answer // ignore: cast_nullable_to_non_nullable +as String?,explanation: freezed == explanation ? _self.explanation : explanation // ignore: cast_nullable_to_non_nullable +as String?,metadata: null == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map,history: null == history ? _self._history : history // ignore: cast_nullable_to_non_nullable +as List,sampleId: freezed == sampleId ? _self.sampleId : sampleId , + )); +} + + +} + + +/// @nodoc +mixin _$Score { + +/// Score value. + Object get value;/// Model's answer (for logging). + String? get answer;/// Why this score was given. + String? get explanation;/// Additional metadata. + Map? get metadata; +/// Create a copy of Score +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ScoreCopyWith get copyWith => _$ScoreCopyWithImpl(this as Score, _$identity); + + /// Serializes this Score to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Score&&const DeepCollectionEquality().equals(other.value, value)&&(identical(other.answer, answer) || other.answer == answer)&&(identical(other.explanation, explanation) || other.explanation == explanation)&&const DeepCollectionEquality().equals(other.metadata, metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(value),answer,explanation,const DeepCollectionEquality().hash(metadata)); + +@override +String toString() { + return 'Score(value: $value, answer: $answer, explanation: $explanation, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class $ScoreCopyWith<$Res> { + factory $ScoreCopyWith(Score value, $Res Function(Score) _then) = _$ScoreCopyWithImpl; +@useResult +$Res call({ + Object value, String? answer, String? explanation, Map? metadata +}); + + + + +} +/// @nodoc +class _$ScoreCopyWithImpl<$Res> + implements $ScoreCopyWith<$Res> { + _$ScoreCopyWithImpl(this._self, this._then); + + final Score _self; + final $Res Function(Score) _then; + +/// Create a copy of Score +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? value = null,Object? answer = freezed,Object? explanation = freezed,Object? metadata = freezed,}) { + return _then(_self.copyWith( +value: null == value ? _self.value : value ,answer: freezed == answer ? _self.answer : answer // ignore: cast_nullable_to_non_nullable +as String?,explanation: freezed == explanation ? _self.explanation : explanation // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Score]. 
+extension ScorePatterns on Score { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Score value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Score() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Score value) $default,){ +final _that = this; +switch (_that) { +case _Score(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Score value)? $default,){ +final _that = this; +switch (_that) { +case _Score() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( Object value, String? answer, String? explanation, Map? metadata)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Score() when $default != null: +return $default(_that.value,_that.answer,_that.explanation,_that.metadata);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( Object value, String? answer, String? explanation, Map? metadata) $default,) {final _that = this; +switch (_that) { +case _Score(): +return $default(_that.value,_that.answer,_that.explanation,_that.metadata);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( Object value, String? answer, String? explanation, Map? metadata)? $default,) {final _that = this; +switch (_that) { +case _Score() when $default != null: +return $default(_that.value,_that.answer,_that.explanation,_that.metadata);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Score extends Score { + const _Score({required this.value, this.answer, this.explanation, final Map? metadata}): _metadata = metadata,super._(); + factory _Score.fromJson(Map json) => _$ScoreFromJson(json); + +/// Score value. +@override final Object value; +/// Model's answer (for logging). +@override final String? answer; +/// Why this score was given. +@override final String? explanation; +/// Additional metadata. + final Map? _metadata; +/// Additional metadata. +@override Map? 
get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of Score +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ScoreCopyWith<_Score> get copyWith => __$ScoreCopyWithImpl<_Score>(this, _$identity); + +@override +Map toJson() { + return _$ScoreToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Score&&const DeepCollectionEquality().equals(other.value, value)&&(identical(other.answer, answer) || other.answer == answer)&&(identical(other.explanation, explanation) || other.explanation == explanation)&&const DeepCollectionEquality().equals(other._metadata, _metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(value),answer,explanation,const DeepCollectionEquality().hash(_metadata)); + +@override +String toString() { + return 'Score(value: $value, answer: $answer, explanation: $explanation, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class _$ScoreCopyWith<$Res> implements $ScoreCopyWith<$Res> { + factory _$ScoreCopyWith(_Score value, $Res Function(_Score) _then) = __$ScoreCopyWithImpl; +@override @useResult +$Res call({ + Object value, String? answer, String? explanation, Map? metadata +}); + + + + +} +/// @nodoc +class __$ScoreCopyWithImpl<$Res> + implements _$ScoreCopyWith<$Res> { + __$ScoreCopyWithImpl(this._self, this._then); + + final _Score _self; + final $Res Function(_Score) _then; + +/// Create a copy of Score +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? 
value = null,Object? answer = freezed,Object? explanation = freezed,Object? metadata = freezed,}) { + return _then(_Score( +value: null == value ? _self.value : value ,answer: freezed == answer ? _self.answer : answer // ignore: cast_nullable_to_non_nullable +as String?,explanation: freezed == explanation ? _self.explanation : explanation // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + + +} + + +/// @nodoc +mixin _$ToolCall { + +/// Unique ID of tool call. + String get id;/// Name of function called. + String get function;/// Arguments passed to function. + Map get arguments;/// Type of tool call. + String get type; +/// Create a copy of ToolCall +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ToolCallCopyWith get copyWith => _$ToolCallCopyWithImpl(this as ToolCall, _$identity); + + /// Serializes this ToolCall to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ToolCall&&(identical(other.id, id) || other.id == id)&&(identical(other.function, function) || other.function == function)&&const DeepCollectionEquality().equals(other.arguments, arguments)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,function,const DeepCollectionEquality().hash(arguments),type); + +@override +String toString() { + return 'ToolCall(id: $id, function: $function, arguments: $arguments, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class $ToolCallCopyWith<$Res> { + factory $ToolCallCopyWith(ToolCall value, $Res Function(ToolCall) _then) = _$ToolCallCopyWithImpl; +@useResult +$Res call({ + String id, String function, Map arguments, String type +}); + + + + +} +/// @nodoc +class _$ToolCallCopyWithImpl<$Res> + implements $ToolCallCopyWith<$Res> { + _$ToolCallCopyWithImpl(this._self, this._then); + + final ToolCall _self; + final $Res Function(ToolCall) _then; + +/// Create a copy of ToolCall +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? id = null,Object? function = null,Object? arguments = null,Object? type = null,}) { + return _then(_self.copyWith( +id: null == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String,function: null == function ? _self.function : function // ignore: cast_nullable_to_non_nullable +as String,arguments: null == arguments ? _self.arguments : arguments // ignore: cast_nullable_to_non_nullable +as Map,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + +} + + +/// Adds pattern-matching-related methods to [ToolCall]. 
+extension ToolCallPatterns on ToolCall { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ToolCall value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ToolCall() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ToolCall value) $default,){ +final _that = this; +switch (_that) { +case _ToolCall(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ToolCall value)? $default,){ +final _that = this; +switch (_that) { +case _ToolCall() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String id, String function, Map arguments, String type)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ToolCall() when $default != null: +return $default(_that.id,_that.function,_that.arguments,_that.type);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String id, String function, Map arguments, String type) $default,) {final _that = this; +switch (_that) { +case _ToolCall(): +return $default(_that.id,_that.function,_that.arguments,_that.type);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String id, String function, Map arguments, String type)? $default,) {final _that = this; +switch (_that) { +case _ToolCall() when $default != null: +return $default(_that.id,_that.function,_that.arguments,_that.type);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ToolCall extends ToolCall { + const _ToolCall({required this.id, required this.function, required final Map arguments, this.type = 'call'}): _arguments = arguments,super._(); + factory _ToolCall.fromJson(Map json) => _$ToolCallFromJson(json); + +/// Unique ID of tool call. +@override final String id; +/// Name of function called. +@override final String function; +/// Arguments passed to function. + final Map _arguments; +/// Arguments passed to function. 
+@override Map get arguments { + if (_arguments is EqualUnmodifiableMapView) return _arguments; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_arguments); +} + +/// Type of tool call. +@override@JsonKey() final String type; + +/// Create a copy of ToolCall +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ToolCallCopyWith<_ToolCall> get copyWith => __$ToolCallCopyWithImpl<_ToolCall>(this, _$identity); + +@override +Map toJson() { + return _$ToolCallToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ToolCall&&(identical(other.id, id) || other.id == id)&&(identical(other.function, function) || other.function == function)&&const DeepCollectionEquality().equals(other._arguments, _arguments)&&(identical(other.type, type) || other.type == type)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,function,const DeepCollectionEquality().hash(_arguments),type); + +@override +String toString() { + return 'ToolCall(id: $id, function: $function, arguments: $arguments, type: $type)'; +} + + +} + +/// @nodoc +abstract mixin class _$ToolCallCopyWith<$Res> implements $ToolCallCopyWith<$Res> { + factory _$ToolCallCopyWith(_ToolCall value, $Res Function(_ToolCall) _then) = __$ToolCallCopyWithImpl; +@override @useResult +$Res call({ + String id, String function, Map arguments, String type +}); + + + + +} +/// @nodoc +class __$ToolCallCopyWithImpl<$Res> + implements _$ToolCallCopyWith<$Res> { + __$ToolCallCopyWithImpl(this._self, this._then); + + final _ToolCall _self; + final $Res Function(_ToolCall) _then; + +/// Create a copy of ToolCall +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? id = null,Object? 
function = null,Object? arguments = null,Object? type = null,}) { + return _then(_ToolCall( +id: null == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String,function: null == function ? _self.function : function // ignore: cast_nullable_to_non_nullable +as String,arguments: null == arguments ? _self._arguments : arguments // ignore: cast_nullable_to_non_nullable +as Map,type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + + +/// @nodoc +mixin _$ToolCallError { + +/// Error message. + String get message;/// Error code. + int? get code;/// Additional error data. +@JsonKey(name: 'data') Map? get data; +/// Create a copy of ToolCallError +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ToolCallErrorCopyWith get copyWith => _$ToolCallErrorCopyWithImpl(this as ToolCallError, _$identity); + + /// Serializes this ToolCallError to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ToolCallError&&(identical(other.message, message) || other.message == message)&&(identical(other.code, code) || other.code == code)&&const DeepCollectionEquality().equals(other.data, data)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,message,code,const DeepCollectionEquality().hash(data)); + +@override +String toString() { + return 'ToolCallError(message: $message, code: $code, data: $data)'; +} + + +} + +/// @nodoc +abstract mixin class $ToolCallErrorCopyWith<$Res> { + factory $ToolCallErrorCopyWith(ToolCallError value, $Res Function(ToolCallError) _then) = _$ToolCallErrorCopyWithImpl; +@useResult +$Res call({ + String message, int? code,@JsonKey(name: 'data') Map? 
data +}); + + + + +} +/// @nodoc +class _$ToolCallErrorCopyWithImpl<$Res> + implements $ToolCallErrorCopyWith<$Res> { + _$ToolCallErrorCopyWithImpl(this._self, this._then); + + final ToolCallError _self; + final $Res Function(ToolCallError) _then; + +/// Create a copy of ToolCallError +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? message = null,Object? code = freezed,Object? data = freezed,}) { + return _then(_self.copyWith( +message: null == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as String,code: freezed == code ? _self.code : code // ignore: cast_nullable_to_non_nullable +as int?,data: freezed == data ? _self.data : data // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [ToolCallError]. +extension ToolCallErrorPatterns on ToolCallError { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ToolCallError value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ToolCallError() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ToolCallError value) $default,){ +final _that = this; +switch (_that) { +case _ToolCallError(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ToolCallError value)? $default,){ +final _that = this; +switch (_that) { +case _ToolCallError() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String message, int? code, @JsonKey(name: 'data') Map? data)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ToolCallError() when $default != null: +return $default(_that.message,_that.code,_that.data);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String message, int? code, @JsonKey(name: 'data') Map? 
data) $default,) {final _that = this; +switch (_that) { +case _ToolCallError(): +return $default(_that.message,_that.code,_that.data);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String message, int? code, @JsonKey(name: 'data') Map? data)? $default,) {final _that = this; +switch (_that) { +case _ToolCallError() when $default != null: +return $default(_that.message,_that.code,_that.data);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ToolCallError extends ToolCallError { + const _ToolCallError({required this.message, this.code, @JsonKey(name: 'data') final Map? data}): _data = data,super._(); + factory _ToolCallError.fromJson(Map json) => _$ToolCallErrorFromJson(json); + +/// Error message. +@override final String message; +/// Error code. +@override final int? code; +/// Additional error data. + final Map? _data; +/// Additional error data. +@override@JsonKey(name: 'data') Map? get data { + final value = _data; + if (value == null) return null; + if (_data is EqualUnmodifiableMapView) return _data; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of ToolCallError +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ToolCallErrorCopyWith<_ToolCallError> get copyWith => __$ToolCallErrorCopyWithImpl<_ToolCallError>(this, _$identity); + +@override +Map toJson() { + return _$ToolCallErrorToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ToolCallError&&(identical(other.message, message) || other.message == message)&&(identical(other.code, code) || other.code == code)&&const DeepCollectionEquality().equals(other._data, _data)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,message,code,const DeepCollectionEquality().hash(_data)); + +@override +String toString() { + return 'ToolCallError(message: $message, code: $code, data: $data)'; +} + + +} + +/// @nodoc +abstract mixin class _$ToolCallErrorCopyWith<$Res> implements $ToolCallErrorCopyWith<$Res> { + factory _$ToolCallErrorCopyWith(_ToolCallError value, $Res Function(_ToolCallError) _then) = __$ToolCallErrorCopyWithImpl; +@override @useResult +$Res call({ + String message, int? code,@JsonKey(name: 'data') Map? data +}); + + + + +} +/// @nodoc +class __$ToolCallErrorCopyWithImpl<$Res> + implements _$ToolCallErrorCopyWith<$Res> { + __$ToolCallErrorCopyWithImpl(this._self, this._then); + + final _ToolCallError _self; + final $Res Function(_ToolCallError) _then; + +/// Create a copy of ToolCallError +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? message = null,Object? code = freezed,Object? data = freezed,}) { + return _then(_ToolCallError( +message: null == message ? _self.message : message // ignore: cast_nullable_to_non_nullable +as String,code: freezed == code ? _self.code : code // ignore: cast_nullable_to_non_nullable +as int?,data: freezed == data ? 
_self._data : data // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + + +} + + +/// @nodoc +mixin _$GenerateConfig { + +/// Maximum number of times to retry a request. +@JsonKey(name: 'max_retries') int? get maxRetries;/// Request timeout (in seconds). + int? get timeout;/// Timeout for each individual request attempt (in seconds). +@JsonKey(name: 'attempt_timeout') int? get attemptTimeout;/// Maximum number of concurrent connections to the model API. +@JsonKey(name: 'max_connections') int? get maxConnections;/// System message to provide to the model. +@JsonKey(name: 'system_message') String? get systemMessage;/// Maximum number of tokens to generate. +@JsonKey(name: 'max_tokens') int? get maxTokens;/// Top-p sampling parameter. +@JsonKey(name: 'top_p') double? get topP;/// Temperature sampling parameter. + double? get temperature;/// Sequences that should stop generation. +@JsonKey(name: 'stop_seqs') List? get stopSeqs;/// Number of completions to generate and choose the best from. +@JsonKey(name: 'best_of') int? get bestOf;/// Frequency penalty parameter. +@JsonKey(name: 'frequency_penalty') double? get frequencyPenalty;/// Presence penalty parameter. +@JsonKey(name: 'presence_penalty') double? get presencePenalty;/// Logit bias parameter. +@JsonKey(name: 'logit_bias') Map? get logitBias;/// Random seed for generation. + int? get seed;/// Top-k sampling parameter. +@JsonKey(name: 'top_k') int? get topK;/// Number of completion choices to return. +@JsonKey(name: 'num_choices') int? get numChoices;/// Whether to return logprobs. + bool? get logprobs;/// Number of top logprobs to return. +@JsonKey(name: 'top_logprobs') int? get topLogprobs;/// Whether to allow parallel tool calls. +@JsonKey(name: 'parallel_tool_calls') bool? get parallelToolCalls;/// Whether to allow internal model tools. +@JsonKey(name: 'internal_tools') bool? get internalTools;/// Maximum number of characters to retain for tool output. +@JsonKey(name: 'max_tool_output') int? 
get maxToolOutput;/// Cache the prompt (if supported by the provider). +@JsonKey(name: 'cache_prompt') Object? get cachePrompt; +/// Create a copy of GenerateConfig +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$GenerateConfigCopyWith get copyWith => _$GenerateConfigCopyWithImpl(this as GenerateConfig, _$identity); + + /// Serializes this GenerateConfig to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is GenerateConfig&&(identical(other.maxRetries, maxRetries) || other.maxRetries == maxRetries)&&(identical(other.timeout, timeout) || other.timeout == timeout)&&(identical(other.attemptTimeout, attemptTimeout) || other.attemptTimeout == attemptTimeout)&&(identical(other.maxConnections, maxConnections) || other.maxConnections == maxConnections)&&(identical(other.systemMessage, systemMessage) || other.systemMessage == systemMessage)&&(identical(other.maxTokens, maxTokens) || other.maxTokens == maxTokens)&&(identical(other.topP, topP) || other.topP == topP)&&(identical(other.temperature, temperature) || other.temperature == temperature)&&const DeepCollectionEquality().equals(other.stopSeqs, stopSeqs)&&(identical(other.bestOf, bestOf) || other.bestOf == bestOf)&&(identical(other.frequencyPenalty, frequencyPenalty) || other.frequencyPenalty == frequencyPenalty)&&(identical(other.presencePenalty, presencePenalty) || other.presencePenalty == presencePenalty)&&const DeepCollectionEquality().equals(other.logitBias, logitBias)&&(identical(other.seed, seed) || other.seed == seed)&&(identical(other.topK, topK) || other.topK == topK)&&(identical(other.numChoices, numChoices) || other.numChoices == numChoices)&&(identical(other.logprobs, logprobs) || other.logprobs == logprobs)&&(identical(other.topLogprobs, topLogprobs) || other.topLogprobs == 
topLogprobs)&&(identical(other.parallelToolCalls, parallelToolCalls) || other.parallelToolCalls == parallelToolCalls)&&(identical(other.internalTools, internalTools) || other.internalTools == internalTools)&&(identical(other.maxToolOutput, maxToolOutput) || other.maxToolOutput == maxToolOutput)&&const DeepCollectionEquality().equals(other.cachePrompt, cachePrompt)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,maxRetries,timeout,attemptTimeout,maxConnections,systemMessage,maxTokens,topP,temperature,const DeepCollectionEquality().hash(stopSeqs),bestOf,frequencyPenalty,presencePenalty,const DeepCollectionEquality().hash(logitBias),seed,topK,numChoices,logprobs,topLogprobs,parallelToolCalls,internalTools,maxToolOutput,const DeepCollectionEquality().hash(cachePrompt)]); + +@override +String toString() { + return 'GenerateConfig(maxRetries: $maxRetries, timeout: $timeout, attemptTimeout: $attemptTimeout, maxConnections: $maxConnections, systemMessage: $systemMessage, maxTokens: $maxTokens, topP: $topP, temperature: $temperature, stopSeqs: $stopSeqs, bestOf: $bestOf, frequencyPenalty: $frequencyPenalty, presencePenalty: $presencePenalty, logitBias: $logitBias, seed: $seed, topK: $topK, numChoices: $numChoices, logprobs: $logprobs, topLogprobs: $topLogprobs, parallelToolCalls: $parallelToolCalls, internalTools: $internalTools, maxToolOutput: $maxToolOutput, cachePrompt: $cachePrompt)'; +} + + +} + +/// @nodoc +abstract mixin class $GenerateConfigCopyWith<$Res> { + factory $GenerateConfigCopyWith(GenerateConfig value, $Res Function(GenerateConfig) _then) = _$GenerateConfigCopyWithImpl; +@useResult +$Res call({ +@JsonKey(name: 'max_retries') int? maxRetries, int? timeout,@JsonKey(name: 'attempt_timeout') int? attemptTimeout,@JsonKey(name: 'max_connections') int? maxConnections,@JsonKey(name: 'system_message') String? systemMessage,@JsonKey(name: 'max_tokens') int? 
maxTokens,@JsonKey(name: 'top_p') double? topP, double? temperature,@JsonKey(name: 'stop_seqs') List? stopSeqs,@JsonKey(name: 'best_of') int? bestOf,@JsonKey(name: 'frequency_penalty') double? frequencyPenalty,@JsonKey(name: 'presence_penalty') double? presencePenalty,@JsonKey(name: 'logit_bias') Map? logitBias, int? seed,@JsonKey(name: 'top_k') int? topK,@JsonKey(name: 'num_choices') int? numChoices, bool? logprobs,@JsonKey(name: 'top_logprobs') int? topLogprobs,@JsonKey(name: 'parallel_tool_calls') bool? parallelToolCalls,@JsonKey(name: 'internal_tools') bool? internalTools,@JsonKey(name: 'max_tool_output') int? maxToolOutput,@JsonKey(name: 'cache_prompt') Object? cachePrompt +}); + + + + +} +/// @nodoc +class _$GenerateConfigCopyWithImpl<$Res> + implements $GenerateConfigCopyWith<$Res> { + _$GenerateConfigCopyWithImpl(this._self, this._then); + + final GenerateConfig _self; + final $Res Function(GenerateConfig) _then; + +/// Create a copy of GenerateConfig +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? maxRetries = freezed,Object? timeout = freezed,Object? attemptTimeout = freezed,Object? maxConnections = freezed,Object? systemMessage = freezed,Object? maxTokens = freezed,Object? topP = freezed,Object? temperature = freezed,Object? stopSeqs = freezed,Object? bestOf = freezed,Object? frequencyPenalty = freezed,Object? presencePenalty = freezed,Object? logitBias = freezed,Object? seed = freezed,Object? topK = freezed,Object? numChoices = freezed,Object? logprobs = freezed,Object? topLogprobs = freezed,Object? parallelToolCalls = freezed,Object? internalTools = freezed,Object? maxToolOutput = freezed,Object? cachePrompt = freezed,}) { + return _then(_self.copyWith( +maxRetries: freezed == maxRetries ? _self.maxRetries : maxRetries // ignore: cast_nullable_to_non_nullable +as int?,timeout: freezed == timeout ? 
_self.timeout : timeout // ignore: cast_nullable_to_non_nullable +as int?,attemptTimeout: freezed == attemptTimeout ? _self.attemptTimeout : attemptTimeout // ignore: cast_nullable_to_non_nullable +as int?,maxConnections: freezed == maxConnections ? _self.maxConnections : maxConnections // ignore: cast_nullable_to_non_nullable +as int?,systemMessage: freezed == systemMessage ? _self.systemMessage : systemMessage // ignore: cast_nullable_to_non_nullable +as String?,maxTokens: freezed == maxTokens ? _self.maxTokens : maxTokens // ignore: cast_nullable_to_non_nullable +as int?,topP: freezed == topP ? _self.topP : topP // ignore: cast_nullable_to_non_nullable +as double?,temperature: freezed == temperature ? _self.temperature : temperature // ignore: cast_nullable_to_non_nullable +as double?,stopSeqs: freezed == stopSeqs ? _self.stopSeqs : stopSeqs // ignore: cast_nullable_to_non_nullable +as List?,bestOf: freezed == bestOf ? _self.bestOf : bestOf // ignore: cast_nullable_to_non_nullable +as int?,frequencyPenalty: freezed == frequencyPenalty ? _self.frequencyPenalty : frequencyPenalty // ignore: cast_nullable_to_non_nullable +as double?,presencePenalty: freezed == presencePenalty ? _self.presencePenalty : presencePenalty // ignore: cast_nullable_to_non_nullable +as double?,logitBias: freezed == logitBias ? _self.logitBias : logitBias // ignore: cast_nullable_to_non_nullable +as Map?,seed: freezed == seed ? _self.seed : seed // ignore: cast_nullable_to_non_nullable +as int?,topK: freezed == topK ? _self.topK : topK // ignore: cast_nullable_to_non_nullable +as int?,numChoices: freezed == numChoices ? _self.numChoices : numChoices // ignore: cast_nullable_to_non_nullable +as int?,logprobs: freezed == logprobs ? _self.logprobs : logprobs // ignore: cast_nullable_to_non_nullable +as bool?,topLogprobs: freezed == topLogprobs ? _self.topLogprobs : topLogprobs // ignore: cast_nullable_to_non_nullable +as int?,parallelToolCalls: freezed == parallelToolCalls ? 
_self.parallelToolCalls : parallelToolCalls // ignore: cast_nullable_to_non_nullable +as bool?,internalTools: freezed == internalTools ? _self.internalTools : internalTools // ignore: cast_nullable_to_non_nullable +as bool?,maxToolOutput: freezed == maxToolOutput ? _self.maxToolOutput : maxToolOutput // ignore: cast_nullable_to_non_nullable +as int?,cachePrompt: freezed == cachePrompt ? _self.cachePrompt : cachePrompt , + )); +} + +} + + +/// Adds pattern-matching-related methods to [GenerateConfig]. +extension GenerateConfigPatterns on GenerateConfig { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _GenerateConfig value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _GenerateConfig() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _GenerateConfig value) $default,){ +final _that = this; +switch (_that) { +case _GenerateConfig(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _GenerateConfig value)? 
$default,){ +final _that = this; +switch (_that) { +case _GenerateConfig() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'max_retries') int? maxRetries, int? timeout, @JsonKey(name: 'attempt_timeout') int? attemptTimeout, @JsonKey(name: 'max_connections') int? maxConnections, @JsonKey(name: 'system_message') String? systemMessage, @JsonKey(name: 'max_tokens') int? maxTokens, @JsonKey(name: 'top_p') double? topP, double? temperature, @JsonKey(name: 'stop_seqs') List? stopSeqs, @JsonKey(name: 'best_of') int? bestOf, @JsonKey(name: 'frequency_penalty') double? frequencyPenalty, @JsonKey(name: 'presence_penalty') double? presencePenalty, @JsonKey(name: 'logit_bias') Map? logitBias, int? seed, @JsonKey(name: 'top_k') int? topK, @JsonKey(name: 'num_choices') int? numChoices, bool? logprobs, @JsonKey(name: 'top_logprobs') int? topLogprobs, @JsonKey(name: 'parallel_tool_calls') bool? parallelToolCalls, @JsonKey(name: 'internal_tools') bool? internalTools, @JsonKey(name: 'max_tool_output') int? maxToolOutput, @JsonKey(name: 'cache_prompt') Object? cachePrompt)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _GenerateConfig() when $default != null: +return $default(_that.maxRetries,_that.timeout,_that.attemptTimeout,_that.maxConnections,_that.systemMessage,_that.maxTokens,_that.topP,_that.temperature,_that.stopSeqs,_that.bestOf,_that.frequencyPenalty,_that.presencePenalty,_that.logitBias,_that.seed,_that.topK,_that.numChoices,_that.logprobs,_that.topLogprobs,_that.parallelToolCalls,_that.internalTools,_that.maxToolOutput,_that.cachePrompt);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'max_retries') int? maxRetries, int? timeout, @JsonKey(name: 'attempt_timeout') int? attemptTimeout, @JsonKey(name: 'max_connections') int? maxConnections, @JsonKey(name: 'system_message') String? systemMessage, @JsonKey(name: 'max_tokens') int? maxTokens, @JsonKey(name: 'top_p') double? topP, double? temperature, @JsonKey(name: 'stop_seqs') List? stopSeqs, @JsonKey(name: 'best_of') int? bestOf, @JsonKey(name: 'frequency_penalty') double? frequencyPenalty, @JsonKey(name: 'presence_penalty') double? presencePenalty, @JsonKey(name: 'logit_bias') Map? logitBias, int? seed, @JsonKey(name: 'top_k') int? topK, @JsonKey(name: 'num_choices') int? numChoices, bool? logprobs, @JsonKey(name: 'top_logprobs') int? topLogprobs, @JsonKey(name: 'parallel_tool_calls') bool? parallelToolCalls, @JsonKey(name: 'internal_tools') bool? internalTools, @JsonKey(name: 'max_tool_output') int? maxToolOutput, @JsonKey(name: 'cache_prompt') Object? 
cachePrompt) $default,) {final _that = this; +switch (_that) { +case _GenerateConfig(): +return $default(_that.maxRetries,_that.timeout,_that.attemptTimeout,_that.maxConnections,_that.systemMessage,_that.maxTokens,_that.topP,_that.temperature,_that.stopSeqs,_that.bestOf,_that.frequencyPenalty,_that.presencePenalty,_that.logitBias,_that.seed,_that.topK,_that.numChoices,_that.logprobs,_that.topLogprobs,_that.parallelToolCalls,_that.internalTools,_that.maxToolOutput,_that.cachePrompt);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'max_retries') int? maxRetries, int? timeout, @JsonKey(name: 'attempt_timeout') int? attemptTimeout, @JsonKey(name: 'max_connections') int? maxConnections, @JsonKey(name: 'system_message') String? systemMessage, @JsonKey(name: 'max_tokens') int? maxTokens, @JsonKey(name: 'top_p') double? topP, double? temperature, @JsonKey(name: 'stop_seqs') List? stopSeqs, @JsonKey(name: 'best_of') int? bestOf, @JsonKey(name: 'frequency_penalty') double? frequencyPenalty, @JsonKey(name: 'presence_penalty') double? presencePenalty, @JsonKey(name: 'logit_bias') Map? logitBias, int? seed, @JsonKey(name: 'top_k') int? topK, @JsonKey(name: 'num_choices') int? numChoices, bool? logprobs, @JsonKey(name: 'top_logprobs') int? topLogprobs, @JsonKey(name: 'parallel_tool_calls') bool? parallelToolCalls, @JsonKey(name: 'internal_tools') bool? internalTools, @JsonKey(name: 'max_tool_output') int? maxToolOutput, @JsonKey(name: 'cache_prompt') Object? cachePrompt)? 
$default,) {final _that = this; +switch (_that) { +case _GenerateConfig() when $default != null: +return $default(_that.maxRetries,_that.timeout,_that.attemptTimeout,_that.maxConnections,_that.systemMessage,_that.maxTokens,_that.topP,_that.temperature,_that.stopSeqs,_that.bestOf,_that.frequencyPenalty,_that.presencePenalty,_that.logitBias,_that.seed,_that.topK,_that.numChoices,_that.logprobs,_that.topLogprobs,_that.parallelToolCalls,_that.internalTools,_that.maxToolOutput,_that.cachePrompt);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _GenerateConfig extends GenerateConfig { + const _GenerateConfig({@JsonKey(name: 'max_retries') this.maxRetries, this.timeout, @JsonKey(name: 'attempt_timeout') this.attemptTimeout, @JsonKey(name: 'max_connections') this.maxConnections, @JsonKey(name: 'system_message') this.systemMessage, @JsonKey(name: 'max_tokens') this.maxTokens, @JsonKey(name: 'top_p') this.topP, this.temperature, @JsonKey(name: 'stop_seqs') final List? stopSeqs, @JsonKey(name: 'best_of') this.bestOf, @JsonKey(name: 'frequency_penalty') this.frequencyPenalty, @JsonKey(name: 'presence_penalty') this.presencePenalty, @JsonKey(name: 'logit_bias') final Map? logitBias, this.seed, @JsonKey(name: 'top_k') this.topK, @JsonKey(name: 'num_choices') this.numChoices, this.logprobs, @JsonKey(name: 'top_logprobs') this.topLogprobs, @JsonKey(name: 'parallel_tool_calls') this.parallelToolCalls, @JsonKey(name: 'internal_tools') this.internalTools, @JsonKey(name: 'max_tool_output') this.maxToolOutput, @JsonKey(name: 'cache_prompt') this.cachePrompt}): _stopSeqs = stopSeqs,_logitBias = logitBias,super._(); + factory _GenerateConfig.fromJson(Map json) => _$GenerateConfigFromJson(json); + +/// Maximum number of times to retry a request. +@override@JsonKey(name: 'max_retries') final int? maxRetries; +/// Request timeout (in seconds). +@override final int? timeout; +/// Timeout for each individual request attempt (in seconds). 
+@override@JsonKey(name: 'attempt_timeout') final int? attemptTimeout; +/// Maximum number of concurrent connections to the model API. +@override@JsonKey(name: 'max_connections') final int? maxConnections; +/// System message to provide to the model. +@override@JsonKey(name: 'system_message') final String? systemMessage; +/// Maximum number of tokens to generate. +@override@JsonKey(name: 'max_tokens') final int? maxTokens; +/// Top-p sampling parameter. +@override@JsonKey(name: 'top_p') final double? topP; +/// Temperature sampling parameter. +@override final double? temperature; +/// Sequences that should stop generation. + final List? _stopSeqs; +/// Sequences that should stop generation. +@override@JsonKey(name: 'stop_seqs') List? get stopSeqs { + final value = _stopSeqs; + if (value == null) return null; + if (_stopSeqs is EqualUnmodifiableListView) return _stopSeqs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Number of completions to generate and choose the best from. +@override@JsonKey(name: 'best_of') final int? bestOf; +/// Frequency penalty parameter. +@override@JsonKey(name: 'frequency_penalty') final double? frequencyPenalty; +/// Presence penalty parameter. +@override@JsonKey(name: 'presence_penalty') final double? presencePenalty; +/// Logit bias parameter. + final Map? _logitBias; +/// Logit bias parameter. +@override@JsonKey(name: 'logit_bias') Map? get logitBias { + final value = _logitBias; + if (value == null) return null; + if (_logitBias is EqualUnmodifiableMapView) return _logitBias; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Random seed for generation. +@override final int? seed; +/// Top-k sampling parameter. +@override@JsonKey(name: 'top_k') final int? topK; +/// Number of completion choices to return. +@override@JsonKey(name: 'num_choices') final int? numChoices; +/// Whether to return logprobs. +@override final bool? 
logprobs; +/// Number of top logprobs to return. +@override@JsonKey(name: 'top_logprobs') final int? topLogprobs; +/// Whether to allow parallel tool calls. +@override@JsonKey(name: 'parallel_tool_calls') final bool? parallelToolCalls; +/// Whether to allow internal model tools. +@override@JsonKey(name: 'internal_tools') final bool? internalTools; +/// Maximum number of characters to retain for tool output. +@override@JsonKey(name: 'max_tool_output') final int? maxToolOutput; +/// Cache the prompt (if supported by the provider). +@override@JsonKey(name: 'cache_prompt') final Object? cachePrompt; + +/// Create a copy of GenerateConfig +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$GenerateConfigCopyWith<_GenerateConfig> get copyWith => __$GenerateConfigCopyWithImpl<_GenerateConfig>(this, _$identity); + +@override +Map toJson() { + return _$GenerateConfigToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _GenerateConfig&&(identical(other.maxRetries, maxRetries) || other.maxRetries == maxRetries)&&(identical(other.timeout, timeout) || other.timeout == timeout)&&(identical(other.attemptTimeout, attemptTimeout) || other.attemptTimeout == attemptTimeout)&&(identical(other.maxConnections, maxConnections) || other.maxConnections == maxConnections)&&(identical(other.systemMessage, systemMessage) || other.systemMessage == systemMessage)&&(identical(other.maxTokens, maxTokens) || other.maxTokens == maxTokens)&&(identical(other.topP, topP) || other.topP == topP)&&(identical(other.temperature, temperature) || other.temperature == temperature)&&const DeepCollectionEquality().equals(other._stopSeqs, _stopSeqs)&&(identical(other.bestOf, bestOf) || other.bestOf == bestOf)&&(identical(other.frequencyPenalty, frequencyPenalty) || other.frequencyPenalty == 
frequencyPenalty)&&(identical(other.presencePenalty, presencePenalty) || other.presencePenalty == presencePenalty)&&const DeepCollectionEquality().equals(other._logitBias, _logitBias)&&(identical(other.seed, seed) || other.seed == seed)&&(identical(other.topK, topK) || other.topK == topK)&&(identical(other.numChoices, numChoices) || other.numChoices == numChoices)&&(identical(other.logprobs, logprobs) || other.logprobs == logprobs)&&(identical(other.topLogprobs, topLogprobs) || other.topLogprobs == topLogprobs)&&(identical(other.parallelToolCalls, parallelToolCalls) || other.parallelToolCalls == parallelToolCalls)&&(identical(other.internalTools, internalTools) || other.internalTools == internalTools)&&(identical(other.maxToolOutput, maxToolOutput) || other.maxToolOutput == maxToolOutput)&&const DeepCollectionEquality().equals(other.cachePrompt, cachePrompt)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,maxRetries,timeout,attemptTimeout,maxConnections,systemMessage,maxTokens,topP,temperature,const DeepCollectionEquality().hash(_stopSeqs),bestOf,frequencyPenalty,presencePenalty,const DeepCollectionEquality().hash(_logitBias),seed,topK,numChoices,logprobs,topLogprobs,parallelToolCalls,internalTools,maxToolOutput,const DeepCollectionEquality().hash(cachePrompt)]); + +@override +String toString() { + return 'GenerateConfig(maxRetries: $maxRetries, timeout: $timeout, attemptTimeout: $attemptTimeout, maxConnections: $maxConnections, systemMessage: $systemMessage, maxTokens: $maxTokens, topP: $topP, temperature: $temperature, stopSeqs: $stopSeqs, bestOf: $bestOf, frequencyPenalty: $frequencyPenalty, presencePenalty: $presencePenalty, logitBias: $logitBias, seed: $seed, topK: $topK, numChoices: $numChoices, logprobs: $logprobs, topLogprobs: $topLogprobs, parallelToolCalls: $parallelToolCalls, internalTools: $internalTools, maxToolOutput: $maxToolOutput, cachePrompt: $cachePrompt)'; +} + + +} + 
+/// @nodoc +abstract mixin class _$GenerateConfigCopyWith<$Res> implements $GenerateConfigCopyWith<$Res> { + factory _$GenerateConfigCopyWith(_GenerateConfig value, $Res Function(_GenerateConfig) _then) = __$GenerateConfigCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'max_retries') int? maxRetries, int? timeout,@JsonKey(name: 'attempt_timeout') int? attemptTimeout,@JsonKey(name: 'max_connections') int? maxConnections,@JsonKey(name: 'system_message') String? systemMessage,@JsonKey(name: 'max_tokens') int? maxTokens,@JsonKey(name: 'top_p') double? topP, double? temperature,@JsonKey(name: 'stop_seqs') List? stopSeqs,@JsonKey(name: 'best_of') int? bestOf,@JsonKey(name: 'frequency_penalty') double? frequencyPenalty,@JsonKey(name: 'presence_penalty') double? presencePenalty,@JsonKey(name: 'logit_bias') Map? logitBias, int? seed,@JsonKey(name: 'top_k') int? topK,@JsonKey(name: 'num_choices') int? numChoices, bool? logprobs,@JsonKey(name: 'top_logprobs') int? topLogprobs,@JsonKey(name: 'parallel_tool_calls') bool? parallelToolCalls,@JsonKey(name: 'internal_tools') bool? internalTools,@JsonKey(name: 'max_tool_output') int? maxToolOutput,@JsonKey(name: 'cache_prompt') Object? cachePrompt +}); + + + + +} +/// @nodoc +class __$GenerateConfigCopyWithImpl<$Res> + implements _$GenerateConfigCopyWith<$Res> { + __$GenerateConfigCopyWithImpl(this._self, this._then); + + final _GenerateConfig _self; + final $Res Function(_GenerateConfig) _then; + +/// Create a copy of GenerateConfig +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? maxRetries = freezed,Object? timeout = freezed,Object? attemptTimeout = freezed,Object? maxConnections = freezed,Object? systemMessage = freezed,Object? maxTokens = freezed,Object? topP = freezed,Object? temperature = freezed,Object? stopSeqs = freezed,Object? bestOf = freezed,Object? frequencyPenalty = freezed,Object? presencePenalty = freezed,Object? 
logitBias = freezed,Object? seed = freezed,Object? topK = freezed,Object? numChoices = freezed,Object? logprobs = freezed,Object? topLogprobs = freezed,Object? parallelToolCalls = freezed,Object? internalTools = freezed,Object? maxToolOutput = freezed,Object? cachePrompt = freezed,}) { + return _then(_GenerateConfig( +maxRetries: freezed == maxRetries ? _self.maxRetries : maxRetries // ignore: cast_nullable_to_non_nullable +as int?,timeout: freezed == timeout ? _self.timeout : timeout // ignore: cast_nullable_to_non_nullable +as int?,attemptTimeout: freezed == attemptTimeout ? _self.attemptTimeout : attemptTimeout // ignore: cast_nullable_to_non_nullable +as int?,maxConnections: freezed == maxConnections ? _self.maxConnections : maxConnections // ignore: cast_nullable_to_non_nullable +as int?,systemMessage: freezed == systemMessage ? _self.systemMessage : systemMessage // ignore: cast_nullable_to_non_nullable +as String?,maxTokens: freezed == maxTokens ? _self.maxTokens : maxTokens // ignore: cast_nullable_to_non_nullable +as int?,topP: freezed == topP ? _self.topP : topP // ignore: cast_nullable_to_non_nullable +as double?,temperature: freezed == temperature ? _self.temperature : temperature // ignore: cast_nullable_to_non_nullable +as double?,stopSeqs: freezed == stopSeqs ? _self._stopSeqs : stopSeqs // ignore: cast_nullable_to_non_nullable +as List?,bestOf: freezed == bestOf ? _self.bestOf : bestOf // ignore: cast_nullable_to_non_nullable +as int?,frequencyPenalty: freezed == frequencyPenalty ? _self.frequencyPenalty : frequencyPenalty // ignore: cast_nullable_to_non_nullable +as double?,presencePenalty: freezed == presencePenalty ? _self.presencePenalty : presencePenalty // ignore: cast_nullable_to_non_nullable +as double?,logitBias: freezed == logitBias ? _self._logitBias : logitBias // ignore: cast_nullable_to_non_nullable +as Map?,seed: freezed == seed ? _self.seed : seed // ignore: cast_nullable_to_non_nullable +as int?,topK: freezed == topK ? 
_self.topK : topK // ignore: cast_nullable_to_non_nullable +as int?,numChoices: freezed == numChoices ? _self.numChoices : numChoices // ignore: cast_nullable_to_non_nullable +as int?,logprobs: freezed == logprobs ? _self.logprobs : logprobs // ignore: cast_nullable_to_non_nullable +as bool?,topLogprobs: freezed == topLogprobs ? _self.topLogprobs : topLogprobs // ignore: cast_nullable_to_non_nullable +as int?,parallelToolCalls: freezed == parallelToolCalls ? _self.parallelToolCalls : parallelToolCalls // ignore: cast_nullable_to_non_nullable +as bool?,internalTools: freezed == internalTools ? _self.internalTools : internalTools // ignore: cast_nullable_to_non_nullable +as bool?,maxToolOutput: freezed == maxToolOutput ? _self.maxToolOutput : maxToolOutput // ignore: cast_nullable_to_non_nullable +as int?,cachePrompt: freezed == cachePrompt ? _self.cachePrompt : cachePrompt , + )); +} + + +} + + +/// @nodoc +mixin _$Logprobs { + +/// Logprob content. + List get content; +/// Create a copy of Logprobs +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$LogprobsCopyWith get copyWith => _$LogprobsCopyWithImpl(this as Logprobs, _$identity); + + /// Serializes this Logprobs to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Logprobs&&const DeepCollectionEquality().equals(other.content, content)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(content)); + +@override +String toString() { + return 'Logprobs(content: $content)'; +} + + +} + +/// @nodoc +abstract mixin class $LogprobsCopyWith<$Res> { + factory $LogprobsCopyWith(Logprobs value, $Res Function(Logprobs) _then) = _$LogprobsCopyWithImpl; +@useResult +$Res call({ + List content +}); + + + + +} +/// @nodoc +class _$LogprobsCopyWithImpl<$Res> + implements $LogprobsCopyWith<$Res> { + _$LogprobsCopyWithImpl(this._self, this._then); + + final Logprobs _self; + final $Res Function(Logprobs) _then; + +/// Create a copy of Logprobs +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? content = null,}) { + return _then(_self.copyWith( +content: null == content ? _self.content : content // ignore: cast_nullable_to_non_nullable +as List, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Logprobs]. +extension LogprobsPatterns on Logprobs { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Logprobs value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Logprobs() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Logprobs value) $default,){ +final _that = this; +switch (_that) { +case _Logprobs(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Logprobs value)? $default,){ +final _that = this; +switch (_that) { +case _Logprobs() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( List content)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Logprobs() when $default != null: +return $default(_that.content);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( List content) $default,) {final _that = this; +switch (_that) { +case _Logprobs(): +return $default(_that.content);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( List content)? $default,) {final _that = this; +switch (_that) { +case _Logprobs() when $default != null: +return $default(_that.content);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Logprobs extends Logprobs { + const _Logprobs({required final List content}): _content = content,super._(); + factory _Logprobs.fromJson(Map json) => _$LogprobsFromJson(json); + +/// Logprob content. + final List _content; +/// Logprob content. +@override List get content { + if (_content is EqualUnmodifiableListView) return _content; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_content); +} + + +/// Create a copy of Logprobs +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$LogprobsCopyWith<_Logprobs> get copyWith => __$LogprobsCopyWithImpl<_Logprobs>(this, _$identity); + +@override +Map toJson() { + return _$LogprobsToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Logprobs&&const DeepCollectionEquality().equals(other._content, _content)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(_content)); + +@override +String toString() { + return 'Logprobs(content: $content)'; +} + + +} + +/// @nodoc +abstract mixin class _$LogprobsCopyWith<$Res> implements $LogprobsCopyWith<$Res> { + factory _$LogprobsCopyWith(_Logprobs value, $Res Function(_Logprobs) _then) = __$LogprobsCopyWithImpl; +@override @useResult +$Res call({ + List content +}); + + + + +} +/// @nodoc +class __$LogprobsCopyWithImpl<$Res> + implements _$LogprobsCopyWith<$Res> { + __$LogprobsCopyWithImpl(this._self, this._then); + + final _Logprobs _self; + final $Res Function(_Logprobs) _then; + +/// Create a copy of Logprobs +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? content = null,}) { + return _then(_Logprobs( +content: null == content ? _self._content : content // ignore: cast_nullable_to_non_nullable +as List, + )); +} + + +} + + +/// @nodoc +mixin _$ProvenanceData { + +/// Source location. + String get location;/// Static hash. + String get shash; +/// Create a copy of ProvenanceData +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$ProvenanceDataCopyWith get copyWith => _$ProvenanceDataCopyWithImpl(this as ProvenanceData, _$identity); + + /// Serializes this ProvenanceData to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is ProvenanceData&&(identical(other.location, location) || other.location == location)&&(identical(other.shash, shash) || other.shash == shash)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,location,shash); + +@override +String toString() { + return 'ProvenanceData(location: $location, shash: $shash)'; +} + + +} + +/// @nodoc +abstract mixin class $ProvenanceDataCopyWith<$Res> { + factory $ProvenanceDataCopyWith(ProvenanceData value, $Res Function(ProvenanceData) _then) = _$ProvenanceDataCopyWithImpl; +@useResult +$Res call({ + String location, String shash +}); + + + + +} +/// @nodoc +class _$ProvenanceDataCopyWithImpl<$Res> + implements $ProvenanceDataCopyWith<$Res> { + _$ProvenanceDataCopyWithImpl(this._self, this._then); + + final ProvenanceData _self; + final $Res Function(ProvenanceData) _then; + +/// Create a copy of ProvenanceData +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? location = null,Object? shash = null,}) { + return _then(_self.copyWith( +location: null == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String,shash: null == shash ? _self.shash : shash // ignore: cast_nullable_to_non_nullable +as String, + )); +} + +} + + +/// Adds pattern-matching-related methods to [ProvenanceData]. +extension ProvenanceDataPatterns on ProvenanceData { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _ProvenanceData value)? 
$default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _ProvenanceData() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _ProvenanceData value) $default,){ +final _that = this; +switch (_that) { +case _ProvenanceData(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _ProvenanceData value)? $default,){ +final _that = this; +switch (_that) { +case _ProvenanceData() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String location, String shash)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _ProvenanceData() when $default != null: +return $default(_that.location,_that.shash);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String location, String shash) $default,) {final _that = this; +switch (_that) { +case _ProvenanceData(): +return $default(_that.location,_that.shash);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String location, String shash)? $default,) {final _that = this; +switch (_that) { +case _ProvenanceData() when $default != null: +return $default(_that.location,_that.shash);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _ProvenanceData extends ProvenanceData { + const _ProvenanceData({required this.location, required this.shash}): super._(); + factory _ProvenanceData.fromJson(Map json) => _$ProvenanceDataFromJson(json); + +/// Source location. +@override final String location; +/// Static hash. +@override final String shash; + +/// Create a copy of ProvenanceData +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$ProvenanceDataCopyWith<_ProvenanceData> get copyWith => __$ProvenanceDataCopyWithImpl<_ProvenanceData>(this, _$identity); + +@override +Map toJson() { + return _$ProvenanceDataToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _ProvenanceData&&(identical(other.location, location) || other.location == location)&&(identical(other.shash, shash) || other.shash == shash)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,location,shash); + +@override +String toString() { + return 'ProvenanceData(location: $location, shash: $shash)'; +} + + +} + +/// @nodoc +abstract mixin class _$ProvenanceDataCopyWith<$Res> implements $ProvenanceDataCopyWith<$Res> { + factory _$ProvenanceDataCopyWith(_ProvenanceData value, $Res Function(_ProvenanceData) _then) = __$ProvenanceDataCopyWithImpl; +@override @useResult +$Res call({ + String location, String shash +}); + + + + +} +/// @nodoc +class __$ProvenanceDataCopyWithImpl<$Res> + implements _$ProvenanceDataCopyWith<$Res> { + __$ProvenanceDataCopyWithImpl(this._self, this._then); + + final _ProvenanceData _self; + final $Res Function(_ProvenanceData) _then; + +/// Create a copy of ProvenanceData +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? location = null,Object? shash = null,}) { + return _then(_ProvenanceData( +location: null == location ? _self.location : location // ignore: cast_nullable_to_non_nullable +as String,shash: null == shash ? _self.shash : shash // ignore: cast_nullable_to_non_nullable +as String, + )); +} + + +} + + +/// @nodoc +mixin _$EvalSampleLimit { + +/// The type of limit. + String get type;/// The limit value. 
+ double get limit; +/// Create a copy of EvalSampleLimit +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSampleLimitCopyWith get copyWith => _$EvalSampleLimitCopyWithImpl(this as EvalSampleLimit, _$identity); + + /// Serializes this EvalSampleLimit to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSampleLimit&&(identical(other.type, type) || other.type == type)&&(identical(other.limit, limit) || other.limit == limit)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,type,limit); + +@override +String toString() { + return 'EvalSampleLimit(type: $type, limit: $limit)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalSampleLimitCopyWith<$Res> { + factory $EvalSampleLimitCopyWith(EvalSampleLimit value, $Res Function(EvalSampleLimit) _then) = _$EvalSampleLimitCopyWithImpl; +@useResult +$Res call({ + String type, double limit +}); + + + + +} +/// @nodoc +class _$EvalSampleLimitCopyWithImpl<$Res> + implements $EvalSampleLimitCopyWith<$Res> { + _$EvalSampleLimitCopyWithImpl(this._self, this._then); + + final EvalSampleLimit _self; + final $Res Function(EvalSampleLimit) _then; + +/// Create a copy of EvalSampleLimit +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? type = null,Object? limit = null,}) { + return _then(_self.copyWith( +type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable +as String,limit: null == limit ? _self.limit : limit // ignore: cast_nullable_to_non_nullable +as double, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalSampleLimit]. 
extension EvalSampleLimitPatterns on EvalSampleLimit {
  // NOTE(review): this whole section is freezed-style generated boilerplate
  // (`/// @nodoc`, `_$...` helpers, copyWith impl classes) — presumably
  // emitted by build_runner; prefer regenerating over hand-editing it.
  // NOTE(review): generic type arguments appear stripped in this copy
  // (`Map` / `List` with no `<...>`) — TODO confirm against the regenerated
  // file (freezed normally writes e.g. `Map<String, dynamic>`).

  /// A variant of `map` that fallback to returning `orElse`.
  ///
  /// It is equivalent to doing:
  /// ```dart
  /// switch (sealedClass) {
  ///   case final Subclass value:
  ///     return ...;
  ///   case _:
  ///     return orElse();
  /// }
  /// ```
  @optionalTypeArgs
  TResult maybeMap(TResult Function(_EvalSampleLimit value)? $default, {required TResult orElse(),}) {
    final _that = this;
    switch (_that) {
      case _EvalSampleLimit() when $default != null:
        return $default(_that);
      case _:
        return orElse();
    }
  }

  /// A `switch`-like method, using callbacks.
  ///
  /// Callbacks receives the raw object, upcasted.
  /// It is equivalent to doing:
  /// ```dart
  /// switch (sealedClass) {
  ///   case final Subclass value:
  ///     return ...;
  ///   case final Subclass2 value:
  ///     return ...;
  /// }
  /// ```
  @optionalTypeArgs
  TResult map(TResult Function(_EvalSampleLimit value) $default,) {
    final _that = this;
    switch (_that) {
      case _EvalSampleLimit():
        return $default(_that);
      case _:
        // Defensive fallback; unreachable while _EvalSampleLimit is the only
        // generated subclass.
        throw StateError('Unexpected subclass');
    }
  }

  /// A variant of `map` that fallback to returning `null`.
  ///
  /// It is equivalent to doing:
  /// ```dart
  /// switch (sealedClass) {
  ///   case final Subclass value:
  ///     return ...;
  ///   case _:
  ///     return null;
  /// }
  /// ```
  @optionalTypeArgs
  TResult? mapOrNull(TResult? Function(_EvalSampleLimit value)? $default,) {
    final _that = this;
    switch (_that) {
      case _EvalSampleLimit() when $default != null:
        return $default(_that);
      case _:
        return null;
    }
  }

  /// A variant of `when` that fallback to an `orElse` callback.
  ///
  /// It is equivalent to doing:
  /// ```dart
  /// switch (sealedClass) {
  ///   case Subclass(:final field):
  ///     return ...;
  ///   case _:
  ///     return orElse();
  /// }
  /// ```
  @optionalTypeArgs
  TResult maybeWhen(TResult Function(String type, double limit)? $default, {required TResult orElse(),}) {
    final _that = this;
    switch (_that) {
      case _EvalSampleLimit() when $default != null:
        return $default(_that.type, _that.limit);
      case _:
        return orElse();
    }
  }

  /// A `switch`-like method, using callbacks.
  ///
  /// As opposed to `map`, this offers destructuring.
  /// It is equivalent to doing:
  /// ```dart
  /// switch (sealedClass) {
  ///   case Subclass(:final field):
  ///     return ...;
  ///   case Subclass2(:final field2):
  ///     return ...;
  /// }
  /// ```
  @optionalTypeArgs
  TResult when(TResult Function(String type, double limit) $default,) {
    final _that = this;
    switch (_that) {
      case _EvalSampleLimit():
        return $default(_that.type, _that.limit);
      case _:
        // Defensive fallback; unreachable while _EvalSampleLimit is the only
        // generated subclass.
        throw StateError('Unexpected subclass');
    }
  }

  /// A variant of `when` that fallback to returning `null`
  ///
  /// It is equivalent to doing:
  /// ```dart
  /// switch (sealedClass) {
  ///   case Subclass(:final field):
  ///     return ...;
  ///   case _:
  ///     return null;
  /// }
  /// ```
  @optionalTypeArgs
  TResult? whenOrNull(TResult? Function(String type, double limit)? $default,) {
    final _that = this;
    switch (_that) {
      case _EvalSampleLimit() when $default != null:
        return $default(_that.type, _that.limit);
      case _:
        return null;
    }
  }
}

/// @nodoc
@JsonSerializable()
class _EvalSampleLimit extends EvalSampleLimit {
  const _EvalSampleLimit({required this.type, required this.limit}) : super._();

  // Delegates to the json_serializable-generated top-level helper.
  factory _EvalSampleLimit.fromJson(Map json) => _$EvalSampleLimitFromJson(json);

  /// The type of limit.
  @override
  final String type;

  /// The limit value.
  @override
  final double limit;

  /// Create a copy of EvalSampleLimit
  /// with the given fields replaced by the non-null parameter values.
  @override
  @JsonKey(includeFromJson: false, includeToJson: false)
  @pragma('vm:prefer-inline')
  _$EvalSampleLimitCopyWith<_EvalSampleLimit> get copyWith =>
      __$EvalSampleLimitCopyWithImpl<_EvalSampleLimit>(this, _$identity);

  @override
  Map toJson() {
    return _$EvalSampleLimitToJson(this, );
  }

  @override
  bool operator ==(Object other) {
    // Value equality on runtimeType plus both fields.
    return identical(this, other) ||
        (other.runtimeType == runtimeType &&
            other is _EvalSampleLimit &&
            (identical(other.type, type) || other.type == type) &&
            (identical(other.limit, limit) || other.limit == limit));
  }

  @JsonKey(includeFromJson: false, includeToJson: false)
  @override
  int get hashCode => Object.hash(runtimeType, type, limit);

  @override
  String toString() {
    return 'EvalSampleLimit(type: $type, limit: $limit)';
  }
}

/// @nodoc
abstract mixin class _$EvalSampleLimitCopyWith<$Res> implements $EvalSampleLimitCopyWith<$Res> {
  factory _$EvalSampleLimitCopyWith(_EvalSampleLimit value, $Res Function(_EvalSampleLimit) _then) =
      __$EvalSampleLimitCopyWithImpl;

  @override
  @useResult
  $Res call({
    String type, double limit
  });
}

/// @nodoc
class __$EvalSampleLimitCopyWithImpl<$Res> implements _$EvalSampleLimitCopyWith<$Res> {
  __$EvalSampleLimitCopyWithImpl(this._self, this._then);

  final _EvalSampleLimit _self;
  final $Res Function(_EvalSampleLimit) _then;

  /// Create a copy of EvalSampleLimit
  /// with the given fields replaced by the non-null parameter values.
  @override
  @pragma('vm:prefer-inline')
  $Res call({Object? type = null, Object? limit = null,}) {
    // `null` sentinel means "keep the current value" for each field.
    return _then(_EvalSampleLimit(
      type: null == type ? _self.type : type // ignore: cast_nullable_to_non_nullable
          as String,
      limit: null == limit ? _self.limit : limit // ignore: cast_nullable_to_non_nullable
          as double,
    ));
  }
}

/// @nodoc
mixin _$EvalSetInfo {
  /// Globally unique id for eval set.
  @JsonKey(name: 'eval_set_id')
  String get evalSetId;

  /// Tasks in the eval set.
  List get tasks;

  /// Create a copy of EvalSetInfo
  /// with the given fields replaced by the non-null parameter values.
  @JsonKey(includeFromJson: false, includeToJson: false)
  @pragma('vm:prefer-inline')
  $EvalSetInfoCopyWith get copyWith => _$EvalSetInfoCopyWithImpl(this as EvalSetInfo, _$identity);

  /// Serializes this EvalSetInfo to a JSON map.
  Map toJson();

  @override
  bool operator ==(Object other) {
    // Deep equality on the tasks collection; identity/== on the id.
    return identical(this, other) ||
        (other.runtimeType == runtimeType &&
            other is EvalSetInfo &&
            (identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId) &&
            const DeepCollectionEquality().equals(other.tasks, tasks));
  }

  @JsonKey(includeFromJson: false, includeToJson: false)
  @override
  int get hashCode => Object.hash(runtimeType, evalSetId, const DeepCollectionEquality().hash(tasks));

  @override
  String toString() {
    return 'EvalSetInfo(evalSetId: $evalSetId, tasks: $tasks)';
  }
}

/// @nodoc
abstract mixin class $EvalSetInfoCopyWith<$Res> {
  factory $EvalSetInfoCopyWith(EvalSetInfo value, $Res Function(EvalSetInfo) _then) = _$EvalSetInfoCopyWithImpl;

  @useResult
  $Res call({
    @JsonKey(name: 'eval_set_id') String evalSetId, List tasks
  });
}

/// @nodoc
class _$EvalSetInfoCopyWithImpl<$Res> implements $EvalSetInfoCopyWith<$Res> {
  _$EvalSetInfoCopyWithImpl(this._self, this._then);

  final EvalSetInfo _self;
  final $Res Function(EvalSetInfo) _then;

  /// Create a copy of EvalSetInfo
  /// with the given fields replaced by the non-null parameter values.
  @pragma('vm:prefer-inline')
  @override
  $Res call({Object? evalSetId = null, Object? tasks = null,}) {
    // `null` sentinel means "keep the current value" for each field.
    return _then(_self.copyWith(
      evalSetId: null == evalSetId ? _self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable
          as String,
      tasks: null == tasks ? _self.tasks : tasks // ignore: cast_nullable_to_non_nullable
          as List,
    ));
  }
}

/// Adds pattern-matching-related methods to [EvalSetInfo].
+extension EvalSetInfoPatterns on EvalSetInfo { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSetInfo value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSetInfo() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSetInfo value) $default,){ +final _that = this; +switch (_that) { +case _EvalSetInfo(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSetInfo value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSetInfo() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'eval_set_id') String evalSetId, List tasks)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSetInfo() when $default != null: +return $default(_that.evalSetId,_that.tasks);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'eval_set_id') String evalSetId, List tasks) $default,) {final _that = this; +switch (_that) { +case _EvalSetInfo(): +return $default(_that.evalSetId,_that.tasks);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'eval_set_id') String evalSetId, List tasks)? $default,) {final _that = this; +switch (_that) { +case _EvalSetInfo() when $default != null: +return $default(_that.evalSetId,_that.tasks);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSetInfo extends EvalSetInfo { + const _EvalSetInfo({@JsonKey(name: 'eval_set_id') required this.evalSetId, required final List tasks}): _tasks = tasks,super._(); + factory _EvalSetInfo.fromJson(Map json) => _$EvalSetInfoFromJson(json); + +/// Globally unique id for eval set. +@override@JsonKey(name: 'eval_set_id') final String evalSetId; +/// Tasks in the eval set. + final List _tasks; +/// Tasks in the eval set. 
+@override List get tasks { + if (_tasks is EqualUnmodifiableListView) return _tasks; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_tasks); +} + + +/// Create a copy of EvalSetInfo +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSetInfoCopyWith<_EvalSetInfo> get copyWith => __$EvalSetInfoCopyWithImpl<_EvalSetInfo>(this, _$identity); + +@override +Map toJson() { + return _$EvalSetInfoToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSetInfo&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)&&const DeepCollectionEquality().equals(other._tasks, _tasks)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,evalSetId,const DeepCollectionEquality().hash(_tasks)); + +@override +String toString() { + return 'EvalSetInfo(evalSetId: $evalSetId, tasks: $tasks)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSetInfoCopyWith<$Res> implements $EvalSetInfoCopyWith<$Res> { + factory _$EvalSetInfoCopyWith(_EvalSetInfo value, $Res Function(_EvalSetInfo) _then) = __$EvalSetInfoCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 'eval_set_id') String evalSetId, List tasks +}); + + + + +} +/// @nodoc +class __$EvalSetInfoCopyWithImpl<$Res> + implements _$EvalSetInfoCopyWith<$Res> { + __$EvalSetInfoCopyWithImpl(this._self, this._then); + + final _EvalSetInfo _self; + final $Res Function(_EvalSetInfo) _then; + +/// Create a copy of EvalSetInfo +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? evalSetId = null,Object? tasks = null,}) { + return _then(_EvalSetInfo( +evalSetId: null == evalSetId ? 
_self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String,tasks: null == tasks ? _self._tasks : tasks // ignore: cast_nullable_to_non_nullable +as List, + )); +} + + +} + + +/// @nodoc +mixin _$EvalSetTask { + +/// Task name. + String? get name;/// Unique task id. +@JsonKey(name: 'task_id') String get taskId;/// Task source file. +@JsonKey(name: 'task_file') String? get taskFile;/// Task arguments. +@JsonKey(name: 'task_args', defaultValue: {}) Map get taskArgs;/// Model used for evaluation. + String get model;/// Model specific arguments. +@JsonKey(name: 'model_args', defaultValue: {}) Map get modelArgs;/// Model roles. +@JsonKey(name: 'model_roles') Map? get modelRoles;/// Sequence number of task in eval set. + int get sequence; +/// Create a copy of EvalSetTask +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSetTaskCopyWith get copyWith => _$EvalSetTaskCopyWithImpl(this as EvalSetTask, _$identity); + + /// Serializes this EvalSetTask to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSetTask&&(identical(other.name, name) || other.name == name)&&(identical(other.taskId, taskId) || other.taskId == taskId)&&(identical(other.taskFile, taskFile) || other.taskFile == taskFile)&&const DeepCollectionEquality().equals(other.taskArgs, taskArgs)&&(identical(other.model, model) || other.model == model)&&const DeepCollectionEquality().equals(other.modelArgs, modelArgs)&&const DeepCollectionEquality().equals(other.modelRoles, modelRoles)&&(identical(other.sequence, sequence) || other.sequence == sequence)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,taskId,taskFile,const DeepCollectionEquality().hash(taskArgs),model,const DeepCollectionEquality().hash(modelArgs),const DeepCollectionEquality().hash(modelRoles),sequence); + +@override +String toString() { + return 'EvalSetTask(name: $name, taskId: $taskId, taskFile: $taskFile, taskArgs: $taskArgs, model: $model, modelArgs: $modelArgs, modelRoles: $modelRoles, sequence: $sequence)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalSetTaskCopyWith<$Res> { + factory $EvalSetTaskCopyWith(EvalSetTask value, $Res Function(EvalSetTask) _then) = _$EvalSetTaskCopyWithImpl; +@useResult +$Res call({ + String? name,@JsonKey(name: 'task_id') String taskId,@JsonKey(name: 'task_file') String? taskFile,@JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, String model,@JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs,@JsonKey(name: 'model_roles') Map? 
modelRoles, int sequence +}); + + + + +} +/// @nodoc +class _$EvalSetTaskCopyWithImpl<$Res> + implements $EvalSetTaskCopyWith<$Res> { + _$EvalSetTaskCopyWithImpl(this._self, this._then); + + final EvalSetTask _self; + final $Res Function(EvalSetTask) _then; + +/// Create a copy of EvalSetTask +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? name = freezed,Object? taskId = null,Object? taskFile = freezed,Object? taskArgs = null,Object? model = null,Object? modelArgs = null,Object? modelRoles = freezed,Object? sequence = null,}) { + return _then(_self.copyWith( +name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,taskId: null == taskId ? _self.taskId : taskId // ignore: cast_nullable_to_non_nullable +as String,taskFile: freezed == taskFile ? _self.taskFile : taskFile // ignore: cast_nullable_to_non_nullable +as String?,taskArgs: null == taskArgs ? _self.taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map,model: null == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String,modelArgs: null == modelArgs ? _self.modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map,modelRoles: freezed == modelRoles ? _self.modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,sequence: null == sequence ? _self.sequence : sequence // ignore: cast_nullable_to_non_nullable +as int, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalSetTask]. +extension EvalSetTaskPatterns on EvalSetTask { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSetTask value)? 
$default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSetTask() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSetTask value) $default,){ +final _that = this; +switch (_that) { +case _EvalSetTask(): +return $default(_that);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSetTask value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSetTask() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String? name, @JsonKey(name: 'task_id') String taskId, @JsonKey(name: 'task_file') String? taskFile, @JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, String model, @JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, int sequence)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSetTask() when $default != null: +return $default(_that.name,_that.taskId,_that.taskFile,_that.taskArgs,_that.model,_that.modelArgs,_that.modelRoles,_that.sequence);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String? name, @JsonKey(name: 'task_id') String taskId, @JsonKey(name: 'task_file') String? taskFile, @JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, String model, @JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, int sequence) $default,) {final _that = this; +switch (_that) { +case _EvalSetTask(): +return $default(_that.name,_that.taskId,_that.taskFile,_that.taskArgs,_that.model,_that.modelArgs,_that.modelRoles,_that.sequence);case _: + throw StateError('Unexpected subclass'); + +} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String? name, @JsonKey(name: 'task_id') String taskId, @JsonKey(name: 'task_file') String? taskFile, @JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, String model, @JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, int sequence)? 
$default,) {final _that = this; +switch (_that) { +case _EvalSetTask() when $default != null: +return $default(_that.name,_that.taskId,_that.taskFile,_that.taskArgs,_that.model,_that.modelArgs,_that.modelRoles,_that.sequence);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSetTask extends EvalSetTask { + const _EvalSetTask({this.name, @JsonKey(name: 'task_id') required this.taskId, @JsonKey(name: 'task_file') this.taskFile, @JsonKey(name: 'task_args', defaultValue: {}) final Map taskArgs = const {}, required this.model, @JsonKey(name: 'model_args', defaultValue: {}) final Map modelArgs = const {}, @JsonKey(name: 'model_roles') final Map? modelRoles, required this.sequence}): _taskArgs = taskArgs,_modelArgs = modelArgs,_modelRoles = modelRoles,super._(); + factory _EvalSetTask.fromJson(Map json) => _$EvalSetTaskFromJson(json); + +/// Task name. +@override final String? name; +/// Unique task id. +@override@JsonKey(name: 'task_id') final String taskId; +/// Task source file. +@override@JsonKey(name: 'task_file') final String? taskFile; +/// Task arguments. + final Map _taskArgs; +/// Task arguments. +@override@JsonKey(name: 'task_args', defaultValue: {}) Map get taskArgs { + if (_taskArgs is EqualUnmodifiableMapView) return _taskArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_taskArgs); +} + +/// Model used for evaluation. +@override final String model; +/// Model specific arguments. + final Map _modelArgs; +/// Model specific arguments. +@override@JsonKey(name: 'model_args', defaultValue: {}) Map get modelArgs { + if (_modelArgs is EqualUnmodifiableMapView) return _modelArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_modelArgs); +} + +/// Model roles. + final Map? _modelRoles; +/// Model roles. +@override@JsonKey(name: 'model_roles') Map? 
get modelRoles { + final value = _modelRoles; + if (value == null) return null; + if (_modelRoles is EqualUnmodifiableMapView) return _modelRoles; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Sequence number of task in eval set. +@override final int sequence; + +/// Create a copy of EvalSetTask +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSetTaskCopyWith<_EvalSetTask> get copyWith => __$EvalSetTaskCopyWithImpl<_EvalSetTask>(this, _$identity); + +@override +Map toJson() { + return _$EvalSetTaskToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSetTask&&(identical(other.name, name) || other.name == name)&&(identical(other.taskId, taskId) || other.taskId == taskId)&&(identical(other.taskFile, taskFile) || other.taskFile == taskFile)&&const DeepCollectionEquality().equals(other._taskArgs, _taskArgs)&&(identical(other.model, model) || other.model == model)&&const DeepCollectionEquality().equals(other._modelArgs, _modelArgs)&&const DeepCollectionEquality().equals(other._modelRoles, _modelRoles)&&(identical(other.sequence, sequence) || other.sequence == sequence)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,taskId,taskFile,const DeepCollectionEquality().hash(_taskArgs),model,const DeepCollectionEquality().hash(_modelArgs),const DeepCollectionEquality().hash(_modelRoles),sequence); + +@override +String toString() { + return 'EvalSetTask(name: $name, taskId: $taskId, taskFile: $taskFile, taskArgs: $taskArgs, model: $model, modelArgs: $modelArgs, modelRoles: $modelRoles, sequence: $sequence)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSetTaskCopyWith<$Res> implements $EvalSetTaskCopyWith<$Res> { + factory 
_$EvalSetTaskCopyWith(_EvalSetTask value, $Res Function(_EvalSetTask) _then) = __$EvalSetTaskCopyWithImpl; +@override @useResult +$Res call({ + String? name,@JsonKey(name: 'task_id') String taskId,@JsonKey(name: 'task_file') String? taskFile,@JsonKey(name: 'task_args', defaultValue: {}) Map taskArgs, String model,@JsonKey(name: 'model_args', defaultValue: {}) Map modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles, int sequence +}); + + + + +} +/// @nodoc +class __$EvalSetTaskCopyWithImpl<$Res> + implements _$EvalSetTaskCopyWith<$Res> { + __$EvalSetTaskCopyWithImpl(this._self, this._then); + + final _EvalSetTask _self; + final $Res Function(_EvalSetTask) _then; + +/// Create a copy of EvalSetTask +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? name = freezed,Object? taskId = null,Object? taskFile = freezed,Object? taskArgs = null,Object? model = null,Object? modelArgs = null,Object? modelRoles = freezed,Object? sequence = null,}) { + return _then(_EvalSetTask( +name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,taskId: null == taskId ? _self.taskId : taskId // ignore: cast_nullable_to_non_nullable +as String,taskFile: freezed == taskFile ? _self.taskFile : taskFile // ignore: cast_nullable_to_non_nullable +as String?,taskArgs: null == taskArgs ? _self._taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map,model: null == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String,modelArgs: null == modelArgs ? _self._modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map,modelRoles: freezed == modelRoles ? _self._modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,sequence: null == sequence ? 
_self.sequence : sequence // ignore: cast_nullable_to_non_nullable +as int, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/eval_log.g.dart b/packages/eval_config/lib/src/models/eval_log.g.dart new file mode 100644 index 0000000..f6fa452 --- /dev/null +++ b/packages/eval_config/lib/src/models/eval_log.g.dart @@ -0,0 +1,961 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'eval_log.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_EvalLog _$EvalLogFromJson(Map json) => _EvalLog( + version: (json['version'] as num?)?.toInt() ?? 2, + status: json['status'] as String? ?? 'started', + eval: EvalSpec.fromJson(json['eval'] as Map), + plan: json['plan'] == null + ? null + : EvalPlan.fromJson(json['plan'] as Map), + results: json['results'] == null + ? null + : EvalResults.fromJson(json['results'] as Map), + stats: json['stats'] == null + ? null + : EvalStats.fromJson(json['stats'] as Map), + error: json['error'] == null + ? null + : EvalError.fromJson(json['error'] as Map), + invalidated: json['invalidated'] as bool? ?? false, + samples: (json['samples'] as List?) + ?.map((e) => EvalSample.fromJson(e as Map)) + .toList(), + reductions: (json['reductions'] as List?) + ?.map((e) => EvalSampleReductions.fromJson(e as Map)) + .toList(), + location: json['location'] as String?, + etag: json['etag'] as String?, + evalSetInfo: json['eval_set_info'] == null + ? 
null + : EvalSetInfo.fromJson(json['eval_set_info'] as Map), +); + +Map _$EvalLogToJson(_EvalLog instance) => { + 'version': instance.version, + 'status': instance.status, + 'eval': instance.eval.toJson(), + 'plan': instance.plan?.toJson(), + 'results': instance.results?.toJson(), + 'stats': instance.stats?.toJson(), + 'error': instance.error?.toJson(), + 'invalidated': instance.invalidated, + 'samples': instance.samples?.map((e) => e.toJson()).toList(), + 'reductions': instance.reductions?.map((e) => e.toJson()).toList(), + 'location': instance.location, + 'etag': instance.etag, + 'eval_set_info': instance.evalSetInfo?.toJson(), +}; + +_EvalSpec _$EvalSpecFromJson(Map json) => _EvalSpec( + evalSetId: json['eval_set_id'] as String?, + evalId: json['eval_id'] as String, + runId: json['run_id'] as String, + created: json['created'] as String, + task: json['task'] as String, + taskId: json['task_id'] as String, + taskVersion: json['task_version'] as Object? ?? 0, + taskFile: json['task_file'] as String?, + taskDisplayName: json['task_display_name'] as String?, + taskRegistryName: json['task_registry_name'] as String?, + taskAttribs: json['task_attribs'] as Map? ?? {}, + taskArgs: json['task_args'] as Map? ?? {}, + taskArgsPassed: json['task_args_passed'] as Map? ?? {}, + solver: json['solver'] as String?, + solverArgs: json['solver_args'] as Map? ?? {}, + solverArgsPassed: json['solver_args_passed'] as Map? ?? {}, + tags: + (json['tags'] as List?)?.map((e) => e as String).toList() ?? + const [], + dataset: json['dataset'] == null + ? null + : EvalDataset.fromJson(json['dataset'] as Map), + sandbox: json['sandbox'], + model: json['model'] as String, + modelGenerateConfig: json['model_generate_config'] == null + ? null + : GenerateConfig.fromJson( + json['model_generate_config'] as Map, + ), + modelBaseUrl: json['model_base_url'] as String?, + modelArgs: json['model_args'] as Map? ?? 
{}, + modelRoles: (json['model_roles'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ), + config: json['config'] == null + ? const EvalConfig() + : EvalConfig.fromJson(json['config'] as Map), + revision: json['revision'] == null + ? null + : EvalRevision.fromJson(json['revision'] as Map), + packages: + (json['packages'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ) ?? + {}, + metadata: json['metadata'] as Map?, + scorers: + (json['scorers'] as List?)?.map((e) => e as Object).toList() ?? + const [], + metrics: + (json['metrics'] as List?)?.map((e) => e as Object).toList() ?? + const [], +); + +Map _$EvalSpecToJson(_EvalSpec instance) => { + 'eval_set_id': instance.evalSetId, + 'eval_id': instance.evalId, + 'run_id': instance.runId, + 'created': instance.created, + 'task': instance.task, + 'task_id': instance.taskId, + 'task_version': instance.taskVersion, + 'task_file': instance.taskFile, + 'task_display_name': instance.taskDisplayName, + 'task_registry_name': instance.taskRegistryName, + 'task_attribs': instance.taskAttribs, + 'task_args': instance.taskArgs, + 'task_args_passed': instance.taskArgsPassed, + 'solver': instance.solver, + 'solver_args': instance.solverArgs, + 'solver_args_passed': instance.solverArgsPassed, + 'tags': instance.tags, + 'dataset': instance.dataset?.toJson(), + 'sandbox': instance.sandbox, + 'model': instance.model, + 'model_generate_config': instance.modelGenerateConfig?.toJson(), + 'model_base_url': instance.modelBaseUrl, + 'model_args': instance.modelArgs, + 'model_roles': instance.modelRoles, + 'config': instance.config.toJson(), + 'revision': instance.revision?.toJson(), + 'packages': instance.packages, + 'metadata': instance.metadata, + 'scorers': instance.scorers, + 'metrics': instance.metrics, +}; + +_EvalDataset _$EvalDatasetFromJson(Map json) => _EvalDataset( + name: json['name'] as String?, + location: json['location'] as String?, + samples: (json['samples'] as num).toInt(), + sampleIds: (json['sample_ids'] 
as List?) + ?.map((e) => e as Object) + .toList(), + shuffled: json['shuffled'] as bool? ?? false, +); + +Map _$EvalDatasetToJson(_EvalDataset instance) => + { + 'name': instance.name, + 'location': instance.location, + 'samples': instance.samples, + 'sample_ids': instance.sampleIds, + 'shuffled': instance.shuffled, + }; + +_EvalConfig _$EvalConfigFromJson(Map json) => _EvalConfig( + limit: json['limit'], + sampleId: json['sample_id'], + sampleShuffle: json['sample_shuffle'] as bool?, + epochs: (json['epochs'] as num?)?.toInt(), + epochsReducer: (json['epochs_reducer'] as List?) + ?.map((e) => e as String) + .toList(), + approval: json['approval'] as String?, + failOnError: json['fail_on_error'], + continueOnFail: json['continue_on_fail'] as bool?, + retryOnError: (json['retry_on_error'] as num?)?.toInt(), + messageLimit: (json['message_limit'] as num?)?.toInt(), + tokenLimit: (json['token_limit'] as num?)?.toInt(), + timeLimit: (json['time_limit'] as num?)?.toInt(), + workingLimit: (json['working_limit'] as num?)?.toInt(), + maxSamples: (json['max_samples'] as num?)?.toInt(), + maxTasks: (json['max_tasks'] as num?)?.toInt(), + maxSubprocesses: (json['max_subprocesses'] as num?)?.toInt(), + maxSandboxes: (json['max_sandboxes'] as num?)?.toInt(), + sandboxCleanup: json['sandbox_cleanup'] as bool?, + logSamples: json['log_samples'] as bool?, + logRealtime: json['log_realtime'] as bool?, + logImages: json['log_images'] as bool?, + logBuffer: (json['log_buffer'] as num?)?.toInt(), + logShared: (json['log_shared'] as num?)?.toInt(), + scoreDisplay: json['score_display'] as bool?, +); + +Map _$EvalConfigToJson(_EvalConfig instance) => + { + 'limit': instance.limit, + 'sample_id': instance.sampleId, + 'sample_shuffle': instance.sampleShuffle, + 'epochs': instance.epochs, + 'epochs_reducer': instance.epochsReducer, + 'approval': instance.approval, + 'fail_on_error': instance.failOnError, + 'continue_on_fail': instance.continueOnFail, + 'retry_on_error': 
instance.retryOnError, + 'message_limit': instance.messageLimit, + 'token_limit': instance.tokenLimit, + 'time_limit': instance.timeLimit, + 'working_limit': instance.workingLimit, + 'max_samples': instance.maxSamples, + 'max_tasks': instance.maxTasks, + 'max_subprocesses': instance.maxSubprocesses, + 'max_sandboxes': instance.maxSandboxes, + 'sandbox_cleanup': instance.sandboxCleanup, + 'log_samples': instance.logSamples, + 'log_realtime': instance.logRealtime, + 'log_images': instance.logImages, + 'log_buffer': instance.logBuffer, + 'log_shared': instance.logShared, + 'score_display': instance.scoreDisplay, + }; + +_EvalRevision _$EvalRevisionFromJson(Map json) => + _EvalRevision( + type: json['type'] as String, + origin: json['origin'] as String, + commit: json['commit'] as String, + dirty: json['dirty'] as bool? ?? false, + ); + +Map _$EvalRevisionToJson(_EvalRevision instance) => + { + 'type': instance.type, + 'origin': instance.origin, + 'commit': instance.commit, + 'dirty': instance.dirty, + }; + +_EvalPlan _$EvalPlanFromJson(Map json) => _EvalPlan( + name: json['name'] as String? ?? 'plan', + steps: + (json['steps'] as List?) + ?.map((e) => EvalPlanStep.fromJson(e as Map)) + .toList() ?? + const [], + finish: json['finish'] == null + ? null + : EvalPlanStep.fromJson(json['finish'] as Map), + config: json['config'] == null + ? const GenerateConfig() + : GenerateConfig.fromJson(json['config'] as Map), +); + +Map _$EvalPlanToJson(_EvalPlan instance) => { + 'name': instance.name, + 'steps': instance.steps.map((e) => e.toJson()).toList(), + 'finish': instance.finish?.toJson(), + 'config': instance.config.toJson(), +}; + +_EvalPlanStep _$EvalPlanStepFromJson(Map json) => + _EvalPlanStep( + solver: json['solver'] as String, + params: json['params'] as Map? ?? 
const {}, + paramsPassed: json['params_passed'] as Map?, + ); + +Map _$EvalPlanStepToJson(_EvalPlanStep instance) => + { + 'solver': instance.solver, + 'params': instance.params, + 'params_passed': instance.paramsPassed, + }; + +_EvalResults _$EvalResultsFromJson(Map json) => _EvalResults( + totalSamples: (json['total_samples'] as num?)?.toInt() ?? 0, + completedSamples: (json['completed_samples'] as num?)?.toInt() ?? 0, + earlyStopping: json['early_stopping'] == null + ? null + : EarlyStoppingSummary.fromJson( + json['early_stopping'] as Map, + ), + scores: + (json['scores'] as List?) + ?.map((e) => EvalScore.fromJson(e as Map)) + .toList() ?? + const [], + metadata: json['metadata'] as Map? ?? const {}, + sampleReductions: (json['sample_reductions'] as List?) + ?.map((e) => EvalSampleReductions.fromJson(e as Map)) + .toList(), +); + +Map _$EvalResultsToJson(_EvalResults instance) => + { + 'total_samples': instance.totalSamples, + 'completed_samples': instance.completedSamples, + 'early_stopping': instance.earlyStopping?.toJson(), + 'scores': instance.scores.map((e) => e.toJson()).toList(), + 'metadata': instance.metadata, + 'sample_reductions': instance.sampleReductions + ?.map((e) => e.toJson()) + .toList(), + }; + +_EarlyStoppingSummary _$EarlyStoppingSummaryFromJson( + Map json, +) => _EarlyStoppingSummary( + type: json['type'] as String, + limit: (json['limit'] as num?)?.toDouble(), + score: (json['score'] as num?)?.toDouble(), + metadata: json['metadata'] as Map? ?? 
const {}, +); + +Map _$EarlyStoppingSummaryToJson( + _EarlyStoppingSummary instance, +) => { + 'type': instance.type, + 'limit': instance.limit, + 'score': instance.score, + 'metadata': instance.metadata, +}; + +_EvalScore _$EvalScoreFromJson(Map json) => _EvalScore( + name: json['name'] as String, + scorer: json['scorer'] as String, + reducer: json['reducer'] as String?, + scoredSamples: (json['scored_samples'] as num?)?.toInt(), + unscoredSamples: (json['unscored_samples'] as num?)?.toInt(), + params: json['params'] as Map? ?? const {}, + metrics: json['metrics'] == null + ? const [] + : _metricsFromJson(json['metrics']), + metadata: json['metadata'] as Map?, +); + +Map _$EvalScoreToJson(_EvalScore instance) => + { + 'name': instance.name, + 'scorer': instance.scorer, + 'reducer': instance.reducer, + 'scored_samples': instance.scoredSamples, + 'unscored_samples': instance.unscoredSamples, + 'params': instance.params, + 'metrics': instance.metrics.map((e) => e.toJson()).toList(), + 'metadata': instance.metadata, + }; + +_EvalMetric _$EvalMetricFromJson(Map json) => _EvalMetric( + name: json['name'] as String, + value: json['value'] as Object, + params: json['params'] as Map? ?? 
const {}, + metadata: json['metadata'] as Map?, +); + +Map _$EvalMetricToJson(_EvalMetric instance) => + { + 'name': instance.name, + 'value': instance.value, + 'params': instance.params, + 'metadata': instance.metadata, + }; + +_EvalSampleReductions _$EvalSampleReductionsFromJson( + Map json, +) => _EvalSampleReductions( + scorer: json['scorer'] as String, + reducer: json['reducer'] as String?, + samples: (json['samples'] as List) + .map((e) => EvalSampleScore.fromJson(e as Map)) + .toList(), +); + +Map _$EvalSampleReductionsToJson( + _EvalSampleReductions instance, +) => { + 'scorer': instance.scorer, + 'reducer': instance.reducer, + 'samples': instance.samples.map((e) => e.toJson()).toList(), +}; + +_EvalStats _$EvalStatsFromJson(Map json) => _EvalStats( + startedAt: json['started_at'] as String, + completedAt: json['completed_at'] as String, + modelUsage: + (json['model_usage'] as Map?)?.map( + (k, e) => MapEntry(k, ModelUsage.fromJson(e as Map)), + ) ?? + {}, +); + +Map _$EvalStatsToJson(_EvalStats instance) => + { + 'started_at': instance.startedAt, + 'completed_at': instance.completedAt, + 'model_usage': instance.modelUsage.map((k, e) => MapEntry(k, e.toJson())), + }; + +_EvalError _$EvalErrorFromJson(Map json) => _EvalError( + message: json['message'] as String, + traceback: json['traceback'] as String, + tracebackAnsi: json['traceback_ansi'] as String, +); + +Map _$EvalErrorToJson(_EvalError instance) => + { + 'message': instance.message, + 'traceback': instance.traceback, + 'traceback_ansi': instance.tracebackAnsi, + }; + +_EvalSample _$EvalSampleFromJson(Map json) => _EvalSample( + id: json['id'] as Object, + epoch: (json['epoch'] as num).toInt(), + input: json['input'] as Object, + choices: (json['choices'] as List?) + ?.map((e) => e as String) + .toList(), + target: json['target'], + metadata: json['metadata'] as Map? ?? 
const {}, + sandbox: json['sandbox'], + files: (json['files'] as List?)?.map((e) => e as String).toList(), + setup: json['setup'] as String?, + messages: + (json['messages'] as List?) + ?.map((e) => ChatMessage.fromJson(e as Map)) + .toList() ?? + const [], + output: ModelOutput.fromJson(json['output'] as Map), + scores: (json['scores'] as Map?)?.map( + (k, e) => MapEntry(k, Score.fromJson(e as Map)), + ), + store: json['store'] as Map? ?? const {}, + events: + (json['events'] as List?)?.map((e) => e as Object).toList() ?? + const [], + modelUsage: + (json['model_usage'] as Map?)?.map( + (k, e) => MapEntry(k, ModelUsage.fromJson(e as Map)), + ) ?? + {}, + startedAt: json['started_at'] as String?, + completedAt: json['completed_at'] as String?, + totalTime: (json['total_time'] as num?)?.toDouble(), + workingTime: (json['working_time'] as num?)?.toDouble(), + uuid: json['uuid'] as String?, + invalidation: json['invalidation'] == null + ? null + : ProvenanceData.fromJson(json['invalidation'] as Map), + error: json['error'] == null + ? null + : EvalError.fromJson(json['error'] as Map), + errorRetries: (json['error_retries'] as List?) + ?.map((e) => EvalError.fromJson(e as Map)) + .toList(), + attachments: + (json['attachments'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ) ?? + const {}, + limit: json['limit'] == null + ? 
null + : EvalSampleLimit.fromJson(json['limit'] as Map), +); + +Map _$EvalSampleToJson(_EvalSample instance) => + { + 'id': instance.id, + 'epoch': instance.epoch, + 'input': instance.input, + 'choices': instance.choices, + 'target': instance.target, + 'metadata': instance.metadata, + 'sandbox': instance.sandbox, + 'files': instance.files, + 'setup': instance.setup, + 'messages': instance.messages.map((e) => e.toJson()).toList(), + 'output': instance.output.toJson(), + 'scores': instance.scores?.map((k, e) => MapEntry(k, e.toJson())), + 'store': instance.store, + 'events': instance.events, + 'model_usage': instance.modelUsage.map((k, e) => MapEntry(k, e.toJson())), + 'started_at': instance.startedAt, + 'completed_at': instance.completedAt, + 'total_time': instance.totalTime, + 'working_time': instance.workingTime, + 'uuid': instance.uuid, + 'invalidation': instance.invalidation?.toJson(), + 'error': instance.error?.toJson(), + 'error_retries': instance.errorRetries?.map((e) => e.toJson()).toList(), + 'attachments': instance.attachments, + 'limit': instance.limit?.toJson(), + }; + +_ModelOutput _$ModelOutputFromJson(Map json) => _ModelOutput( + model: json['model'] as String, + choices: + (json['choices'] as List?) + ?.map((e) => ChatCompletionChoice.fromJson(e as Map)) + .toList() ?? + const [], + usage: json['usage'] == null + ? null + : ModelUsage.fromJson(json['usage'] as Map), + completion: json['completion'] as String, + stopReason: json['stop_reason'] as String? ?? 'unknown', + time: (json['time'] as num?)?.toDouble(), + metadata: json['metadata'] as Map? ?? const {}, + error: json['error'] as String?, + message: json['message'] == null + ? 
null + : ChatMessageAssistant.fromJson(json['message'] as Map), +); + +Map _$ModelOutputToJson(_ModelOutput instance) => + { + 'model': instance.model, + 'choices': instance.choices.map((e) => e.toJson()).toList(), + 'usage': instance.usage?.toJson(), + 'completion': instance.completion, + 'stop_reason': instance.stopReason, + 'time': instance.time, + 'metadata': instance.metadata, + 'error': instance.error, + 'message': instance.message?.toJson(), + }; + +_ChatCompletionChoice _$ChatCompletionChoiceFromJson( + Map json, +) => _ChatCompletionChoice( + message: ChatMessageAssistant.fromJson( + json['message'] as Map, + ), + stopReason: json['stop_reason'] as String? ?? 'unknown', + logprobs: json['logprobs'] == null + ? null + : Logprobs.fromJson(json['logprobs'] as Map), +); + +Map _$ChatCompletionChoiceToJson( + _ChatCompletionChoice instance, +) => { + 'message': instance.message.toJson(), + 'stop_reason': instance.stopReason, + 'logprobs': instance.logprobs?.toJson(), +}; + +_ModelUsage _$ModelUsageFromJson(Map json) => _ModelUsage( + inputTokens: (json['input_tokens'] as num?)?.toInt() ?? 0, + outputTokens: (json['output_tokens'] as num?)?.toInt() ?? 0, + totalTokens: (json['total_tokens'] as num?)?.toInt() ?? 0, + inputTokensCacheWrite: (json['input_tokens_cache_write'] as num?)?.toInt(), + inputTokensCacheRead: (json['input_tokens_cache_read'] as num?)?.toInt(), + reasoningTokens: (json['reasoning_tokens'] as num?)?.toInt() ?? 
0, +); + +Map _$ModelUsageToJson(_ModelUsage instance) => + { + 'input_tokens': instance.inputTokens, + 'output_tokens': instance.outputTokens, + 'total_tokens': instance.totalTokens, + 'input_tokens_cache_write': instance.inputTokensCacheWrite, + 'input_tokens_cache_read': instance.inputTokensCacheRead, + 'reasoning_tokens': instance.reasoningTokens, + }; + +ChatMessageSystem _$ChatMessageSystemFromJson(Map json) => + ChatMessageSystem( + id: json['id'] as String?, + content: json['content'] as Object, + source: json['source'] as String?, + metadata: json['metadata'] as Map?, + role: json['role'] as String? ?? 'system', + ); + +Map _$ChatMessageSystemToJson(ChatMessageSystem instance) => + { + 'id': instance.id, + 'content': instance.content, + 'source': instance.source, + 'metadata': instance.metadata, + 'role': instance.role, + }; + +ChatMessageUser _$ChatMessageUserFromJson(Map json) => + ChatMessageUser( + id: json['id'] as String?, + content: json['content'] as Object, + source: json['source'] as String?, + metadata: json['metadata'] as Map?, + role: json['role'] as String? ?? 'user', + toolCallId: json['tool_call_id'], + ); + +Map _$ChatMessageUserToJson(ChatMessageUser instance) => + { + 'id': instance.id, + 'content': instance.content, + 'source': instance.source, + 'metadata': instance.metadata, + 'role': instance.role, + 'tool_call_id': instance.toolCallId, + }; + +ChatMessageAssistant _$ChatMessageAssistantFromJson( + Map json, +) => ChatMessageAssistant( + id: json['id'] as String?, + content: json['content'] as Object, + source: json['source'] as String?, + metadata: json['metadata'] as Map?, + role: json['role'] as String? ?? 'assistant', + toolCalls: (json['tool_calls'] as List?) 
+ ?.map((e) => ToolCall.fromJson(e as Map)) + .toList(), + model: json['model'] as String?, +); + +Map _$ChatMessageAssistantToJson( + ChatMessageAssistant instance, +) => { + 'id': instance.id, + 'content': instance.content, + 'source': instance.source, + 'metadata': instance.metadata, + 'role': instance.role, + 'tool_calls': instance.toolCalls?.map((e) => e.toJson()).toList(), + 'model': instance.model, +}; + +ChatMessageTool _$ChatMessageToolFromJson(Map json) => + ChatMessageTool( + id: json['id'] as String?, + content: json['content'] as Object, + source: json['source'] as String?, + metadata: json['metadata'] as Map?, + role: json['role'] as String? ?? 'tool', + toolCallId: json['tool_call_id'] as String?, + function: json['function'] as String?, + error: json['error'] == null + ? null + : ToolCallError.fromJson(json['error'] as Map), + ); + +Map _$ChatMessageToolToJson(ChatMessageTool instance) => + { + 'id': instance.id, + 'content': instance.content, + 'source': instance.source, + 'metadata': instance.metadata, + 'role': instance.role, + 'tool_call_id': instance.toolCallId, + 'function': instance.function, + 'error': instance.error?.toJson(), + }; + +ContentText _$ContentTextFromJson(Map json) => ContentText( + text: json['text'] as String, + refusal: json['refusal'] as bool? ?? false, + citations: (json['citations'] as List?) + ?.map((e) => e as Object) + .toList(), + type: json['type'] as String? ?? 'text', +); + +Map _$ContentTextToJson(ContentText instance) => + { + 'text': instance.text, + 'refusal': instance.refusal, + 'citations': instance.citations, + 'type': instance.type, + }; + +ContentReasoning _$ContentReasoningFromJson(Map json) => + ContentReasoning( + reasoning: json['reasoning'] as String, + summary: json['summary'] as String?, + signature: json['signature'] as String?, + redacted: json['redacted'] as bool? ?? false, + text: json['text'] as String?, + type: json['type'] as String? ?? 
'reasoning', + ); + +Map _$ContentReasoningToJson(ContentReasoning instance) => + { + 'reasoning': instance.reasoning, + 'summary': instance.summary, + 'signature': instance.signature, + 'redacted': instance.redacted, + 'text': instance.text, + 'type': instance.type, + }; + +ContentImage _$ContentImageFromJson(Map json) => ContentImage( + image: json['image'] as String, + detail: json['detail'] as String? ?? 'auto', + type: json['type'] as String? ?? 'image', +); + +Map _$ContentImageToJson(ContentImage instance) => + { + 'image': instance.image, + 'detail': instance.detail, + 'type': instance.type, + }; + +ContentAudio _$ContentAudioFromJson(Map json) => ContentAudio( + audio: json['audio'] as String, + format: json['format'] as String, + type: json['type'] as String? ?? 'audio', +); + +Map _$ContentAudioToJson(ContentAudio instance) => + { + 'audio': instance.audio, + 'format': instance.format, + 'type': instance.type, + }; + +ContentVideo _$ContentVideoFromJson(Map json) => ContentVideo( + video: json['video'] as String, + format: json['format'] as String, + type: json['type'] as String? ?? 'video', +); + +Map _$ContentVideoToJson(ContentVideo instance) => + { + 'video': instance.video, + 'format': instance.format, + 'type': instance.type, + }; + +ContentDocument _$ContentDocumentFromJson(Map json) => + ContentDocument( + document: json['document'] as String, + filename: json['filename'] as String?, + mimeType: json['mime_type'] as String?, + type: json['type'] as String? ?? 'document', + ); + +Map _$ContentDocumentToJson(ContentDocument instance) => + { + 'document': instance.document, + 'filename': instance.filename, + 'mime_type': instance.mimeType, + 'type': instance.type, + }; + +ContentData _$ContentDataFromJson(Map json) => ContentData( + data: json['data'] as Map, + type: json['type'] as String? ?? 
'data', +); + +Map _$ContentDataToJson(ContentData instance) => + {'data': instance.data, 'type': instance.type}; + +ContentToolUse _$ContentToolUseFromJson(Map json) => + ContentToolUse( + toolType: json['tool_type'] as String, + id: json['id'] as String, + name: json['name'] as String, + context: json['context'] as Map?, + arguments: json['arguments'] as Map, + result: json['result'], + error: json['error'], + type: json['type'] as String? ?? 'tool_use', + ); + +Map _$ContentToolUseToJson(ContentToolUse instance) => + { + 'tool_type': instance.toolType, + 'id': instance.id, + 'name': instance.name, + 'context': instance.context, + 'arguments': instance.arguments, + 'result': instance.result, + 'error': instance.error, + 'type': instance.type, + }; + +_EvalSampleScore _$EvalSampleScoreFromJson(Map json) => + _EvalSampleScore( + value: json['value'] as Object, + answer: json['answer'] as String?, + explanation: json['explanation'] as String?, + metadata: json['metadata'] as Map? ?? const {}, + history: + (json['history'] as List?) + ?.map((e) => e as Object) + .toList() ?? + const [], + sampleId: json['sample_id'], + ); + +Map _$EvalSampleScoreToJson(_EvalSampleScore instance) => + { + 'value': instance.value, + 'answer': instance.answer, + 'explanation': instance.explanation, + 'metadata': instance.metadata, + 'history': instance.history, + 'sample_id': instance.sampleId, + }; + +_Score _$ScoreFromJson(Map json) => _Score( + value: json['value'] as Object, + answer: json['answer'] as String?, + explanation: json['explanation'] as String?, + metadata: json['metadata'] as Map?, +); + +Map _$ScoreToJson(_Score instance) => { + 'value': instance.value, + 'answer': instance.answer, + 'explanation': instance.explanation, + 'metadata': instance.metadata, +}; + +_ToolCall _$ToolCallFromJson(Map json) => _ToolCall( + id: json['id'] as String, + function: json['function'] as String, + arguments: json['arguments'] as Map, + type: json['type'] as String? ?? 
'call', +); + +Map _$ToolCallToJson(_ToolCall instance) => { + 'id': instance.id, + 'function': instance.function, + 'arguments': instance.arguments, + 'type': instance.type, +}; + +_ToolCallError _$ToolCallErrorFromJson(Map json) => + _ToolCallError( + message: json['message'] as String, + code: (json['code'] as num?)?.toInt(), + data: json['data'] as Map?, + ); + +Map _$ToolCallErrorToJson(_ToolCallError instance) => + { + 'message': instance.message, + 'code': instance.code, + 'data': instance.data, + }; + +_GenerateConfig _$GenerateConfigFromJson(Map json) => + _GenerateConfig( + maxRetries: (json['max_retries'] as num?)?.toInt(), + timeout: (json['timeout'] as num?)?.toInt(), + attemptTimeout: (json['attempt_timeout'] as num?)?.toInt(), + maxConnections: (json['max_connections'] as num?)?.toInt(), + systemMessage: json['system_message'] as String?, + maxTokens: (json['max_tokens'] as num?)?.toInt(), + topP: (json['top_p'] as num?)?.toDouble(), + temperature: (json['temperature'] as num?)?.toDouble(), + stopSeqs: (json['stop_seqs'] as List?) 
+ ?.map((e) => e as String) + .toList(), + bestOf: (json['best_of'] as num?)?.toInt(), + frequencyPenalty: (json['frequency_penalty'] as num?)?.toDouble(), + presencePenalty: (json['presence_penalty'] as num?)?.toDouble(), + logitBias: (json['logit_bias'] as Map?)?.map( + (k, e) => MapEntry(k, (e as num).toDouble()), + ), + seed: (json['seed'] as num?)?.toInt(), + topK: (json['top_k'] as num?)?.toInt(), + numChoices: (json['num_choices'] as num?)?.toInt(), + logprobs: json['logprobs'] as bool?, + topLogprobs: (json['top_logprobs'] as num?)?.toInt(), + parallelToolCalls: json['parallel_tool_calls'] as bool?, + internalTools: json['internal_tools'] as bool?, + maxToolOutput: (json['max_tool_output'] as num?)?.toInt(), + cachePrompt: json['cache_prompt'], + ); + +Map _$GenerateConfigToJson(_GenerateConfig instance) => + { + 'max_retries': instance.maxRetries, + 'timeout': instance.timeout, + 'attempt_timeout': instance.attemptTimeout, + 'max_connections': instance.maxConnections, + 'system_message': instance.systemMessage, + 'max_tokens': instance.maxTokens, + 'top_p': instance.topP, + 'temperature': instance.temperature, + 'stop_seqs': instance.stopSeqs, + 'best_of': instance.bestOf, + 'frequency_penalty': instance.frequencyPenalty, + 'presence_penalty': instance.presencePenalty, + 'logit_bias': instance.logitBias, + 'seed': instance.seed, + 'top_k': instance.topK, + 'num_choices': instance.numChoices, + 'logprobs': instance.logprobs, + 'top_logprobs': instance.topLogprobs, + 'parallel_tool_calls': instance.parallelToolCalls, + 'internal_tools': instance.internalTools, + 'max_tool_output': instance.maxToolOutput, + 'cache_prompt': instance.cachePrompt, + }; + +_Logprobs _$LogprobsFromJson(Map json) => _Logprobs( + content: (json['content'] as List).map((e) => e as Object).toList(), +); + +Map _$LogprobsToJson(_Logprobs instance) => { + 'content': instance.content, +}; + +_ProvenanceData _$ProvenanceDataFromJson(Map json) => + _ProvenanceData( + location: 
json['location'] as String, + shash: json['shash'] as String, + ); + +Map _$ProvenanceDataToJson(_ProvenanceData instance) => + {'location': instance.location, 'shash': instance.shash}; + +_EvalSampleLimit _$EvalSampleLimitFromJson(Map json) => + _EvalSampleLimit( + type: json['type'] as String, + limit: (json['limit'] as num).toDouble(), + ); + +Map _$EvalSampleLimitToJson(_EvalSampleLimit instance) => + {'type': instance.type, 'limit': instance.limit}; + +_EvalSetInfo _$EvalSetInfoFromJson(Map json) => _EvalSetInfo( + evalSetId: json['eval_set_id'] as String, + tasks: (json['tasks'] as List) + .map((e) => EvalSetTask.fromJson(e as Map)) + .toList(), +); + +Map _$EvalSetInfoToJson(_EvalSetInfo instance) => + { + 'eval_set_id': instance.evalSetId, + 'tasks': instance.tasks.map((e) => e.toJson()).toList(), + }; + +_EvalSetTask _$EvalSetTaskFromJson(Map json) => _EvalSetTask( + name: json['name'] as String?, + taskId: json['task_id'] as String, + taskFile: json['task_file'] as String?, + taskArgs: json['task_args'] as Map? ?? {}, + model: json['model'] as String, + modelArgs: json['model_args'] as Map? ?? 
{}, + modelRoles: (json['model_roles'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ), + sequence: (json['sequence'] as num).toInt(), +); + +Map _$EvalSetTaskToJson(_EvalSetTask instance) => + { + 'name': instance.name, + 'task_id': instance.taskId, + 'task_file': instance.taskFile, + 'task_args': instance.taskArgs, + 'model': instance.model, + 'model_args': instance.modelArgs, + 'model_roles': instance.modelRoles, + 'sequence': instance.sequence, + }; diff --git a/packages/eval_config/lib/src/models/eval_set.dart b/packages/eval_config/lib/src/models/eval_set.dart new file mode 100644 index 0000000..6dd1882 --- /dev/null +++ b/packages/eval_config/lib/src/models/eval_set.dart @@ -0,0 +1,188 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; +import 'package:eval_config/src/models/models.dart'; + +part 'eval_set.freezed.dart'; +part 'eval_set.g.dart'; + +/// Dart representation of Inspect AI's `eval_set()` function parameters. +/// +/// Models the configuration passed to +/// [`inspect_ai.eval_set()`](https://inspect.aisi.org.uk/reference/inspect_ai.html#eval_set). +/// +/// This is the **Inspect AI** side of the eval set contract — it mirrors the +/// Python function signature. For the Dart-side resolved config that is +/// serialised *to* the Python runner, see `config/eval_set.dart`. +@freezed +sealed class EvalSet with _$EvalSet { + const factory EvalSet({ + /// Task(s) to evaluate. + /// + /// Accepts task file paths, task function names, or other task specifiers. + required List tasks, + + /// Output path for logging results. + /// + /// Required to ensure a unique storage scope is assigned for the set. + @JsonKey(name: 'log_dir') required String logDir, + + /// Maximum number of retry attempts before giving up (defaults to 10). + @JsonKey(name: 'retry_attempts') int? retryAttempts, + + /// Time in seconds to wait between retry attempts, increased + /// exponentially (defaults to 30). + @JsonKey(name: 'retry_wait') double? 
retryWait, + + /// Reduce `max_connections` at this rate with each retry + /// (defaults to 1.0 — no reduction). + @JsonKey(name: 'retry_connections') double? retryConnections, + + /// Cleanup failed log files after retries (defaults to true). + @JsonKey(name: 'retry_cleanup') bool? retryCleanup, + + /// Model(s) for evaluation. + /// + /// A list of Provider/model strings (e.g. `"openai/gpt-4o"`) + /// If not specified, uses the `INSPECT_EVAL_MODEL` environment variable. + List? model, + + /// Base URL for communicating with the model API. + @JsonKey(name: 'model_base_url') String? modelBaseUrl, + + /// Model creation arguments (dictionary or path to JSON/YAML config). + @JsonKey(name: 'model_args') @Default({}) Map modelArgs, + + /// Named roles for use in `get_model()`. + @JsonKey(name: 'model_roles') Map? modelRoles, + + /// Task creation arguments (dictionary or path to JSON/YAML config). + @JsonKey(name: 'task_args') @Default({}) Map taskArgs, + + /// Sandbox environment type (or a shorthand spec). + Object? sandbox, + + /// Cleanup sandbox environments after task completes (defaults to true). + @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, + + /// Alternative solver(s) for evaluating task(s). + Object? solver, + + /// Tags to associate with this evaluation run. + List? tags, + + /// Metadata to associate with this evaluation run. + Map? metadata, + + /// Trace message interactions with evaluated model to terminal. + bool? trace, + + /// Task display type (defaults to `"full"`). + String? display, + + /// Tool use approval policies. + Object? approval, + + /// Score output (defaults to true). + @Default(true) bool score, + + /// Level for logging to the console (defaults to `"warning"`). + @JsonKey(name: 'log_level') String? logLevel, + + /// Level for logging to the log file (defaults to `"info"`). + @JsonKey(name: 'log_level_transcript') String? logLevelTranscript, + + /// Format for writing log files (`"eval"` or `"json"`). 
+ @JsonKey(name: 'log_format') String? logFormat, + + /// Limit evaluated samples (defaults to all samples). + /// + /// Can be an `int` count or a `[start, end]` range. + Object? limit, + + /// Evaluate specific sample(s) from the dataset. + @JsonKey(name: 'sample_id') Object? sampleId, + + /// Shuffle order of samples (pass a seed to make the order deterministic). + @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, + + /// Epochs to repeat samples for and optional score reducer function(s). + Object? epochs, + + /// Fail on sample errors. + /// + /// `0.0–1.0` = fail if proportion exceeds threshold, + /// `>1` = fail if count exceeds threshold. + @JsonKey(name: 'fail_on_error') double? failOnError, + + /// Continue running even if `fail_on_error` condition is met. + @JsonKey(name: 'continue_on_fail') bool? continueOnFail, + + /// Number of times to retry samples on error (default: no retries). + @JsonKey(name: 'retry_on_error') int? retryOnError, + + /// Raise task errors for debugging (defaults to false). + @JsonKey(name: 'debug_errors') bool? debugErrors, + + /// Limit on total messages per sample. + @JsonKey(name: 'message_limit') int? messageLimit, + + /// Limit on total tokens per sample. + @JsonKey(name: 'token_limit') int? tokenLimit, + + /// Limit on clock time (in seconds) per sample. + @JsonKey(name: 'time_limit') int? timeLimit, + + /// Limit on working time (in seconds) per sample. + /// + /// Working time includes model generation, tool calls, etc. but does not + /// include waiting on retries or shared resources. + @JsonKey(name: 'working_limit') int? workingLimit, + + /// Limit on total cost (in dollars) per sample. + @JsonKey(name: 'cost_limit') double? costLimit, + + /// JSON file with model prices for cost tracking. + @JsonKey(name: 'model_cost_config') Map? modelCostConfig, + + /// Maximum samples to run in parallel (default is `max_connections`). + @JsonKey(name: 'max_samples') int? maxSamples, + + /// Maximum tasks to run in parallel. 
+ @JsonKey(name: 'max_tasks') int? maxTasks, + + /// Maximum subprocesses to run in parallel (default is `os.cpu_count()`). + @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, + + /// Maximum sandboxes (per-provider) to run in parallel. + @JsonKey(name: 'max_sandboxes') int? maxSandboxes, + + /// Log detailed samples and scores (defaults to true). + @JsonKey(name: 'log_samples') bool? logSamples, + + /// Log events in realtime (defaults to true). + @JsonKey(name: 'log_realtime') bool? logRealtime, + + /// Log base64-encoded images (defaults to false). + @JsonKey(name: 'log_images') bool? logImages, + + /// Number of samples to buffer before writing log file. + @JsonKey(name: 'log_buffer') int? logBuffer, + + /// Sync sample events for realtime viewing. + @JsonKey(name: 'log_shared') int? logShared, + + /// Directory to bundle logs and viewer into. + @JsonKey(name: 'bundle_dir') String? bundleDir, + + /// Overwrite files in `bundle_dir` (defaults to false). + @JsonKey(name: 'bundle_overwrite') @Default(false) bool bundleOverwrite, + + /// Allow log directory to contain unrelated logs (defaults to false). + @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, + + /// ID for the eval set. Generated if not specified. + @JsonKey(name: 'eval_set_id') String? 
evalSetId, + }) = _EvalSet; + + factory EvalSet.fromJson(Map json) => + _$EvalSetFromJson(json); +} diff --git a/packages/eval_config/lib/src/models/eval_set.freezed.dart b/packages/eval_config/lib/src/models/eval_set.freezed.dart new file mode 100644 index 0000000..6836321 --- /dev/null +++ b/packages/eval_config/lib/src/models/eval_set.freezed.dart @@ -0,0 +1,609 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'eval_set.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$EvalSet { + +/// Task(s) to evaluate. +/// +/// Accepts task file paths, task function names, or other task specifiers. + List get tasks;/// Output path for logging results. +/// +/// Required to ensure a unique storage scope is assigned for the set. +@JsonKey(name: 'log_dir') String get logDir;/// Maximum number of retry attempts before giving up (defaults to 10). +@JsonKey(name: 'retry_attempts') int? get retryAttempts;/// Time in seconds to wait between retry attempts, increased +/// exponentially (defaults to 30). +@JsonKey(name: 'retry_wait') double? get retryWait;/// Reduce `max_connections` at this rate with each retry +/// (defaults to 1.0 — no reduction). +@JsonKey(name: 'retry_connections') double? get retryConnections;/// Cleanup failed log files after retries (defaults to true). +@JsonKey(name: 'retry_cleanup') bool? get retryCleanup;/// Model(s) for evaluation. 
+/// +/// A list of Provider/model strings (e.g. `"openai/gpt-4o"`) +/// If not specified, uses the `INSPECT_EVAL_MODEL` environment variable. + List? get model;/// Base URL for communicating with the model API. +@JsonKey(name: 'model_base_url') String? get modelBaseUrl;/// Model creation arguments (dictionary or path to JSON/YAML config). +@JsonKey(name: 'model_args') Map get modelArgs;/// Named roles for use in `get_model()`. +@JsonKey(name: 'model_roles') Map? get modelRoles;/// Task creation arguments (dictionary or path to JSON/YAML config). +@JsonKey(name: 'task_args') Map get taskArgs;/// Sandbox environment type (or a shorthand spec). + Object? get sandbox;/// Cleanup sandbox environments after task completes (defaults to true). +@JsonKey(name: 'sandbox_cleanup') bool? get sandboxCleanup;/// Alternative solver(s) for evaluating task(s). + Object? get solver;/// Tags to associate with this evaluation run. + List? get tags;/// Metadata to associate with this evaluation run. + Map? get metadata;/// Trace message interactions with evaluated model to terminal. + bool? get trace;/// Task display type (defaults to `"full"`). + String? get display;/// Tool use approval policies. + Object? get approval;/// Score output (defaults to true). + bool get score;/// Level for logging to the console (defaults to `"warning"`). +@JsonKey(name: 'log_level') String? get logLevel;/// Level for logging to the log file (defaults to `"info"`). +@JsonKey(name: 'log_level_transcript') String? get logLevelTranscript;/// Format for writing log files (`"eval"` or `"json"`). +@JsonKey(name: 'log_format') String? get logFormat;/// Limit evaluated samples (defaults to all samples). +/// +/// Can be an `int` count or a `[start, end]` range. + Object? get limit;/// Evaluate specific sample(s) from the dataset. +@JsonKey(name: 'sample_id') Object? get sampleId;/// Shuffle order of samples (pass a seed to make the order deterministic). +@JsonKey(name: 'sample_shuffle') Object? 
get sampleShuffle;/// Epochs to repeat samples for and optional score reducer function(s). + Object? get epochs;/// Fail on sample errors. +/// +/// `0.0–1.0` = fail if proportion exceeds threshold, +/// `>1` = fail if count exceeds threshold. +@JsonKey(name: 'fail_on_error') double? get failOnError;/// Continue running even if `fail_on_error` condition is met. +@JsonKey(name: 'continue_on_fail') bool? get continueOnFail;/// Number of times to retry samples on error (default: no retries). +@JsonKey(name: 'retry_on_error') int? get retryOnError;/// Raise task errors for debugging (defaults to false). +@JsonKey(name: 'debug_errors') bool? get debugErrors;/// Limit on total messages per sample. +@JsonKey(name: 'message_limit') int? get messageLimit;/// Limit on total tokens per sample. +@JsonKey(name: 'token_limit') int? get tokenLimit;/// Limit on clock time (in seconds) per sample. +@JsonKey(name: 'time_limit') int? get timeLimit;/// Limit on working time (in seconds) per sample. +/// +/// Working time includes model generation, tool calls, etc. but does not +/// include waiting on retries or shared resources. +@JsonKey(name: 'working_limit') int? get workingLimit;/// Limit on total cost (in dollars) per sample. +@JsonKey(name: 'cost_limit') double? get costLimit;/// JSON file with model prices for cost tracking. +@JsonKey(name: 'model_cost_config') Map? get modelCostConfig;/// Maximum samples to run in parallel (default is `max_connections`). +@JsonKey(name: 'max_samples') int? get maxSamples;/// Maximum tasks to run in parallel. +@JsonKey(name: 'max_tasks') int? get maxTasks;/// Maximum subprocesses to run in parallel (default is `os.cpu_count()`). +@JsonKey(name: 'max_subprocesses') int? get maxSubprocesses;/// Maximum sandboxes (per-provider) to run in parallel. +@JsonKey(name: 'max_sandboxes') int? get maxSandboxes;/// Log detailed samples and scores (defaults to true). +@JsonKey(name: 'log_samples') bool? 
get logSamples;/// Log events in realtime (defaults to true). +@JsonKey(name: 'log_realtime') bool? get logRealtime;/// Log base64-encoded images (defaults to false). +@JsonKey(name: 'log_images') bool? get logImages;/// Number of samples to buffer before writing log file. +@JsonKey(name: 'log_buffer') int? get logBuffer;/// Sync sample events for realtime viewing. +@JsonKey(name: 'log_shared') int? get logShared;/// Directory to bundle logs and viewer into. +@JsonKey(name: 'bundle_dir') String? get bundleDir;/// Overwrite files in `bundle_dir` (defaults to false). +@JsonKey(name: 'bundle_overwrite') bool get bundleOverwrite;/// Allow log directory to contain unrelated logs (defaults to false). +@JsonKey(name: 'log_dir_allow_dirty') bool? get logDirAllowDirty;/// ID for the eval set. Generated if not specified. +@JsonKey(name: 'eval_set_id') String? get evalSetId; +/// Create a copy of EvalSet +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$EvalSetCopyWith get copyWith => _$EvalSetCopyWithImpl(this as EvalSet, _$identity); + + /// Serializes this EvalSet to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is EvalSet&&const DeepCollectionEquality().equals(other.tasks, tasks)&&(identical(other.logDir, logDir) || other.logDir == logDir)&&(identical(other.retryAttempts, retryAttempts) || other.retryAttempts == retryAttempts)&&(identical(other.retryWait, retryWait) || other.retryWait == retryWait)&&(identical(other.retryConnections, retryConnections) || other.retryConnections == retryConnections)&&(identical(other.retryCleanup, retryCleanup) || other.retryCleanup == retryCleanup)&&const DeepCollectionEquality().equals(other.model, model)&&(identical(other.modelBaseUrl, modelBaseUrl) || other.modelBaseUrl == modelBaseUrl)&&const DeepCollectionEquality().equals(other.modelArgs, modelArgs)&&const DeepCollectionEquality().equals(other.modelRoles, modelRoles)&&const DeepCollectionEquality().equals(other.taskArgs, taskArgs)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&(identical(other.sandboxCleanup, sandboxCleanup) || other.sandboxCleanup == sandboxCleanup)&&const DeepCollectionEquality().equals(other.solver, solver)&&const DeepCollectionEquality().equals(other.tags, tags)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&(identical(other.trace, trace) || other.trace == trace)&&(identical(other.display, display) || other.display == display)&&const DeepCollectionEquality().equals(other.approval, approval)&&(identical(other.score, score) || other.score == score)&&(identical(other.logLevel, logLevel) || other.logLevel == logLevel)&&(identical(other.logLevelTranscript, logLevelTranscript) || other.logLevelTranscript == logLevelTranscript)&&(identical(other.logFormat, logFormat) || other.logFormat == logFormat)&&const DeepCollectionEquality().equals(other.limit, limit)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)&&const DeepCollectionEquality().equals(other.sampleShuffle, 
sampleShuffle)&&const DeepCollectionEquality().equals(other.epochs, epochs)&&(identical(other.failOnError, failOnError) || other.failOnError == failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.retryOnError, retryOnError) || other.retryOnError == retryOnError)&&(identical(other.debugErrors, debugErrors) || other.debugErrors == debugErrors)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.costLimit, costLimit) || other.costLimit == costLimit)&&const DeepCollectionEquality().equals(other.modelCostConfig, modelCostConfig)&&(identical(other.maxSamples, maxSamples) || other.maxSamples == maxSamples)&&(identical(other.maxTasks, maxTasks) || other.maxTasks == maxTasks)&&(identical(other.maxSubprocesses, maxSubprocesses) || other.maxSubprocesses == maxSubprocesses)&&(identical(other.maxSandboxes, maxSandboxes) || other.maxSandboxes == maxSandboxes)&&(identical(other.logSamples, logSamples) || other.logSamples == logSamples)&&(identical(other.logRealtime, logRealtime) || other.logRealtime == logRealtime)&&(identical(other.logImages, logImages) || other.logImages == logImages)&&(identical(other.logBuffer, logBuffer) || other.logBuffer == logBuffer)&&(identical(other.logShared, logShared) || other.logShared == logShared)&&(identical(other.bundleDir, bundleDir) || other.bundleDir == bundleDir)&&(identical(other.bundleOverwrite, bundleOverwrite) || other.bundleOverwrite == bundleOverwrite)&&(identical(other.logDirAllowDirty, logDirAllowDirty) || other.logDirAllowDirty == logDirAllowDirty)&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) 
+@override +int get hashCode => Object.hashAll([runtimeType,const DeepCollectionEquality().hash(tasks),logDir,retryAttempts,retryWait,retryConnections,retryCleanup,const DeepCollectionEquality().hash(model),modelBaseUrl,const DeepCollectionEquality().hash(modelArgs),const DeepCollectionEquality().hash(modelRoles),const DeepCollectionEquality().hash(taskArgs),const DeepCollectionEquality().hash(sandbox),sandboxCleanup,const DeepCollectionEquality().hash(solver),const DeepCollectionEquality().hash(tags),const DeepCollectionEquality().hash(metadata),trace,display,const DeepCollectionEquality().hash(approval),score,logLevel,logLevelTranscript,logFormat,const DeepCollectionEquality().hash(limit),const DeepCollectionEquality().hash(sampleId),const DeepCollectionEquality().hash(sampleShuffle),const DeepCollectionEquality().hash(epochs),failOnError,continueOnFail,retryOnError,debugErrors,messageLimit,tokenLimit,timeLimit,workingLimit,costLimit,const DeepCollectionEquality().hash(modelCostConfig),maxSamples,maxTasks,maxSubprocesses,maxSandboxes,logSamples,logRealtime,logImages,logBuffer,logShared,bundleDir,bundleOverwrite,logDirAllowDirty,evalSetId]); + +@override +String toString() { + return 'EvalSet(tasks: $tasks, logDir: $logDir, retryAttempts: $retryAttempts, retryWait: $retryWait, retryConnections: $retryConnections, retryCleanup: $retryCleanup, model: $model, modelBaseUrl: $modelBaseUrl, modelArgs: $modelArgs, modelRoles: $modelRoles, taskArgs: $taskArgs, sandbox: $sandbox, sandboxCleanup: $sandboxCleanup, solver: $solver, tags: $tags, metadata: $metadata, trace: $trace, display: $display, approval: $approval, score: $score, logLevel: $logLevel, logLevelTranscript: $logLevelTranscript, logFormat: $logFormat, limit: $limit, sampleId: $sampleId, sampleShuffle: $sampleShuffle, epochs: $epochs, failOnError: $failOnError, continueOnFail: $continueOnFail, retryOnError: $retryOnError, debugErrors: $debugErrors, messageLimit: $messageLimit, tokenLimit: $tokenLimit, 
timeLimit: $timeLimit, workingLimit: $workingLimit, costLimit: $costLimit, modelCostConfig: $modelCostConfig, maxSamples: $maxSamples, maxTasks: $maxTasks, maxSubprocesses: $maxSubprocesses, maxSandboxes: $maxSandboxes, logSamples: $logSamples, logRealtime: $logRealtime, logImages: $logImages, logBuffer: $logBuffer, logShared: $logShared, bundleDir: $bundleDir, bundleOverwrite: $bundleOverwrite, logDirAllowDirty: $logDirAllowDirty, evalSetId: $evalSetId)'; +} + + +} + +/// @nodoc +abstract mixin class $EvalSetCopyWith<$Res> { + factory $EvalSetCopyWith(EvalSet value, $Res Function(EvalSet) _then) = _$EvalSetCopyWithImpl; +@useResult +$Res call({ + List tasks,@JsonKey(name: 'log_dir') String logDir,@JsonKey(name: 'retry_attempts') int? retryAttempts,@JsonKey(name: 'retry_wait') double? retryWait,@JsonKey(name: 'retry_connections') double? retryConnections,@JsonKey(name: 'retry_cleanup') bool? retryCleanup, List? model,@JsonKey(name: 'model_base_url') String? modelBaseUrl,@JsonKey(name: 'model_args') Map modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles,@JsonKey(name: 'task_args') Map taskArgs, Object? sandbox,@JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, Object? solver, List? tags, Map? metadata, bool? trace, String? display, Object? approval, bool score,@JsonKey(name: 'log_level') String? logLevel,@JsonKey(name: 'log_level_transcript') String? logLevelTranscript,@JsonKey(name: 'log_format') String? logFormat, Object? limit,@JsonKey(name: 'sample_id') Object? sampleId,@JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs,@JsonKey(name: 'fail_on_error') double? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'retry_on_error') int? retryOnError,@JsonKey(name: 'debug_errors') bool? debugErrors,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? 
workingLimit,@JsonKey(name: 'cost_limit') double? costLimit,@JsonKey(name: 'model_cost_config') Map? modelCostConfig,@JsonKey(name: 'max_samples') int? maxSamples,@JsonKey(name: 'max_tasks') int? maxTasks,@JsonKey(name: 'max_subprocesses') int? maxSubprocesses,@JsonKey(name: 'max_sandboxes') int? maxSandboxes,@JsonKey(name: 'log_samples') bool? logSamples,@JsonKey(name: 'log_realtime') bool? logRealtime,@JsonKey(name: 'log_images') bool? logImages,@JsonKey(name: 'log_buffer') int? logBuffer,@JsonKey(name: 'log_shared') int? logShared,@JsonKey(name: 'bundle_dir') String? bundleDir,@JsonKey(name: 'bundle_overwrite') bool bundleOverwrite,@JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty,@JsonKey(name: 'eval_set_id') String? evalSetId +}); + + + + +} +/// @nodoc +class _$EvalSetCopyWithImpl<$Res> + implements $EvalSetCopyWith<$Res> { + _$EvalSetCopyWithImpl(this._self, this._then); + + final EvalSet _self; + final $Res Function(EvalSet) _then; + +/// Create a copy of EvalSet +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? tasks = null,Object? logDir = null,Object? retryAttempts = freezed,Object? retryWait = freezed,Object? retryConnections = freezed,Object? retryCleanup = freezed,Object? model = freezed,Object? modelBaseUrl = freezed,Object? modelArgs = null,Object? modelRoles = freezed,Object? taskArgs = null,Object? sandbox = freezed,Object? sandboxCleanup = freezed,Object? solver = freezed,Object? tags = freezed,Object? metadata = freezed,Object? trace = freezed,Object? display = freezed,Object? approval = freezed,Object? score = null,Object? logLevel = freezed,Object? logLevelTranscript = freezed,Object? logFormat = freezed,Object? limit = freezed,Object? sampleId = freezed,Object? sampleShuffle = freezed,Object? epochs = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? retryOnError = freezed,Object? debugErrors = freezed,Object? 
messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? workingLimit = freezed,Object? costLimit = freezed,Object? modelCostConfig = freezed,Object? maxSamples = freezed,Object? maxTasks = freezed,Object? maxSubprocesses = freezed,Object? maxSandboxes = freezed,Object? logSamples = freezed,Object? logRealtime = freezed,Object? logImages = freezed,Object? logBuffer = freezed,Object? logShared = freezed,Object? bundleDir = freezed,Object? bundleOverwrite = null,Object? logDirAllowDirty = freezed,Object? evalSetId = freezed,}) { + return _then(_self.copyWith( +tasks: null == tasks ? _self.tasks : tasks // ignore: cast_nullable_to_non_nullable +as List,logDir: null == logDir ? _self.logDir : logDir // ignore: cast_nullable_to_non_nullable +as String,retryAttempts: freezed == retryAttempts ? _self.retryAttempts : retryAttempts // ignore: cast_nullable_to_non_nullable +as int?,retryWait: freezed == retryWait ? _self.retryWait : retryWait // ignore: cast_nullable_to_non_nullable +as double?,retryConnections: freezed == retryConnections ? _self.retryConnections : retryConnections // ignore: cast_nullable_to_non_nullable +as double?,retryCleanup: freezed == retryCleanup ? _self.retryCleanup : retryCleanup // ignore: cast_nullable_to_non_nullable +as bool?,model: freezed == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as List?,modelBaseUrl: freezed == modelBaseUrl ? _self.modelBaseUrl : modelBaseUrl // ignore: cast_nullable_to_non_nullable +as String?,modelArgs: null == modelArgs ? _self.modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map,modelRoles: freezed == modelRoles ? _self.modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,taskArgs: null == taskArgs ? _self.taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,sandboxCleanup: freezed == sandboxCleanup ? 
_self.sandboxCleanup : sandboxCleanup // ignore: cast_nullable_to_non_nullable +as bool?,solver: freezed == solver ? _self.solver : solver ,tags: freezed == tags ? _self.tags : tags // ignore: cast_nullable_to_non_nullable +as List?,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,trace: freezed == trace ? _self.trace : trace // ignore: cast_nullable_to_non_nullable +as bool?,display: freezed == display ? _self.display : display // ignore: cast_nullable_to_non_nullable +as String?,approval: freezed == approval ? _self.approval : approval ,score: null == score ? _self.score : score // ignore: cast_nullable_to_non_nullable +as bool,logLevel: freezed == logLevel ? _self.logLevel : logLevel // ignore: cast_nullable_to_non_nullable +as String?,logLevelTranscript: freezed == logLevelTranscript ? _self.logLevelTranscript : logLevelTranscript // ignore: cast_nullable_to_non_nullable +as String?,logFormat: freezed == logFormat ? _self.logFormat : logFormat // ignore: cast_nullable_to_non_nullable +as String?,limit: freezed == limit ? _self.limit : limit ,sampleId: freezed == sampleId ? _self.sampleId : sampleId ,sampleShuffle: freezed == sampleShuffle ? _self.sampleShuffle : sampleShuffle ,epochs: freezed == epochs ? _self.epochs : epochs ,failOnError: freezed == failOnError ? _self.failOnError : failOnError // ignore: cast_nullable_to_non_nullable +as double?,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,retryOnError: freezed == retryOnError ? _self.retryOnError : retryOnError // ignore: cast_nullable_to_non_nullable +as int?,debugErrors: freezed == debugErrors ? _self.debugErrors : debugErrors // ignore: cast_nullable_to_non_nullable +as bool?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? 
_self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,costLimit: freezed == costLimit ? _self.costLimit : costLimit // ignore: cast_nullable_to_non_nullable +as double?,modelCostConfig: freezed == modelCostConfig ? _self.modelCostConfig : modelCostConfig // ignore: cast_nullable_to_non_nullable +as Map?,maxSamples: freezed == maxSamples ? _self.maxSamples : maxSamples // ignore: cast_nullable_to_non_nullable +as int?,maxTasks: freezed == maxTasks ? _self.maxTasks : maxTasks // ignore: cast_nullable_to_non_nullable +as int?,maxSubprocesses: freezed == maxSubprocesses ? _self.maxSubprocesses : maxSubprocesses // ignore: cast_nullable_to_non_nullable +as int?,maxSandboxes: freezed == maxSandboxes ? _self.maxSandboxes : maxSandboxes // ignore: cast_nullable_to_non_nullable +as int?,logSamples: freezed == logSamples ? _self.logSamples : logSamples // ignore: cast_nullable_to_non_nullable +as bool?,logRealtime: freezed == logRealtime ? _self.logRealtime : logRealtime // ignore: cast_nullable_to_non_nullable +as bool?,logImages: freezed == logImages ? _self.logImages : logImages // ignore: cast_nullable_to_non_nullable +as bool?,logBuffer: freezed == logBuffer ? _self.logBuffer : logBuffer // ignore: cast_nullable_to_non_nullable +as int?,logShared: freezed == logShared ? _self.logShared : logShared // ignore: cast_nullable_to_non_nullable +as int?,bundleDir: freezed == bundleDir ? _self.bundleDir : bundleDir // ignore: cast_nullable_to_non_nullable +as String?,bundleOverwrite: null == bundleOverwrite ? _self.bundleOverwrite : bundleOverwrite // ignore: cast_nullable_to_non_nullable +as bool,logDirAllowDirty: freezed == logDirAllowDirty ? 
_self.logDirAllowDirty : logDirAllowDirty // ignore: cast_nullable_to_non_nullable +as bool?,evalSetId: freezed == evalSetId ? _self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [EvalSet]. +extension EvalSetPatterns on EvalSet { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _EvalSet value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _EvalSet() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _EvalSet value) $default,){ +final _that = this; +switch (_that) { +case _EvalSet(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _EvalSet value)? $default,){ +final _that = this; +switch (_that) { +case _EvalSet() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( List tasks, @JsonKey(name: 'log_dir') String logDir, @JsonKey(name: 'retry_attempts') int? retryAttempts, @JsonKey(name: 'retry_wait') double? retryWait, @JsonKey(name: 'retry_connections') double? retryConnections, @JsonKey(name: 'retry_cleanup') bool? retryCleanup, List? model, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args') Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, @JsonKey(name: 'task_args') Map taskArgs, Object? sandbox, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, Object? solver, List? tags, Map? metadata, bool? trace, String? display, Object? approval, bool score, @JsonKey(name: 'log_level') String? logLevel, @JsonKey(name: 'log_level_transcript') String? logLevelTranscript, @JsonKey(name: 'log_format') String? logFormat, Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, @JsonKey(name: 'fail_on_error') double? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'debug_errors') bool? debugErrors, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'model_cost_config') Map? modelCostConfig, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? 
logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'bundle_dir') String? bundleDir, @JsonKey(name: 'bundle_overwrite') bool bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, @JsonKey(name: 'eval_set_id') String? evalSetId)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _EvalSet() when $default != null: +return $default(_that.tasks,_that.logDir,_that.retryAttempts,_that.retryWait,_that.retryConnections,_that.retryCleanup,_that.model,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.taskArgs,_that.sandbox,_that.sandboxCleanup,_that.solver,_that.tags,_that.metadata,_that.trace,_that.display,_that.approval,_that.score,_that.logLevel,_that.logLevelTranscript,_that.logFormat,_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.debugErrors,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.modelCostConfig,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.bundleDir,_that.bundleOverwrite,_that.logDirAllowDirty,_that.evalSetId);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( List tasks, @JsonKey(name: 'log_dir') String logDir, @JsonKey(name: 'retry_attempts') int? retryAttempts, @JsonKey(name: 'retry_wait') double? retryWait, @JsonKey(name: 'retry_connections') double? retryConnections, @JsonKey(name: 'retry_cleanup') bool? 
retryCleanup, List? model, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args') Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, @JsonKey(name: 'task_args') Map taskArgs, Object? sandbox, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, Object? solver, List? tags, Map? metadata, bool? trace, String? display, Object? approval, bool score, @JsonKey(name: 'log_level') String? logLevel, @JsonKey(name: 'log_level_transcript') String? logLevelTranscript, @JsonKey(name: 'log_format') String? logFormat, Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, @JsonKey(name: 'fail_on_error') double? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'debug_errors') bool? debugErrors, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'model_cost_config') Map? modelCostConfig, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'bundle_dir') String? bundleDir, @JsonKey(name: 'bundle_overwrite') bool bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, @JsonKey(name: 'eval_set_id') String? 
evalSetId) $default,) {final _that = this; +switch (_that) { +case _EvalSet(): +return $default(_that.tasks,_that.logDir,_that.retryAttempts,_that.retryWait,_that.retryConnections,_that.retryCleanup,_that.model,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.taskArgs,_that.sandbox,_that.sandboxCleanup,_that.solver,_that.tags,_that.metadata,_that.trace,_that.display,_that.approval,_that.score,_that.logLevel,_that.logLevelTranscript,_that.logFormat,_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.debugErrors,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.modelCostConfig,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.bundleDir,_that.bundleOverwrite,_that.logDirAllowDirty,_that.evalSetId);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( List tasks, @JsonKey(name: 'log_dir') String logDir, @JsonKey(name: 'retry_attempts') int? retryAttempts, @JsonKey(name: 'retry_wait') double? retryWait, @JsonKey(name: 'retry_connections') double? retryConnections, @JsonKey(name: 'retry_cleanup') bool? retryCleanup, List? model, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args') Map modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, @JsonKey(name: 'task_args') Map taskArgs, Object? sandbox, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, Object? solver, List? tags, Map? metadata, bool? trace, String? display, Object? approval, bool score, @JsonKey(name: 'log_level') String? logLevel, @JsonKey(name: 'log_level_transcript') String? 
logLevelTranscript, @JsonKey(name: 'log_format') String? logFormat, Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, @JsonKey(name: 'fail_on_error') double? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'debug_errors') bool? debugErrors, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'model_cost_config') Map? modelCostConfig, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'bundle_dir') String? bundleDir, @JsonKey(name: 'bundle_overwrite') bool bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, @JsonKey(name: 'eval_set_id') String? evalSetId)? 
$default,) {final _that = this; +switch (_that) { +case _EvalSet() when $default != null: +return $default(_that.tasks,_that.logDir,_that.retryAttempts,_that.retryWait,_that.retryConnections,_that.retryCleanup,_that.model,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.taskArgs,_that.sandbox,_that.sandboxCleanup,_that.solver,_that.tags,_that.metadata,_that.trace,_that.display,_that.approval,_that.score,_that.logLevel,_that.logLevelTranscript,_that.logFormat,_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.debugErrors,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.modelCostConfig,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.bundleDir,_that.bundleOverwrite,_that.logDirAllowDirty,_that.evalSetId);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _EvalSet implements EvalSet { + const _EvalSet({required final List tasks, @JsonKey(name: 'log_dir') required this.logDir, @JsonKey(name: 'retry_attempts') this.retryAttempts, @JsonKey(name: 'retry_wait') this.retryWait, @JsonKey(name: 'retry_connections') this.retryConnections, @JsonKey(name: 'retry_cleanup') this.retryCleanup, final List? model, @JsonKey(name: 'model_base_url') this.modelBaseUrl, @JsonKey(name: 'model_args') final Map modelArgs = const {}, @JsonKey(name: 'model_roles') final Map? modelRoles, @JsonKey(name: 'task_args') final Map taskArgs = const {}, this.sandbox, @JsonKey(name: 'sandbox_cleanup') this.sandboxCleanup, this.solver, final List? tags, final Map? 
metadata, this.trace, this.display, this.approval, this.score = true, @JsonKey(name: 'log_level') this.logLevel, @JsonKey(name: 'log_level_transcript') this.logLevelTranscript, @JsonKey(name: 'log_format') this.logFormat, this.limit, @JsonKey(name: 'sample_id') this.sampleId, @JsonKey(name: 'sample_shuffle') this.sampleShuffle, this.epochs, @JsonKey(name: 'fail_on_error') this.failOnError, @JsonKey(name: 'continue_on_fail') this.continueOnFail, @JsonKey(name: 'retry_on_error') this.retryOnError, @JsonKey(name: 'debug_errors') this.debugErrors, @JsonKey(name: 'message_limit') this.messageLimit, @JsonKey(name: 'token_limit') this.tokenLimit, @JsonKey(name: 'time_limit') this.timeLimit, @JsonKey(name: 'working_limit') this.workingLimit, @JsonKey(name: 'cost_limit') this.costLimit, @JsonKey(name: 'model_cost_config') final Map? modelCostConfig, @JsonKey(name: 'max_samples') this.maxSamples, @JsonKey(name: 'max_tasks') this.maxTasks, @JsonKey(name: 'max_subprocesses') this.maxSubprocesses, @JsonKey(name: 'max_sandboxes') this.maxSandboxes, @JsonKey(name: 'log_samples') this.logSamples, @JsonKey(name: 'log_realtime') this.logRealtime, @JsonKey(name: 'log_images') this.logImages, @JsonKey(name: 'log_buffer') this.logBuffer, @JsonKey(name: 'log_shared') this.logShared, @JsonKey(name: 'bundle_dir') this.bundleDir, @JsonKey(name: 'bundle_overwrite') this.bundleOverwrite = false, @JsonKey(name: 'log_dir_allow_dirty') this.logDirAllowDirty, @JsonKey(name: 'eval_set_id') this.evalSetId}): _tasks = tasks,_model = model,_modelArgs = modelArgs,_modelRoles = modelRoles,_taskArgs = taskArgs,_tags = tags,_metadata = metadata,_modelCostConfig = modelCostConfig; + factory _EvalSet.fromJson(Map json) => _$EvalSetFromJson(json); + +/// Task(s) to evaluate. +/// +/// Accepts task file paths, task function names, or other task specifiers. + final List _tasks; +/// Task(s) to evaluate. +/// +/// Accepts task file paths, task function names, or other task specifiers. 
+@override List get tasks { + if (_tasks is EqualUnmodifiableListView) return _tasks; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_tasks); +} + +/// Output path for logging results. +/// +/// Required to ensure a unique storage scope is assigned for the set. +@override@JsonKey(name: 'log_dir') final String logDir; +/// Maximum number of retry attempts before giving up (defaults to 10). +@override@JsonKey(name: 'retry_attempts') final int? retryAttempts; +/// Time in seconds to wait between retry attempts, increased +/// exponentially (defaults to 30). +@override@JsonKey(name: 'retry_wait') final double? retryWait; +/// Reduce `max_connections` at this rate with each retry +/// (defaults to 1.0 — no reduction). +@override@JsonKey(name: 'retry_connections') final double? retryConnections; +/// Cleanup failed log files after retries (defaults to true). +@override@JsonKey(name: 'retry_cleanup') final bool? retryCleanup; +/// Model(s) for evaluation. +/// +/// A list of Provider/model strings (e.g. `"openai/gpt-4o"`) +/// If not specified, uses the `INSPECT_EVAL_MODEL` environment variable. + final List? _model; +/// Model(s) for evaluation. +/// +/// A list of Provider/model strings (e.g. `"openai/gpt-4o"`) +/// If not specified, uses the `INSPECT_EVAL_MODEL` environment variable. +@override List? get model { + final value = _model; + if (value == null) return null; + if (_model is EqualUnmodifiableListView) return _model; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Base URL for communicating with the model API. +@override@JsonKey(name: 'model_base_url') final String? modelBaseUrl; +/// Model creation arguments (dictionary or path to JSON/YAML config). + final Map _modelArgs; +/// Model creation arguments (dictionary or path to JSON/YAML config). 
+@override@JsonKey(name: 'model_args') Map get modelArgs { + if (_modelArgs is EqualUnmodifiableMapView) return _modelArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_modelArgs); +} + +/// Named roles for use in `get_model()`. + final Map? _modelRoles; +/// Named roles for use in `get_model()`. +@override@JsonKey(name: 'model_roles') Map? get modelRoles { + final value = _modelRoles; + if (value == null) return null; + if (_modelRoles is EqualUnmodifiableMapView) return _modelRoles; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Task creation arguments (dictionary or path to JSON/YAML config). + final Map _taskArgs; +/// Task creation arguments (dictionary or path to JSON/YAML config). +@override@JsonKey(name: 'task_args') Map get taskArgs { + if (_taskArgs is EqualUnmodifiableMapView) return _taskArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_taskArgs); +} + +/// Sandbox environment type (or a shorthand spec). +@override final Object? sandbox; +/// Cleanup sandbox environments after task completes (defaults to true). +@override@JsonKey(name: 'sandbox_cleanup') final bool? sandboxCleanup; +/// Alternative solver(s) for evaluating task(s). +@override final Object? solver; +/// Tags to associate with this evaluation run. + final List? _tags; +/// Tags to associate with this evaluation run. +@override List? get tags { + final value = _tags; + if (value == null) return null; + if (_tags is EqualUnmodifiableListView) return _tags; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Metadata to associate with this evaluation run. + final Map? _metadata; +/// Metadata to associate with this evaluation run. +@override Map? 
get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Trace message interactions with evaluated model to terminal. +@override final bool? trace; +/// Task display type (defaults to `"full"`). +@override final String? display; +/// Tool use approval policies. +@override final Object? approval; +/// Score output (defaults to true). +@override@JsonKey() final bool score; +/// Level for logging to the console (defaults to `"warning"`). +@override@JsonKey(name: 'log_level') final String? logLevel; +/// Level for logging to the log file (defaults to `"info"`). +@override@JsonKey(name: 'log_level_transcript') final String? logLevelTranscript; +/// Format for writing log files (`"eval"` or `"json"`). +@override@JsonKey(name: 'log_format') final String? logFormat; +/// Limit evaluated samples (defaults to all samples). +/// +/// Can be an `int` count or a `[start, end]` range. +@override final Object? limit; +/// Evaluate specific sample(s) from the dataset. +@override@JsonKey(name: 'sample_id') final Object? sampleId; +/// Shuffle order of samples (pass a seed to make the order deterministic). +@override@JsonKey(name: 'sample_shuffle') final Object? sampleShuffle; +/// Epochs to repeat samples for and optional score reducer function(s). +@override final Object? epochs; +/// Fail on sample errors. +/// +/// `0.0–1.0` = fail if proportion exceeds threshold, +/// `>1` = fail if count exceeds threshold. +@override@JsonKey(name: 'fail_on_error') final double? failOnError; +/// Continue running even if `fail_on_error` condition is met. +@override@JsonKey(name: 'continue_on_fail') final bool? continueOnFail; +/// Number of times to retry samples on error (default: no retries). +@override@JsonKey(name: 'retry_on_error') final int? retryOnError; +/// Raise task errors for debugging (defaults to false). 
+@override@JsonKey(name: 'debug_errors') final bool? debugErrors; +/// Limit on total messages per sample. +@override@JsonKey(name: 'message_limit') final int? messageLimit; +/// Limit on total tokens per sample. +@override@JsonKey(name: 'token_limit') final int? tokenLimit; +/// Limit on clock time (in seconds) per sample. +@override@JsonKey(name: 'time_limit') final int? timeLimit; +/// Limit on working time (in seconds) per sample. +/// +/// Working time includes model generation, tool calls, etc. but does not +/// include waiting on retries or shared resources. +@override@JsonKey(name: 'working_limit') final int? workingLimit; +/// Limit on total cost (in dollars) per sample. +@override@JsonKey(name: 'cost_limit') final double? costLimit; +/// JSON file with model prices for cost tracking. + final Map? _modelCostConfig; +/// JSON file with model prices for cost tracking. +@override@JsonKey(name: 'model_cost_config') Map? get modelCostConfig { + final value = _modelCostConfig; + if (value == null) return null; + if (_modelCostConfig is EqualUnmodifiableMapView) return _modelCostConfig; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Maximum samples to run in parallel (default is `max_connections`). +@override@JsonKey(name: 'max_samples') final int? maxSamples; +/// Maximum tasks to run in parallel. +@override@JsonKey(name: 'max_tasks') final int? maxTasks; +/// Maximum subprocesses to run in parallel (default is `os.cpu_count()`). +@override@JsonKey(name: 'max_subprocesses') final int? maxSubprocesses; +/// Maximum sandboxes (per-provider) to run in parallel. +@override@JsonKey(name: 'max_sandboxes') final int? maxSandboxes; +/// Log detailed samples and scores (defaults to true). +@override@JsonKey(name: 'log_samples') final bool? logSamples; +/// Log events in realtime (defaults to true). +@override@JsonKey(name: 'log_realtime') final bool? logRealtime; +/// Log base64-encoded images (defaults to false). 
+@override@JsonKey(name: 'log_images') final bool? logImages; +/// Number of samples to buffer before writing log file. +@override@JsonKey(name: 'log_buffer') final int? logBuffer; +/// Sync sample events for realtime viewing. +@override@JsonKey(name: 'log_shared') final int? logShared; +/// Directory to bundle logs and viewer into. +@override@JsonKey(name: 'bundle_dir') final String? bundleDir; +/// Overwrite files in `bundle_dir` (defaults to false). +@override@JsonKey(name: 'bundle_overwrite') final bool bundleOverwrite; +/// Allow log directory to contain unrelated logs (defaults to false). +@override@JsonKey(name: 'log_dir_allow_dirty') final bool? logDirAllowDirty; +/// ID for the eval set. Generated if not specified. +@override@JsonKey(name: 'eval_set_id') final String? evalSetId; + +/// Create a copy of EvalSet +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$EvalSetCopyWith<_EvalSet> get copyWith => __$EvalSetCopyWithImpl<_EvalSet>(this, _$identity); + +@override +Map toJson() { + return _$EvalSetToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _EvalSet&&const DeepCollectionEquality().equals(other._tasks, _tasks)&&(identical(other.logDir, logDir) || other.logDir == logDir)&&(identical(other.retryAttempts, retryAttempts) || other.retryAttempts == retryAttempts)&&(identical(other.retryWait, retryWait) || other.retryWait == retryWait)&&(identical(other.retryConnections, retryConnections) || other.retryConnections == retryConnections)&&(identical(other.retryCleanup, retryCleanup) || other.retryCleanup == retryCleanup)&&const DeepCollectionEquality().equals(other._model, _model)&&(identical(other.modelBaseUrl, modelBaseUrl) || other.modelBaseUrl == modelBaseUrl)&&const DeepCollectionEquality().equals(other._modelArgs, _modelArgs)&&const 
DeepCollectionEquality().equals(other._modelRoles, _modelRoles)&&const DeepCollectionEquality().equals(other._taskArgs, _taskArgs)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&(identical(other.sandboxCleanup, sandboxCleanup) || other.sandboxCleanup == sandboxCleanup)&&const DeepCollectionEquality().equals(other.solver, solver)&&const DeepCollectionEquality().equals(other._tags, _tags)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.trace, trace) || other.trace == trace)&&(identical(other.display, display) || other.display == display)&&const DeepCollectionEquality().equals(other.approval, approval)&&(identical(other.score, score) || other.score == score)&&(identical(other.logLevel, logLevel) || other.logLevel == logLevel)&&(identical(other.logLevelTranscript, logLevelTranscript) || other.logLevelTranscript == logLevelTranscript)&&(identical(other.logFormat, logFormat) || other.logFormat == logFormat)&&const DeepCollectionEquality().equals(other.limit, limit)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)&&const DeepCollectionEquality().equals(other.sampleShuffle, sampleShuffle)&&const DeepCollectionEquality().equals(other.epochs, epochs)&&(identical(other.failOnError, failOnError) || other.failOnError == failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.retryOnError, retryOnError) || other.retryOnError == retryOnError)&&(identical(other.debugErrors, debugErrors) || other.debugErrors == debugErrors)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.costLimit, costLimit) || other.costLimit == costLimit)&&const 
DeepCollectionEquality().equals(other._modelCostConfig, _modelCostConfig)&&(identical(other.maxSamples, maxSamples) || other.maxSamples == maxSamples)&&(identical(other.maxTasks, maxTasks) || other.maxTasks == maxTasks)&&(identical(other.maxSubprocesses, maxSubprocesses) || other.maxSubprocesses == maxSubprocesses)&&(identical(other.maxSandboxes, maxSandboxes) || other.maxSandboxes == maxSandboxes)&&(identical(other.logSamples, logSamples) || other.logSamples == logSamples)&&(identical(other.logRealtime, logRealtime) || other.logRealtime == logRealtime)&&(identical(other.logImages, logImages) || other.logImages == logImages)&&(identical(other.logBuffer, logBuffer) || other.logBuffer == logBuffer)&&(identical(other.logShared, logShared) || other.logShared == logShared)&&(identical(other.bundleDir, bundleDir) || other.bundleDir == bundleDir)&&(identical(other.bundleOverwrite, bundleOverwrite) || other.bundleOverwrite == bundleOverwrite)&&(identical(other.logDirAllowDirty, logDirAllowDirty) || other.logDirAllowDirty == logDirAllowDirty)&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,const DeepCollectionEquality().hash(_tasks),logDir,retryAttempts,retryWait,retryConnections,retryCleanup,const DeepCollectionEquality().hash(_model),modelBaseUrl,const DeepCollectionEquality().hash(_modelArgs),const DeepCollectionEquality().hash(_modelRoles),const DeepCollectionEquality().hash(_taskArgs),const DeepCollectionEquality().hash(sandbox),sandboxCleanup,const DeepCollectionEquality().hash(solver),const DeepCollectionEquality().hash(_tags),const DeepCollectionEquality().hash(_metadata),trace,display,const DeepCollectionEquality().hash(approval),score,logLevel,logLevelTranscript,logFormat,const DeepCollectionEquality().hash(limit),const DeepCollectionEquality().hash(sampleId),const DeepCollectionEquality().hash(sampleShuffle),const 
DeepCollectionEquality().hash(epochs),failOnError,continueOnFail,retryOnError,debugErrors,messageLimit,tokenLimit,timeLimit,workingLimit,costLimit,const DeepCollectionEquality().hash(_modelCostConfig),maxSamples,maxTasks,maxSubprocesses,maxSandboxes,logSamples,logRealtime,logImages,logBuffer,logShared,bundleDir,bundleOverwrite,logDirAllowDirty,evalSetId]); + +@override +String toString() { + return 'EvalSet(tasks: $tasks, logDir: $logDir, retryAttempts: $retryAttempts, retryWait: $retryWait, retryConnections: $retryConnections, retryCleanup: $retryCleanup, model: $model, modelBaseUrl: $modelBaseUrl, modelArgs: $modelArgs, modelRoles: $modelRoles, taskArgs: $taskArgs, sandbox: $sandbox, sandboxCleanup: $sandboxCleanup, solver: $solver, tags: $tags, metadata: $metadata, trace: $trace, display: $display, approval: $approval, score: $score, logLevel: $logLevel, logLevelTranscript: $logLevelTranscript, logFormat: $logFormat, limit: $limit, sampleId: $sampleId, sampleShuffle: $sampleShuffle, epochs: $epochs, failOnError: $failOnError, continueOnFail: $continueOnFail, retryOnError: $retryOnError, debugErrors: $debugErrors, messageLimit: $messageLimit, tokenLimit: $tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, costLimit: $costLimit, modelCostConfig: $modelCostConfig, maxSamples: $maxSamples, maxTasks: $maxTasks, maxSubprocesses: $maxSubprocesses, maxSandboxes: $maxSandboxes, logSamples: $logSamples, logRealtime: $logRealtime, logImages: $logImages, logBuffer: $logBuffer, logShared: $logShared, bundleDir: $bundleDir, bundleOverwrite: $bundleOverwrite, logDirAllowDirty: $logDirAllowDirty, evalSetId: $evalSetId)'; +} + + +} + +/// @nodoc +abstract mixin class _$EvalSetCopyWith<$Res> implements $EvalSetCopyWith<$Res> { + factory _$EvalSetCopyWith(_EvalSet value, $Res Function(_EvalSet) _then) = __$EvalSetCopyWithImpl; +@override @useResult +$Res call({ + List tasks,@JsonKey(name: 'log_dir') String logDir,@JsonKey(name: 'retry_attempts') int? 
retryAttempts,@JsonKey(name: 'retry_wait') double? retryWait,@JsonKey(name: 'retry_connections') double? retryConnections,@JsonKey(name: 'retry_cleanup') bool? retryCleanup, List? model,@JsonKey(name: 'model_base_url') String? modelBaseUrl,@JsonKey(name: 'model_args') Map modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles,@JsonKey(name: 'task_args') Map taskArgs, Object? sandbox,@JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, Object? solver, List? tags, Map? metadata, bool? trace, String? display, Object? approval, bool score,@JsonKey(name: 'log_level') String? logLevel,@JsonKey(name: 'log_level_transcript') String? logLevelTranscript,@JsonKey(name: 'log_format') String? logFormat, Object? limit,@JsonKey(name: 'sample_id') Object? sampleId,@JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs,@JsonKey(name: 'fail_on_error') double? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'retry_on_error') int? retryOnError,@JsonKey(name: 'debug_errors') bool? debugErrors,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? workingLimit,@JsonKey(name: 'cost_limit') double? costLimit,@JsonKey(name: 'model_cost_config') Map? modelCostConfig,@JsonKey(name: 'max_samples') int? maxSamples,@JsonKey(name: 'max_tasks') int? maxTasks,@JsonKey(name: 'max_subprocesses') int? maxSubprocesses,@JsonKey(name: 'max_sandboxes') int? maxSandboxes,@JsonKey(name: 'log_samples') bool? logSamples,@JsonKey(name: 'log_realtime') bool? logRealtime,@JsonKey(name: 'log_images') bool? logImages,@JsonKey(name: 'log_buffer') int? logBuffer,@JsonKey(name: 'log_shared') int? logShared,@JsonKey(name: 'bundle_dir') String? bundleDir,@JsonKey(name: 'bundle_overwrite') bool bundleOverwrite,@JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty,@JsonKey(name: 'eval_set_id') String? 
evalSetId +}); + + + + +} +/// @nodoc +class __$EvalSetCopyWithImpl<$Res> + implements _$EvalSetCopyWith<$Res> { + __$EvalSetCopyWithImpl(this._self, this._then); + + final _EvalSet _self; + final $Res Function(_EvalSet) _then; + +/// Create a copy of EvalSet +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? tasks = null,Object? logDir = null,Object? retryAttempts = freezed,Object? retryWait = freezed,Object? retryConnections = freezed,Object? retryCleanup = freezed,Object? model = freezed,Object? modelBaseUrl = freezed,Object? modelArgs = null,Object? modelRoles = freezed,Object? taskArgs = null,Object? sandbox = freezed,Object? sandboxCleanup = freezed,Object? solver = freezed,Object? tags = freezed,Object? metadata = freezed,Object? trace = freezed,Object? display = freezed,Object? approval = freezed,Object? score = null,Object? logLevel = freezed,Object? logLevelTranscript = freezed,Object? logFormat = freezed,Object? limit = freezed,Object? sampleId = freezed,Object? sampleShuffle = freezed,Object? epochs = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? retryOnError = freezed,Object? debugErrors = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? workingLimit = freezed,Object? costLimit = freezed,Object? modelCostConfig = freezed,Object? maxSamples = freezed,Object? maxTasks = freezed,Object? maxSubprocesses = freezed,Object? maxSandboxes = freezed,Object? logSamples = freezed,Object? logRealtime = freezed,Object? logImages = freezed,Object? logBuffer = freezed,Object? logShared = freezed,Object? bundleDir = freezed,Object? bundleOverwrite = null,Object? logDirAllowDirty = freezed,Object? evalSetId = freezed,}) { + return _then(_EvalSet( +tasks: null == tasks ? _self._tasks : tasks // ignore: cast_nullable_to_non_nullable +as List,logDir: null == logDir ? 
_self.logDir : logDir // ignore: cast_nullable_to_non_nullable +as String,retryAttempts: freezed == retryAttempts ? _self.retryAttempts : retryAttempts // ignore: cast_nullable_to_non_nullable +as int?,retryWait: freezed == retryWait ? _self.retryWait : retryWait // ignore: cast_nullable_to_non_nullable +as double?,retryConnections: freezed == retryConnections ? _self.retryConnections : retryConnections // ignore: cast_nullable_to_non_nullable +as double?,retryCleanup: freezed == retryCleanup ? _self.retryCleanup : retryCleanup // ignore: cast_nullable_to_non_nullable +as bool?,model: freezed == model ? _self._model : model // ignore: cast_nullable_to_non_nullable +as List?,modelBaseUrl: freezed == modelBaseUrl ? _self.modelBaseUrl : modelBaseUrl // ignore: cast_nullable_to_non_nullable +as String?,modelArgs: null == modelArgs ? _self._modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map,modelRoles: freezed == modelRoles ? _self._modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,taskArgs: null == taskArgs ? _self._taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,sandboxCleanup: freezed == sandboxCleanup ? _self.sandboxCleanup : sandboxCleanup // ignore: cast_nullable_to_non_nullable +as bool?,solver: freezed == solver ? _self.solver : solver ,tags: freezed == tags ? _self._tags : tags // ignore: cast_nullable_to_non_nullable +as List?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,trace: freezed == trace ? _self.trace : trace // ignore: cast_nullable_to_non_nullable +as bool?,display: freezed == display ? _self.display : display // ignore: cast_nullable_to_non_nullable +as String?,approval: freezed == approval ? _self.approval : approval ,score: null == score ? _self.score : score // ignore: cast_nullable_to_non_nullable +as bool,logLevel: freezed == logLevel ? 
_self.logLevel : logLevel // ignore: cast_nullable_to_non_nullable +as String?,logLevelTranscript: freezed == logLevelTranscript ? _self.logLevelTranscript : logLevelTranscript // ignore: cast_nullable_to_non_nullable +as String?,logFormat: freezed == logFormat ? _self.logFormat : logFormat // ignore: cast_nullable_to_non_nullable +as String?,limit: freezed == limit ? _self.limit : limit ,sampleId: freezed == sampleId ? _self.sampleId : sampleId ,sampleShuffle: freezed == sampleShuffle ? _self.sampleShuffle : sampleShuffle ,epochs: freezed == epochs ? _self.epochs : epochs ,failOnError: freezed == failOnError ? _self.failOnError : failOnError // ignore: cast_nullable_to_non_nullable +as double?,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,retryOnError: freezed == retryOnError ? _self.retryOnError : retryOnError // ignore: cast_nullable_to_non_nullable +as int?,debugErrors: freezed == debugErrors ? _self.debugErrors : debugErrors // ignore: cast_nullable_to_non_nullable +as bool?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,costLimit: freezed == costLimit ? _self.costLimit : costLimit // ignore: cast_nullable_to_non_nullable +as double?,modelCostConfig: freezed == modelCostConfig ? _self._modelCostConfig : modelCostConfig // ignore: cast_nullable_to_non_nullable +as Map?,maxSamples: freezed == maxSamples ? _self.maxSamples : maxSamples // ignore: cast_nullable_to_non_nullable +as int?,maxTasks: freezed == maxTasks ? 
_self.maxTasks : maxTasks // ignore: cast_nullable_to_non_nullable +as int?,maxSubprocesses: freezed == maxSubprocesses ? _self.maxSubprocesses : maxSubprocesses // ignore: cast_nullable_to_non_nullable +as int?,maxSandboxes: freezed == maxSandboxes ? _self.maxSandboxes : maxSandboxes // ignore: cast_nullable_to_non_nullable +as int?,logSamples: freezed == logSamples ? _self.logSamples : logSamples // ignore: cast_nullable_to_non_nullable +as bool?,logRealtime: freezed == logRealtime ? _self.logRealtime : logRealtime // ignore: cast_nullable_to_non_nullable +as bool?,logImages: freezed == logImages ? _self.logImages : logImages // ignore: cast_nullable_to_non_nullable +as bool?,logBuffer: freezed == logBuffer ? _self.logBuffer : logBuffer // ignore: cast_nullable_to_non_nullable +as int?,logShared: freezed == logShared ? _self.logShared : logShared // ignore: cast_nullable_to_non_nullable +as int?,bundleDir: freezed == bundleDir ? _self.bundleDir : bundleDir // ignore: cast_nullable_to_non_nullable +as String?,bundleOverwrite: null == bundleOverwrite ? _self.bundleOverwrite : bundleOverwrite // ignore: cast_nullable_to_non_nullable +as bool,logDirAllowDirty: freezed == logDirAllowDirty ? _self.logDirAllowDirty : logDirAllowDirty // ignore: cast_nullable_to_non_nullable +as bool?,evalSetId: freezed == evalSetId ? 
_self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/eval_set.g.dart b/packages/eval_config/lib/src/models/eval_set.g.dart new file mode 100644 index 0000000..7b0db55 --- /dev/null +++ b/packages/eval_config/lib/src/models/eval_set.g.dart @@ -0,0 +1,117 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'eval_set.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_EvalSet _$EvalSetFromJson(Map json) => _EvalSet( + tasks: (json['tasks'] as List) + .map((e) => Task.fromJson(e as Map)) + .toList(), + logDir: json['log_dir'] as String, + retryAttempts: (json['retry_attempts'] as num?)?.toInt(), + retryWait: (json['retry_wait'] as num?)?.toDouble(), + retryConnections: (json['retry_connections'] as num?)?.toDouble(), + retryCleanup: json['retry_cleanup'] as bool?, + model: (json['model'] as List?)?.map((e) => e as String).toList(), + modelBaseUrl: json['model_base_url'] as String?, + modelArgs: json['model_args'] as Map? ?? const {}, + modelRoles: (json['model_roles'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ), + taskArgs: json['task_args'] as Map? ?? const {}, + sandbox: json['sandbox'], + sandboxCleanup: json['sandbox_cleanup'] as bool?, + solver: json['solver'], + tags: (json['tags'] as List?)?.map((e) => e as String).toList(), + metadata: json['metadata'] as Map?, + trace: json['trace'] as bool?, + display: json['display'] as String?, + approval: json['approval'], + score: json['score'] as bool? ?? 
true, + logLevel: json['log_level'] as String?, + logLevelTranscript: json['log_level_transcript'] as String?, + logFormat: json['log_format'] as String?, + limit: json['limit'], + sampleId: json['sample_id'], + sampleShuffle: json['sample_shuffle'], + epochs: json['epochs'], + failOnError: (json['fail_on_error'] as num?)?.toDouble(), + continueOnFail: json['continue_on_fail'] as bool?, + retryOnError: (json['retry_on_error'] as num?)?.toInt(), + debugErrors: json['debug_errors'] as bool?, + messageLimit: (json['message_limit'] as num?)?.toInt(), + tokenLimit: (json['token_limit'] as num?)?.toInt(), + timeLimit: (json['time_limit'] as num?)?.toInt(), + workingLimit: (json['working_limit'] as num?)?.toInt(), + costLimit: (json['cost_limit'] as num?)?.toDouble(), + modelCostConfig: json['model_cost_config'] as Map?, + maxSamples: (json['max_samples'] as num?)?.toInt(), + maxTasks: (json['max_tasks'] as num?)?.toInt(), + maxSubprocesses: (json['max_subprocesses'] as num?)?.toInt(), + maxSandboxes: (json['max_sandboxes'] as num?)?.toInt(), + logSamples: json['log_samples'] as bool?, + logRealtime: json['log_realtime'] as bool?, + logImages: json['log_images'] as bool?, + logBuffer: (json['log_buffer'] as num?)?.toInt(), + logShared: (json['log_shared'] as num?)?.toInt(), + bundleDir: json['bundle_dir'] as String?, + bundleOverwrite: json['bundle_overwrite'] as bool? ?? 
false, + logDirAllowDirty: json['log_dir_allow_dirty'] as bool?, + evalSetId: json['eval_set_id'] as String?, +); + +Map _$EvalSetToJson(_EvalSet instance) => { + 'tasks': instance.tasks.map((e) => e.toJson()).toList(), + 'log_dir': instance.logDir, + 'retry_attempts': instance.retryAttempts, + 'retry_wait': instance.retryWait, + 'retry_connections': instance.retryConnections, + 'retry_cleanup': instance.retryCleanup, + 'model': instance.model, + 'model_base_url': instance.modelBaseUrl, + 'model_args': instance.modelArgs, + 'model_roles': instance.modelRoles, + 'task_args': instance.taskArgs, + 'sandbox': instance.sandbox, + 'sandbox_cleanup': instance.sandboxCleanup, + 'solver': instance.solver, + 'tags': instance.tags, + 'metadata': instance.metadata, + 'trace': instance.trace, + 'display': instance.display, + 'approval': instance.approval, + 'score': instance.score, + 'log_level': instance.logLevel, + 'log_level_transcript': instance.logLevelTranscript, + 'log_format': instance.logFormat, + 'limit': instance.limit, + 'sample_id': instance.sampleId, + 'sample_shuffle': instance.sampleShuffle, + 'epochs': instance.epochs, + 'fail_on_error': instance.failOnError, + 'continue_on_fail': instance.continueOnFail, + 'retry_on_error': instance.retryOnError, + 'debug_errors': instance.debugErrors, + 'message_limit': instance.messageLimit, + 'token_limit': instance.tokenLimit, + 'time_limit': instance.timeLimit, + 'working_limit': instance.workingLimit, + 'cost_limit': instance.costLimit, + 'model_cost_config': instance.modelCostConfig, + 'max_samples': instance.maxSamples, + 'max_tasks': instance.maxTasks, + 'max_subprocesses': instance.maxSubprocesses, + 'max_sandboxes': instance.maxSandboxes, + 'log_samples': instance.logSamples, + 'log_realtime': instance.logRealtime, + 'log_images': instance.logImages, + 'log_buffer': instance.logBuffer, + 'log_shared': instance.logShared, + 'bundle_dir': instance.bundleDir, + 'bundle_overwrite': instance.bundleOverwrite, + 
'log_dir_allow_dirty': instance.logDirAllowDirty, + 'eval_set_id': instance.evalSetId, +}; diff --git a/packages/eval_config/lib/src/models/field_spec.dart b/packages/eval_config/lib/src/models/field_spec.dart new file mode 100644 index 0000000..e4f4611 --- /dev/null +++ b/packages/eval_config/lib/src/models/field_spec.dart @@ -0,0 +1,41 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +part 'field_spec.freezed.dart'; +part 'field_spec.g.dart'; + +/// Dart representation of Inspect AI's `FieldSpec` dataclass. +/// +/// Specification for mapping data source fields to sample fields. +/// +/// See [`FieldSpec`](https://inspect.aisi.org.uk/reference/inspect_ai.dataset.html#fieldspec). +@freezed +sealed class FieldSpec with _$FieldSpec { + const factory FieldSpec({ + /// Name of the field containing the sample input. + String? input, + + /// Name of the field containing the sample target. + String? target, + + /// Name of the field containing the list of answer choices. + String? choices, + + /// Name of the field containing the unique sample identifier. + String? id, + + /// List of additional field names that should be read as metadata. + List? metadata, + + /// Sandbox type along with optional config file. + String? sandbox, + + /// Name of the field containing files that go with the sample. + String? files, + + /// Name of the field containing the setup script. + String? 
setup, + }) = _FieldSpec; + + factory FieldSpec.fromJson(Map json) => + _$FieldSpecFromJson(json); +} diff --git a/packages/eval_config/lib/src/models/field_spec.freezed.dart b/packages/eval_config/lib/src/models/field_spec.freezed.dart new file mode 100644 index 0000000..cd5dc3f --- /dev/null +++ b/packages/eval_config/lib/src/models/field_spec.freezed.dart @@ -0,0 +1,317 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'field_spec.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$FieldSpec { + +/// Name of the field containing the sample input. + String? get input;/// Name of the field containing the sample target. + String? get target;/// Name of the field containing the list of answer choices. + String? get choices;/// Name of the field containing the unique sample identifier. + String? get id;/// List of additional field names that should be read as metadata. + List? get metadata;/// Sandbox type along with optional config file. + String? get sandbox;/// Name of the field containing files that go with the sample. + String? get files;/// Name of the field containing the setup script. + String? get setup; +/// Create a copy of FieldSpec +/// with the given fields replaced by the non-null parameter values. 
+@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$FieldSpecCopyWith get copyWith => _$FieldSpecCopyWithImpl(this as FieldSpec, _$identity); + + /// Serializes this FieldSpec to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is FieldSpec&&(identical(other.input, input) || other.input == input)&&(identical(other.target, target) || other.target == target)&&(identical(other.choices, choices) || other.choices == choices)&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&(identical(other.sandbox, sandbox) || other.sandbox == sandbox)&&(identical(other.files, files) || other.files == files)&&(identical(other.setup, setup) || other.setup == setup)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,input,target,choices,id,const DeepCollectionEquality().hash(metadata),sandbox,files,setup); + +@override +String toString() { + return 'FieldSpec(input: $input, target: $target, choices: $choices, id: $id, metadata: $metadata, sandbox: $sandbox, files: $files, setup: $setup)'; +} + + +} + +/// @nodoc +abstract mixin class $FieldSpecCopyWith<$Res> { + factory $FieldSpecCopyWith(FieldSpec value, $Res Function(FieldSpec) _then) = _$FieldSpecCopyWithImpl; +@useResult +$Res call({ + String? input, String? target, String? choices, String? id, List? metadata, String? sandbox, String? files, String? setup +}); + + + + +} +/// @nodoc +class _$FieldSpecCopyWithImpl<$Res> + implements $FieldSpecCopyWith<$Res> { + _$FieldSpecCopyWithImpl(this._self, this._then); + + final FieldSpec _self; + final $Res Function(FieldSpec) _then; + +/// Create a copy of FieldSpec +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? input = freezed,Object? 
target = freezed,Object? choices = freezed,Object? id = freezed,Object? metadata = freezed,Object? sandbox = freezed,Object? files = freezed,Object? setup = freezed,}) { + return _then(_self.copyWith( +input: freezed == input ? _self.input : input // ignore: cast_nullable_to_non_nullable +as String?,target: freezed == target ? _self.target : target // ignore: cast_nullable_to_non_nullable +as String?,choices: freezed == choices ? _self.choices : choices // ignore: cast_nullable_to_non_nullable +as String?,id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as List?,sandbox: freezed == sandbox ? _self.sandbox : sandbox // ignore: cast_nullable_to_non_nullable +as String?,files: freezed == files ? _self.files : files // ignore: cast_nullable_to_non_nullable +as String?,setup: freezed == setup ? _self.setup : setup // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [FieldSpec]. +extension FieldSpecPatterns on FieldSpec { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _FieldSpec value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _FieldSpec() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _FieldSpec value) $default,){ +final _that = this; +switch (_that) { +case _FieldSpec(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _FieldSpec value)? $default,){ +final _that = this; +switch (_that) { +case _FieldSpec() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String? input, String? target, String? choices, String? id, List? metadata, String? sandbox, String? files, String? setup)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _FieldSpec() when $default != null: +return $default(_that.input,_that.target,_that.choices,_that.id,_that.metadata,_that.sandbox,_that.files,_that.setup);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String? input, String? target, String? choices, String? id, List? metadata, String? sandbox, String? files, String? 
setup) $default,) {final _that = this; +switch (_that) { +case _FieldSpec(): +return $default(_that.input,_that.target,_that.choices,_that.id,_that.metadata,_that.sandbox,_that.files,_that.setup);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String? input, String? target, String? choices, String? id, List? metadata, String? sandbox, String? files, String? setup)? $default,) {final _that = this; +switch (_that) { +case _FieldSpec() when $default != null: +return $default(_that.input,_that.target,_that.choices,_that.id,_that.metadata,_that.sandbox,_that.files,_that.setup);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _FieldSpec implements FieldSpec { + const _FieldSpec({this.input, this.target, this.choices, this.id, final List? metadata, this.sandbox, this.files, this.setup}): _metadata = metadata; + factory _FieldSpec.fromJson(Map json) => _$FieldSpecFromJson(json); + +/// Name of the field containing the sample input. +@override final String? input; +/// Name of the field containing the sample target. +@override final String? target; +/// Name of the field containing the list of answer choices. +@override final String? choices; +/// Name of the field containing the unique sample identifier. +@override final String? id; +/// List of additional field names that should be read as metadata. + final List? _metadata; +/// List of additional field names that should be read as metadata. +@override List? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableListView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Sandbox type along with optional config file. 
+@override final String? sandbox; +/// Name of the field containing files that go with the sample. +@override final String? files; +/// Name of the field containing the setup script. +@override final String? setup; + +/// Create a copy of FieldSpec +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$FieldSpecCopyWith<_FieldSpec> get copyWith => __$FieldSpecCopyWithImpl<_FieldSpec>(this, _$identity); + +@override +Map toJson() { + return _$FieldSpecToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _FieldSpec&&(identical(other.input, input) || other.input == input)&&(identical(other.target, target) || other.target == target)&&(identical(other.choices, choices) || other.choices == choices)&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.sandbox, sandbox) || other.sandbox == sandbox)&&(identical(other.files, files) || other.files == files)&&(identical(other.setup, setup) || other.setup == setup)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,input,target,choices,id,const DeepCollectionEquality().hash(_metadata),sandbox,files,setup); + +@override +String toString() { + return 'FieldSpec(input: $input, target: $target, choices: $choices, id: $id, metadata: $metadata, sandbox: $sandbox, files: $files, setup: $setup)'; +} + + +} + +/// @nodoc +abstract mixin class _$FieldSpecCopyWith<$Res> implements $FieldSpecCopyWith<$Res> { + factory _$FieldSpecCopyWith(_FieldSpec value, $Res Function(_FieldSpec) _then) = __$FieldSpecCopyWithImpl; +@override @useResult +$Res call({ + String? input, String? target, String? choices, String? id, List? metadata, String? sandbox, String? files, String? 
setup +}); + + + + +} +/// @nodoc +class __$FieldSpecCopyWithImpl<$Res> + implements _$FieldSpecCopyWith<$Res> { + __$FieldSpecCopyWithImpl(this._self, this._then); + + final _FieldSpec _self; + final $Res Function(_FieldSpec) _then; + +/// Create a copy of FieldSpec +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? input = freezed,Object? target = freezed,Object? choices = freezed,Object? id = freezed,Object? metadata = freezed,Object? sandbox = freezed,Object? files = freezed,Object? setup = freezed,}) { + return _then(_FieldSpec( +input: freezed == input ? _self.input : input // ignore: cast_nullable_to_non_nullable +as String?,target: freezed == target ? _self.target : target // ignore: cast_nullable_to_non_nullable +as String?,choices: freezed == choices ? _self.choices : choices // ignore: cast_nullable_to_non_nullable +as String?,id: freezed == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as List?,sandbox: freezed == sandbox ? _self.sandbox : sandbox // ignore: cast_nullable_to_non_nullable +as String?,files: freezed == files ? _self.files : files // ignore: cast_nullable_to_non_nullable +as String?,setup: freezed == setup ? 
_self.setup : setup // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/field_spec.g.dart b/packages/eval_config/lib/src/models/field_spec.g.dart new file mode 100644 index 0000000..8b73656 --- /dev/null +++ b/packages/eval_config/lib/src/models/field_spec.g.dart @@ -0,0 +1,32 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'field_spec.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_FieldSpec _$FieldSpecFromJson(Map json) => _FieldSpec( + input: json['input'] as String?, + target: json['target'] as String?, + choices: json['choices'] as String?, + id: json['id'] as String?, + metadata: (json['metadata'] as List?) + ?.map((e) => e as String) + .toList(), + sandbox: json['sandbox'] as String?, + files: json['files'] as String?, + setup: json['setup'] as String?, +); + +Map _$FieldSpecToJson(_FieldSpec instance) => + { + 'input': instance.input, + 'target': instance.target, + 'choices': instance.choices, + 'id': instance.id, + 'metadata': instance.metadata, + 'sandbox': instance.sandbox, + 'files': instance.files, + 'setup': instance.setup, + }; diff --git a/packages/eval_config/lib/src/models/job.dart b/packages/eval_config/lib/src/models/job.dart new file mode 100644 index 0000000..800f19c --- /dev/null +++ b/packages/eval_config/lib/src/models/job.dart @@ -0,0 +1,279 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +part 'job.freezed.dart'; +part 'job.g.dart'; + +/// A job configuration defining what to run and how to run it. +/// +/// Jobs combine runtime settings (log directory, sandbox type, rate limits) +/// with filtering (which models, variants, and tasks to include). +/// +/// Top-level fields cover the most common settings. 
For full control over
+/// `eval_set()` and `Task` parameters, use [evalSetOverrides] and
+/// [taskDefaults] respectively — any valid `eval_set()` / `Task` kwarg can
+/// be specified there and will be passed through to the Python runner.
+///
+/// Example YAML:
+/// ```yaml
+/// log_dir: ./logs/my_run
+/// sandbox: podman
+/// max_connections: 10
+/// models:
+///   - google/gemini-2.5-flash
+/// variants:
+///   baseline: {}
+///   context_only:
+///     context_files: [./context_files/flutter.md]
+/// tasks:
+///   dart_qa:
+///     include-samples: [sample_1]
+///
+/// # Pass-through to eval_set()
+/// eval_set_overrides:
+///   retry_attempts: 20
+///   log_level: debug
+///
+/// # Default Task-level overrides applied to every task
+/// task_defaults:
+///   time_limit: 600
+///   message_limit: 50
+/// ```
+@freezed
+sealed class Job with _$Job {
+  const factory Job({
+    // ------------------------------------------------------------------
+    // Core job settings
+    // ------------------------------------------------------------------
+
+    /// Directory to write evaluation logs to.
+    @JsonKey(name: 'log_dir') required String logDir,
+
+    /// Sandbox type: `'local'`, `'docker'`, or `'podman'`.
+    ///
+    /// NOTE(review): the example in the class doc uses the key `sandbox:`
+    /// while this field's JSON key is `sandbox_type` — confirm which key
+    /// the job YAML loader actually accepts.
+    @JsonKey(name: 'sandbox_type') @Default('local') String sandboxType,
+
+    /// Maximum concurrent API connections.
+    @JsonKey(name: 'max_connections') @Default(10) int maxConnections,
+
+    /// Models to run. `null` means use defaults from registries.
+    List? models,
+
+    /// Named variant map. Keys are variant names, values are config dicts.
+    /// `null` means baseline only.
+    Map>? variants,
+
+    /// Glob patterns for discovering task directories (relative to dataset root).
+    @JsonKey(name: 'task_paths') List? taskPaths,
+
+    /// Per-task configurations with inline overrides.
+    /// `null` means run all tasks.
+    Map? tasks,
+
+    /// If `true`, copy final workspace to `/examples/` after each sample.
+    @JsonKey(name: 'save_examples') @Default(false) bool saveExamples,
+
+    // ------------------------------------------------------------------
+    // Promoted eval_set() parameters (convenience top-level keys)
+    // ------------------------------------------------------------------
+
+    /// Maximum retry attempts before giving up (defaults to 10).
+    @JsonKey(name: 'retry_attempts') int? retryAttempts,
+
+    /// Maximum number of retry attempts for failed samples.
+    @JsonKey(name: 'max_retries') int? maxRetries,
+
+    /// Time in seconds to wait between retry attempts (exponential backoff).
+    @JsonKey(name: 'retry_wait') double? retryWait,
+
+    /// Reduce `max_connections` at this rate with each retry (default 1.0).
+    @JsonKey(name: 'retry_connections') double? retryConnections,
+
+    /// Cleanup failed log files after retries (defaults to true).
+    @JsonKey(name: 'retry_cleanup') bool? retryCleanup,
+
+    /// Fail on sample errors.
+    ///
+    /// `0.0–1.0` = fail if proportion exceeds threshold,
+    /// `>1` = fail if count exceeds threshold.
+    @JsonKey(name: 'fail_on_error') double? failOnError,
+
+    /// Continue running even if `fail_on_error` condition is met.
+    @JsonKey(name: 'continue_on_fail') bool? continueOnFail,
+
+    /// Number of times to retry samples on error (default: no retries).
+    @JsonKey(name: 'retry_on_error') int? retryOnError,
+
+    /// Raise task errors for debugging (defaults to false).
+    @JsonKey(name: 'debug_errors') bool? debugErrors,
+
+    /// Maximum samples to run in parallel (default is `max_connections`).
+    @JsonKey(name: 'max_samples') int? maxSamples,
+
+    /// Maximum tasks to run in parallel.
+    @JsonKey(name: 'max_tasks') int? maxTasks,
+
+    /// Maximum subprocesses to run in parallel.
+    @JsonKey(name: 'max_subprocesses') int? maxSubprocesses,
+
+    /// Maximum sandboxes (per-provider) to run in parallel.
+    @JsonKey(name: 'max_sandboxes') int? maxSandboxes,
+
+    /// Level for logging to the console (e.g. `"warning"`, `"info"`, `"debug"`).
+    @JsonKey(name: 'log_level') String? logLevel,
+
+    /// Level for logging to the log file (defaults to `"info"`).
+    @JsonKey(name: 'log_level_transcript') String? logLevelTranscript,
+
+    /// Format for writing log files (`"eval"` or `"json"`).
+    @JsonKey(name: 'log_format') String? logFormat,
+
+    /// Tags to associate with this evaluation run.
+    List? tags,
+
+    /// Metadata to associate with this evaluation run.
+    Map? metadata,
+
+    /// Trace message interactions with evaluated model to terminal.
+    bool? trace,
+
+    /// Task display type (defaults to `"full"`).
+    String? display,
+
+    /// Score output (defaults to true).
+    bool? score,
+
+    /// Limit evaluated samples (int count or `[start, end]` range).
+    Object? limit,
+
+    /// Evaluate specific sample(s) from the dataset.
+    @JsonKey(name: 'sample_id') Object? sampleId,
+
+    /// Shuffle order of samples (pass a seed to make order deterministic).
+    @JsonKey(name: 'sample_shuffle') Object? sampleShuffle,
+
+    /// Epochs to repeat samples for and optional score reducer function(s).
+    Object? epochs,
+
+    /// Tool use approval policies (string or config dict).
+    Object? approval,
+
+    /// Alternative solver(s) for evaluating task(s) (string or config dict).
+    Object? solver,
+
+    /// Sandbox cleanup after task completes (defaults to true).
+    @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup,
+
+    /// Base URL for communicating with the model API.
+    @JsonKey(name: 'model_base_url') String? modelBaseUrl,
+
+    /// Model creation arguments.
+    @JsonKey(name: 'model_args') Map? modelArgs,
+
+    /// Named roles for use in `get_model()`.
+    @JsonKey(name: 'model_roles') Map? modelRoles,
+
+    /// Task creation arguments.
+    @JsonKey(name: 'task_args') Map? taskArgs,
+
+    /// Limit on total messages per sample.
+    @JsonKey(name: 'message_limit') int? messageLimit,
+
+    /// Limit on total tokens per sample.
+    @JsonKey(name: 'token_limit') int? tokenLimit,
+
+    /// Limit on clock time (in seconds) per sample.
+    @JsonKey(name: 'time_limit') int? timeLimit,
+
+    /// Limit on working time (in seconds) per sample.
+    @JsonKey(name: 'working_limit') int? workingLimit,
+
+    /// Limit on total cost (in dollars) per sample.
+    @JsonKey(name: 'cost_limit') double? costLimit,
+
+    /// JSON file with model prices for cost tracking.
+    @JsonKey(name: 'model_cost_config') Map? modelCostConfig,
+
+    /// Log detailed samples and scores (defaults to true).
+    @JsonKey(name: 'log_samples') bool? logSamples,
+
+    /// Log events in realtime (defaults to true).
+    @JsonKey(name: 'log_realtime') bool? logRealtime,
+
+    /// Log base64-encoded images (defaults to false).
+    @JsonKey(name: 'log_images') bool? logImages,
+
+    /// Number of samples to buffer before writing log file.
+    @JsonKey(name: 'log_buffer') int? logBuffer,
+
+    /// Sync sample events for realtime viewing.
+    @JsonKey(name: 'log_shared') int? logShared,
+
+    /// Directory to bundle logs and viewer into.
+    @JsonKey(name: 'bundle_dir') String? bundleDir,
+
+    /// Overwrite files in `bundle_dir` (defaults to false).
+    @JsonKey(name: 'bundle_overwrite') bool? bundleOverwrite,
+
+    /// Allow log directory to contain unrelated logs (defaults to false).
+    @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty,
+
+    /// ID for the eval set. Generated if not specified.
+    @JsonKey(name: 'eval_set_id') String? evalSetId,
+
+    // ------------------------------------------------------------------
+    // Pass-through overrides
+    // ------------------------------------------------------------------
+
+    /// Additional `eval_set()` kwargs not covered by top-level fields.
+    ///
+    /// Any valid `eval_set()` parameter can be specified here and will be
+    /// merged into the output JSON. Top-level fields take precedence.
+    @JsonKey(name: 'eval_set_overrides') Map? evalSetOverrides,
+
+    /// Default `Task` kwargs applied to every task in this job.
+    ///
+    /// Per-task overrides (from `task.yaml`) take precedence.
+    @JsonKey(name: 'task_defaults') Map? taskDefaults,
+  }) = _Job;
+
+  factory Job.fromJson(Map json) => _$JobFromJson(json);
+}
+
+/// Per-task configuration within a job.
+///
+/// Allows overriding which samples run for specific tasks and providing
+/// a custom system message.
+@freezed
+sealed class JobTask with _$JobTask {
+  const factory JobTask({
+    /// Task identifier matching a task directory name in `tasks/`.
+    required String id,
+
+    /// Only run these sample IDs. Mutually exclusive with [excludeSamples].
+    @JsonKey(name: 'include_samples') List? includeSamples,
+
+    /// Exclude these sample IDs. Mutually exclusive with [includeSamples].
+    @JsonKey(name: 'exclude_samples') List? excludeSamples,
+
+    /// Override system message for this task.
+    @JsonKey(name: 'system_message') String? systemMessage,
+  }) = _JobTask;
+
+  factory JobTask.fromJson(Map json) =>
+      _$JobTaskFromJson(json);
+
+  /// Create a [JobTask] from parsed YAML data.
+  ///
+  /// The [taskId] is the map key from the job YAML `tasks:` section.
+  /// The [data] may be `null` for a simple task reference with no overrides.
+  factory JobTask.fromYaml(String taskId, Map? data) {
+    if (data == null) {
+      return JobTask(id: taskId);
+    }
+    // NOTE(review): the YAML path reads kebab-case keys ('include-samples',
+    // 'exclude-samples') — matching the example in the Job class doc — while
+    // JSON (de)serialization uses snake_case ('include_samples'), and
+    // 'system_message' below is snake_case even in YAML. Confirm this mixed
+    // key style is intentional.
+    return JobTask(
+      id: taskId,
+      includeSamples: (data['include-samples'] as List?)?.cast(),
+      excludeSamples: (data['exclude-samples'] as List?)?.cast(),
+      systemMessage: data['system_message'] as String?,
+    );
+  }
+}
diff --git a/packages/eval_config/lib/src/models/job.freezed.dart b/packages/eval_config/lib/src/models/job.freezed.dart
new file mode 100644
index 0000000..e249877
--- /dev/null
+++ b/packages/eval_config/lib/src/models/job.freezed.dart
@@ -0,0 +1,989 @@
+// GENERATED CODE - DO NOT MODIFY BY HAND
+// coverage:ignore-file
+// ignore_for_file: type=lint
+// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark
+
+part of 'job.dart';
+
+// **************************************************************************
+// FreezedGenerator
+// **************************************************************************
+
+// dart format off
+T _$identity(T value) => value;
+
+/// @nodoc
+mixin _$Job {
+
+// ------------------------------------------------------------------
+// Core job settings
+// ------------------------------------------------------------------
+/// Directory to write evaluation logs to.
+@JsonKey(name: 'log_dir') String get logDir;
+/// Sandbox type: `'local'`, `'docker'`, or `'podman'`.
+@JsonKey(name: 'sandbox_type') String get sandboxType;
+/// Maximum concurrent API connections.
+@JsonKey(name: 'max_connections') int get maxConnections;
+/// Models to run. `null` means use defaults from registries.
+ List? get models;
+/// Named variant map. Keys are variant names, values are config dicts.
+/// `null` means baseline only.
+ Map>?
get variants;/// Glob patterns for discovering task directories (relative to dataset root). +@JsonKey(name: 'task_paths') List? get taskPaths;/// Per-task configurations with inline overrides. +/// `null` means run all tasks. + Map? get tasks;/// If `true`, copy final workspace to `/examples/` after each sample. +@JsonKey(name: 'save_examples') bool get saveExamples;// ------------------------------------------------------------------ +// Promoted eval_set() parameters (convenience top-level keys) +// ------------------------------------------------------------------ +/// Maximum retry attempts before giving up (defaults to 10). +@JsonKey(name: 'retry_attempts') int? get retryAttempts;/// Maximum number of retry attempts for failed samples. +@JsonKey(name: 'max_retries') int? get maxRetries;/// Time in seconds to wait between retry attempts (exponential backoff). +@JsonKey(name: 'retry_wait') double? get retryWait;/// Reduce `max_connections` at this rate with each retry (default 1.0). +@JsonKey(name: 'retry_connections') double? get retryConnections;/// Cleanup failed log files after retries (defaults to true). +@JsonKey(name: 'retry_cleanup') bool? get retryCleanup;/// Fail on sample errors. +/// +/// `0.0–1.0` = fail if proportion exceeds threshold, +/// `>1` = fail if count exceeds threshold. +@JsonKey(name: 'fail_on_error') double? get failOnError;/// Continue running even if `fail_on_error` condition is met. +@JsonKey(name: 'continue_on_fail') bool? get continueOnFail;/// Number of times to retry samples on error (default: no retries). +@JsonKey(name: 'retry_on_error') int? get retryOnError;/// Raise task errors for debugging (defaults to false). +@JsonKey(name: 'debug_errors') bool? get debugErrors;/// Maximum samples to run in parallel (default is `max_connections`). +@JsonKey(name: 'max_samples') int? get maxSamples;/// Maximum tasks to run in parallel. +@JsonKey(name: 'max_tasks') int? get maxTasks;/// Maximum subprocesses to run in parallel. 
+@JsonKey(name: 'max_subprocesses') int? get maxSubprocesses;/// Maximum sandboxes (per-provider) to run in parallel. +@JsonKey(name: 'max_sandboxes') int? get maxSandboxes;/// Level for logging to the console (e.g. `"warning"`, `"info"`, `"debug"`). +@JsonKey(name: 'log_level') String? get logLevel;/// Level for logging to the log file (defaults to `"info"`). +@JsonKey(name: 'log_level_transcript') String? get logLevelTranscript;/// Format for writing log files (`"eval"` or `"json"`). +@JsonKey(name: 'log_format') String? get logFormat;/// Tags to associate with this evaluation run. + List? get tags;/// Metadata to associate with this evaluation run. + Map? get metadata;/// Trace message interactions with evaluated model to terminal. + bool? get trace;/// Task display type (defaults to `"full"`). + String? get display;/// Score output (defaults to true). + bool? get score;/// Limit evaluated samples (int count or `[start, end]` range). + Object? get limit;/// Evaluate specific sample(s) from the dataset. +@JsonKey(name: 'sample_id') Object? get sampleId;/// Shuffle order of samples (pass a seed to make order deterministic). +@JsonKey(name: 'sample_shuffle') Object? get sampleShuffle;/// Epochs to repeat samples for and optional score reducer function(s). + Object? get epochs;/// Tool use approval policies (string or config dict). + Object? get approval;/// Alternative solver(s) for evaluating task(s) (string or config dict). + Object? get solver;/// Sandbox cleanup after task completes (defaults to true). +@JsonKey(name: 'sandbox_cleanup') bool? get sandboxCleanup;/// Base URL for communicating with the model API. +@JsonKey(name: 'model_base_url') String? get modelBaseUrl;/// Model creation arguments. +@JsonKey(name: 'model_args') Map? get modelArgs;/// Named roles for use in `get_model()`. +@JsonKey(name: 'model_roles') Map? get modelRoles;/// Task creation arguments. +@JsonKey(name: 'task_args') Map? get taskArgs;/// Limit on total messages per sample. 
+@JsonKey(name: 'message_limit') int? get messageLimit;/// Limit on total tokens per sample. +@JsonKey(name: 'token_limit') int? get tokenLimit;/// Limit on clock time (in seconds) per sample. +@JsonKey(name: 'time_limit') int? get timeLimit;/// Limit on working time (in seconds) per sample. +@JsonKey(name: 'working_limit') int? get workingLimit;/// Limit on total cost (in dollars) per sample. +@JsonKey(name: 'cost_limit') double? get costLimit;/// JSON file with model prices for cost tracking. +@JsonKey(name: 'model_cost_config') Map? get modelCostConfig;/// Log detailed samples and scores (defaults to true). +@JsonKey(name: 'log_samples') bool? get logSamples;/// Log events in realtime (defaults to true). +@JsonKey(name: 'log_realtime') bool? get logRealtime;/// Log base64-encoded images (defaults to false). +@JsonKey(name: 'log_images') bool? get logImages;/// Number of samples to buffer before writing log file. +@JsonKey(name: 'log_buffer') int? get logBuffer;/// Sync sample events for realtime viewing. +@JsonKey(name: 'log_shared') int? get logShared;/// Directory to bundle logs and viewer into. +@JsonKey(name: 'bundle_dir') String? get bundleDir;/// Overwrite files in `bundle_dir` (defaults to false). +@JsonKey(name: 'bundle_overwrite') bool? get bundleOverwrite;/// Allow log directory to contain unrelated logs (defaults to false). +@JsonKey(name: 'log_dir_allow_dirty') bool? get logDirAllowDirty;/// ID for the eval set. Generated if not specified. +@JsonKey(name: 'eval_set_id') String? get evalSetId;// ------------------------------------------------------------------ +// Pass-through overrides +// ------------------------------------------------------------------ +/// Additional `eval_set()` kwargs not covered by top-level fields. +/// +/// Any valid `eval_set()` parameter can be specified here and will be +/// merged into the output JSON. Top-level fields take precedence. +@JsonKey(name: 'eval_set_overrides') Map? 
get evalSetOverrides;/// Default `Task` kwargs applied to every task in this job. +/// +/// Per-task overrides (from `task.yaml`) take precedence. +@JsonKey(name: 'task_defaults') Map? get taskDefaults; +/// Create a copy of Job +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$JobCopyWith get copyWith => _$JobCopyWithImpl(this as Job, _$identity); + + /// Serializes this Job to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Job&&(identical(other.logDir, logDir) || other.logDir == logDir)&&(identical(other.sandboxType, sandboxType) || other.sandboxType == sandboxType)&&(identical(other.maxConnections, maxConnections) || other.maxConnections == maxConnections)&&const DeepCollectionEquality().equals(other.models, models)&&const DeepCollectionEquality().equals(other.variants, variants)&&const DeepCollectionEquality().equals(other.taskPaths, taskPaths)&&const DeepCollectionEquality().equals(other.tasks, tasks)&&(identical(other.saveExamples, saveExamples) || other.saveExamples == saveExamples)&&(identical(other.retryAttempts, retryAttempts) || other.retryAttempts == retryAttempts)&&(identical(other.maxRetries, maxRetries) || other.maxRetries == maxRetries)&&(identical(other.retryWait, retryWait) || other.retryWait == retryWait)&&(identical(other.retryConnections, retryConnections) || other.retryConnections == retryConnections)&&(identical(other.retryCleanup, retryCleanup) || other.retryCleanup == retryCleanup)&&(identical(other.failOnError, failOnError) || other.failOnError == failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.retryOnError, retryOnError) || other.retryOnError == retryOnError)&&(identical(other.debugErrors, debugErrors) || other.debugErrors == 
debugErrors)&&(identical(other.maxSamples, maxSamples) || other.maxSamples == maxSamples)&&(identical(other.maxTasks, maxTasks) || other.maxTasks == maxTasks)&&(identical(other.maxSubprocesses, maxSubprocesses) || other.maxSubprocesses == maxSubprocesses)&&(identical(other.maxSandboxes, maxSandboxes) || other.maxSandboxes == maxSandboxes)&&(identical(other.logLevel, logLevel) || other.logLevel == logLevel)&&(identical(other.logLevelTranscript, logLevelTranscript) || other.logLevelTranscript == logLevelTranscript)&&(identical(other.logFormat, logFormat) || other.logFormat == logFormat)&&const DeepCollectionEquality().equals(other.tags, tags)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&(identical(other.trace, trace) || other.trace == trace)&&(identical(other.display, display) || other.display == display)&&(identical(other.score, score) || other.score == score)&&const DeepCollectionEquality().equals(other.limit, limit)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)&&const DeepCollectionEquality().equals(other.sampleShuffle, sampleShuffle)&&const DeepCollectionEquality().equals(other.epochs, epochs)&&const DeepCollectionEquality().equals(other.approval, approval)&&const DeepCollectionEquality().equals(other.solver, solver)&&(identical(other.sandboxCleanup, sandboxCleanup) || other.sandboxCleanup == sandboxCleanup)&&(identical(other.modelBaseUrl, modelBaseUrl) || other.modelBaseUrl == modelBaseUrl)&&const DeepCollectionEquality().equals(other.modelArgs, modelArgs)&&const DeepCollectionEquality().equals(other.modelRoles, modelRoles)&&const DeepCollectionEquality().equals(other.taskArgs, taskArgs)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == 
workingLimit)&&(identical(other.costLimit, costLimit) || other.costLimit == costLimit)&&const DeepCollectionEquality().equals(other.modelCostConfig, modelCostConfig)&&(identical(other.logSamples, logSamples) || other.logSamples == logSamples)&&(identical(other.logRealtime, logRealtime) || other.logRealtime == logRealtime)&&(identical(other.logImages, logImages) || other.logImages == logImages)&&(identical(other.logBuffer, logBuffer) || other.logBuffer == logBuffer)&&(identical(other.logShared, logShared) || other.logShared == logShared)&&(identical(other.bundleDir, bundleDir) || other.bundleDir == bundleDir)&&(identical(other.bundleOverwrite, bundleOverwrite) || other.bundleOverwrite == bundleOverwrite)&&(identical(other.logDirAllowDirty, logDirAllowDirty) || other.logDirAllowDirty == logDirAllowDirty)&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)&&const DeepCollectionEquality().equals(other.evalSetOverrides, evalSetOverrides)&&const DeepCollectionEquality().equals(other.taskDefaults, taskDefaults)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,logDir,sandboxType,maxConnections,const DeepCollectionEquality().hash(models),const DeepCollectionEquality().hash(variants),const DeepCollectionEquality().hash(taskPaths),const DeepCollectionEquality().hash(tasks),saveExamples,retryAttempts,maxRetries,retryWait,retryConnections,retryCleanup,failOnError,continueOnFail,retryOnError,debugErrors,maxSamples,maxTasks,maxSubprocesses,maxSandboxes,logLevel,logLevelTranscript,logFormat,const DeepCollectionEquality().hash(tags),const DeepCollectionEquality().hash(metadata),trace,display,score,const DeepCollectionEquality().hash(limit),const DeepCollectionEquality().hash(sampleId),const DeepCollectionEquality().hash(sampleShuffle),const DeepCollectionEquality().hash(epochs),const DeepCollectionEquality().hash(approval),const 
DeepCollectionEquality().hash(solver),sandboxCleanup,modelBaseUrl,const DeepCollectionEquality().hash(modelArgs),const DeepCollectionEquality().hash(modelRoles),const DeepCollectionEquality().hash(taskArgs),messageLimit,tokenLimit,timeLimit,workingLimit,costLimit,const DeepCollectionEquality().hash(modelCostConfig),logSamples,logRealtime,logImages,logBuffer,logShared,bundleDir,bundleOverwrite,logDirAllowDirty,evalSetId,const DeepCollectionEquality().hash(evalSetOverrides),const DeepCollectionEquality().hash(taskDefaults)]); + +@override +String toString() { + return 'Job(logDir: $logDir, sandboxType: $sandboxType, maxConnections: $maxConnections, models: $models, variants: $variants, taskPaths: $taskPaths, tasks: $tasks, saveExamples: $saveExamples, retryAttempts: $retryAttempts, maxRetries: $maxRetries, retryWait: $retryWait, retryConnections: $retryConnections, retryCleanup: $retryCleanup, failOnError: $failOnError, continueOnFail: $continueOnFail, retryOnError: $retryOnError, debugErrors: $debugErrors, maxSamples: $maxSamples, maxTasks: $maxTasks, maxSubprocesses: $maxSubprocesses, maxSandboxes: $maxSandboxes, logLevel: $logLevel, logLevelTranscript: $logLevelTranscript, logFormat: $logFormat, tags: $tags, metadata: $metadata, trace: $trace, display: $display, score: $score, limit: $limit, sampleId: $sampleId, sampleShuffle: $sampleShuffle, epochs: $epochs, approval: $approval, solver: $solver, sandboxCleanup: $sandboxCleanup, modelBaseUrl: $modelBaseUrl, modelArgs: $modelArgs, modelRoles: $modelRoles, taskArgs: $taskArgs, messageLimit: $messageLimit, tokenLimit: $tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, costLimit: $costLimit, modelCostConfig: $modelCostConfig, logSamples: $logSamples, logRealtime: $logRealtime, logImages: $logImages, logBuffer: $logBuffer, logShared: $logShared, bundleDir: $bundleDir, bundleOverwrite: $bundleOverwrite, logDirAllowDirty: $logDirAllowDirty, evalSetId: $evalSetId, evalSetOverrides: $evalSetOverrides, 
taskDefaults: $taskDefaults)'; +} + + +} + +/// @nodoc +abstract mixin class $JobCopyWith<$Res> { + factory $JobCopyWith(Job value, $Res Function(Job) _then) = _$JobCopyWithImpl; +@useResult +$Res call({ +@JsonKey(name: 'log_dir') String logDir,@JsonKey(name: 'sandbox_type') String sandboxType,@JsonKey(name: 'max_connections') int maxConnections, List? models, Map>? variants,@JsonKey(name: 'task_paths') List? taskPaths, Map? tasks,@JsonKey(name: 'save_examples') bool saveExamples,@JsonKey(name: 'retry_attempts') int? retryAttempts,@JsonKey(name: 'max_retries') int? maxRetries,@JsonKey(name: 'retry_wait') double? retryWait,@JsonKey(name: 'retry_connections') double? retryConnections,@JsonKey(name: 'retry_cleanup') bool? retryCleanup,@JsonKey(name: 'fail_on_error') double? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'retry_on_error') int? retryOnError,@JsonKey(name: 'debug_errors') bool? debugErrors,@JsonKey(name: 'max_samples') int? maxSamples,@JsonKey(name: 'max_tasks') int? maxTasks,@JsonKey(name: 'max_subprocesses') int? maxSubprocesses,@JsonKey(name: 'max_sandboxes') int? maxSandboxes,@JsonKey(name: 'log_level') String? logLevel,@JsonKey(name: 'log_level_transcript') String? logLevelTranscript,@JsonKey(name: 'log_format') String? logFormat, List? tags, Map? metadata, bool? trace, String? display, bool? score, Object? limit,@JsonKey(name: 'sample_id') Object? sampleId,@JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, Object? approval, Object? solver,@JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup,@JsonKey(name: 'model_base_url') String? modelBaseUrl,@JsonKey(name: 'model_args') Map? modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles,@JsonKey(name: 'task_args') Map? taskArgs,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? 
workingLimit,@JsonKey(name: 'cost_limit') double? costLimit,@JsonKey(name: 'model_cost_config') Map? modelCostConfig,@JsonKey(name: 'log_samples') bool? logSamples,@JsonKey(name: 'log_realtime') bool? logRealtime,@JsonKey(name: 'log_images') bool? logImages,@JsonKey(name: 'log_buffer') int? logBuffer,@JsonKey(name: 'log_shared') int? logShared,@JsonKey(name: 'bundle_dir') String? bundleDir,@JsonKey(name: 'bundle_overwrite') bool? bundleOverwrite,@JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty,@JsonKey(name: 'eval_set_id') String? evalSetId,@JsonKey(name: 'eval_set_overrides') Map? evalSetOverrides,@JsonKey(name: 'task_defaults') Map? taskDefaults +}); + + + + +} +/// @nodoc +class _$JobCopyWithImpl<$Res> + implements $JobCopyWith<$Res> { + _$JobCopyWithImpl(this._self, this._then); + + final Job _self; + final $Res Function(Job) _then; + +/// Create a copy of Job +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? logDir = null,Object? sandboxType = null,Object? maxConnections = null,Object? models = freezed,Object? variants = freezed,Object? taskPaths = freezed,Object? tasks = freezed,Object? saveExamples = null,Object? retryAttempts = freezed,Object? maxRetries = freezed,Object? retryWait = freezed,Object? retryConnections = freezed,Object? retryCleanup = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? retryOnError = freezed,Object? debugErrors = freezed,Object? maxSamples = freezed,Object? maxTasks = freezed,Object? maxSubprocesses = freezed,Object? maxSandboxes = freezed,Object? logLevel = freezed,Object? logLevelTranscript = freezed,Object? logFormat = freezed,Object? tags = freezed,Object? metadata = freezed,Object? trace = freezed,Object? display = freezed,Object? score = freezed,Object? limit = freezed,Object? sampleId = freezed,Object? sampleShuffle = freezed,Object? epochs = freezed,Object? approval = freezed,Object? 
solver = freezed,Object? sandboxCleanup = freezed,Object? modelBaseUrl = freezed,Object? modelArgs = freezed,Object? modelRoles = freezed,Object? taskArgs = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? workingLimit = freezed,Object? costLimit = freezed,Object? modelCostConfig = freezed,Object? logSamples = freezed,Object? logRealtime = freezed,Object? logImages = freezed,Object? logBuffer = freezed,Object? logShared = freezed,Object? bundleDir = freezed,Object? bundleOverwrite = freezed,Object? logDirAllowDirty = freezed,Object? evalSetId = freezed,Object? evalSetOverrides = freezed,Object? taskDefaults = freezed,}) { + return _then(_self.copyWith( +logDir: null == logDir ? _self.logDir : logDir // ignore: cast_nullable_to_non_nullable +as String,sandboxType: null == sandboxType ? _self.sandboxType : sandboxType // ignore: cast_nullable_to_non_nullable +as String,maxConnections: null == maxConnections ? _self.maxConnections : maxConnections // ignore: cast_nullable_to_non_nullable +as int,models: freezed == models ? _self.models : models // ignore: cast_nullable_to_non_nullable +as List?,variants: freezed == variants ? _self.variants : variants // ignore: cast_nullable_to_non_nullable +as Map>?,taskPaths: freezed == taskPaths ? _self.taskPaths : taskPaths // ignore: cast_nullable_to_non_nullable +as List?,tasks: freezed == tasks ? _self.tasks : tasks // ignore: cast_nullable_to_non_nullable +as Map?,saveExamples: null == saveExamples ? _self.saveExamples : saveExamples // ignore: cast_nullable_to_non_nullable +as bool,retryAttempts: freezed == retryAttempts ? _self.retryAttempts : retryAttempts // ignore: cast_nullable_to_non_nullable +as int?,maxRetries: freezed == maxRetries ? _self.maxRetries : maxRetries // ignore: cast_nullable_to_non_nullable +as int?,retryWait: freezed == retryWait ? 
_self.retryWait : retryWait // ignore: cast_nullable_to_non_nullable +as double?,retryConnections: freezed == retryConnections ? _self.retryConnections : retryConnections // ignore: cast_nullable_to_non_nullable +as double?,retryCleanup: freezed == retryCleanup ? _self.retryCleanup : retryCleanup // ignore: cast_nullable_to_non_nullable +as bool?,failOnError: freezed == failOnError ? _self.failOnError : failOnError // ignore: cast_nullable_to_non_nullable +as double?,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,retryOnError: freezed == retryOnError ? _self.retryOnError : retryOnError // ignore: cast_nullable_to_non_nullable +as int?,debugErrors: freezed == debugErrors ? _self.debugErrors : debugErrors // ignore: cast_nullable_to_non_nullable +as bool?,maxSamples: freezed == maxSamples ? _self.maxSamples : maxSamples // ignore: cast_nullable_to_non_nullable +as int?,maxTasks: freezed == maxTasks ? _self.maxTasks : maxTasks // ignore: cast_nullable_to_non_nullable +as int?,maxSubprocesses: freezed == maxSubprocesses ? _self.maxSubprocesses : maxSubprocesses // ignore: cast_nullable_to_non_nullable +as int?,maxSandboxes: freezed == maxSandboxes ? _self.maxSandboxes : maxSandboxes // ignore: cast_nullable_to_non_nullable +as int?,logLevel: freezed == logLevel ? _self.logLevel : logLevel // ignore: cast_nullable_to_non_nullable +as String?,logLevelTranscript: freezed == logLevelTranscript ? _self.logLevelTranscript : logLevelTranscript // ignore: cast_nullable_to_non_nullable +as String?,logFormat: freezed == logFormat ? _self.logFormat : logFormat // ignore: cast_nullable_to_non_nullable +as String?,tags: freezed == tags ? _self.tags : tags // ignore: cast_nullable_to_non_nullable +as List?,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,trace: freezed == trace ? 
_self.trace : trace // ignore: cast_nullable_to_non_nullable +as bool?,display: freezed == display ? _self.display : display // ignore: cast_nullable_to_non_nullable +as String?,score: freezed == score ? _self.score : score // ignore: cast_nullable_to_non_nullable +as bool?,limit: freezed == limit ? _self.limit : limit ,sampleId: freezed == sampleId ? _self.sampleId : sampleId ,sampleShuffle: freezed == sampleShuffle ? _self.sampleShuffle : sampleShuffle ,epochs: freezed == epochs ? _self.epochs : epochs ,approval: freezed == approval ? _self.approval : approval ,solver: freezed == solver ? _self.solver : solver ,sandboxCleanup: freezed == sandboxCleanup ? _self.sandboxCleanup : sandboxCleanup // ignore: cast_nullable_to_non_nullable +as bool?,modelBaseUrl: freezed == modelBaseUrl ? _self.modelBaseUrl : modelBaseUrl // ignore: cast_nullable_to_non_nullable +as String?,modelArgs: freezed == modelArgs ? _self.modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map?,modelRoles: freezed == modelRoles ? _self.modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,taskArgs: freezed == taskArgs ? _self.taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,costLimit: freezed == costLimit ? _self.costLimit : costLimit // ignore: cast_nullable_to_non_nullable +as double?,modelCostConfig: freezed == modelCostConfig ? _self.modelCostConfig : modelCostConfig // ignore: cast_nullable_to_non_nullable +as Map?,logSamples: freezed == logSamples ? 
_self.logSamples : logSamples // ignore: cast_nullable_to_non_nullable +as bool?,logRealtime: freezed == logRealtime ? _self.logRealtime : logRealtime // ignore: cast_nullable_to_non_nullable +as bool?,logImages: freezed == logImages ? _self.logImages : logImages // ignore: cast_nullable_to_non_nullable +as bool?,logBuffer: freezed == logBuffer ? _self.logBuffer : logBuffer // ignore: cast_nullable_to_non_nullable +as int?,logShared: freezed == logShared ? _self.logShared : logShared // ignore: cast_nullable_to_non_nullable +as int?,bundleDir: freezed == bundleDir ? _self.bundleDir : bundleDir // ignore: cast_nullable_to_non_nullable +as String?,bundleOverwrite: freezed == bundleOverwrite ? _self.bundleOverwrite : bundleOverwrite // ignore: cast_nullable_to_non_nullable +as bool?,logDirAllowDirty: freezed == logDirAllowDirty ? _self.logDirAllowDirty : logDirAllowDirty // ignore: cast_nullable_to_non_nullable +as bool?,evalSetId: freezed == evalSetId ? _self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String?,evalSetOverrides: freezed == evalSetOverrides ? _self.evalSetOverrides : evalSetOverrides // ignore: cast_nullable_to_non_nullable +as Map?,taskDefaults: freezed == taskDefaults ? _self.taskDefaults : taskDefaults // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Job]. +extension JobPatterns on Job { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Job value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Job() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Job value) $default,){ +final _that = this; +switch (_that) { +case _Job(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Job value)? $default,){ +final _that = this; +switch (_that) { +case _Job() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function(@JsonKey(name: 'log_dir') String logDir, @JsonKey(name: 'sandbox_type') String sandboxType, @JsonKey(name: 'max_connections') int maxConnections, List? models, Map>? variants, @JsonKey(name: 'task_paths') List? taskPaths, Map? tasks, @JsonKey(name: 'save_examples') bool saveExamples, @JsonKey(name: 'retry_attempts') int? retryAttempts, @JsonKey(name: 'max_retries') int? maxRetries, @JsonKey(name: 'retry_wait') double? retryWait, @JsonKey(name: 'retry_connections') double? retryConnections, @JsonKey(name: 'retry_cleanup') bool? retryCleanup, @JsonKey(name: 'fail_on_error') double? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'debug_errors') bool? debugErrors, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? 
maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'log_level') String? logLevel, @JsonKey(name: 'log_level_transcript') String? logLevelTranscript, @JsonKey(name: 'log_format') String? logFormat, List? tags, Map? metadata, bool? trace, String? display, bool? score, Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, Object? approval, Object? solver, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args') Map? modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, @JsonKey(name: 'task_args') Map? taskArgs, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'model_cost_config') Map? modelCostConfig, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'bundle_dir') String? bundleDir, @JsonKey(name: 'bundle_overwrite') bool? bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, @JsonKey(name: 'eval_set_id') String? evalSetId, @JsonKey(name: 'eval_set_overrides') Map? evalSetOverrides, @JsonKey(name: 'task_defaults') Map? taskDefaults)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Job() when $default != null: +return $default(_that.logDir,_that.sandboxType,_that.maxConnections,_that.models,_that.variants,_that.taskPaths,_that.tasks,_that.saveExamples,_that.retryAttempts,_that.maxRetries,_that.retryWait,_that.retryConnections,_that.retryCleanup,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.debugErrors,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.logLevel,_that.logLevelTranscript,_that.logFormat,_that.tags,_that.metadata,_that.trace,_that.display,_that.score,_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.approval,_that.solver,_that.sandboxCleanup,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.taskArgs,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.modelCostConfig,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.bundleDir,_that.bundleOverwrite,_that.logDirAllowDirty,_that.evalSetId,_that.evalSetOverrides,_that.taskDefaults);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function(@JsonKey(name: 'log_dir') String logDir, @JsonKey(name: 'sandbox_type') String sandboxType, @JsonKey(name: 'max_connections') int maxConnections, List? models, Map>? variants, @JsonKey(name: 'task_paths') List? taskPaths, Map? tasks, @JsonKey(name: 'save_examples') bool saveExamples, @JsonKey(name: 'retry_attempts') int? retryAttempts, @JsonKey(name: 'max_retries') int? maxRetries, @JsonKey(name: 'retry_wait') double? retryWait, @JsonKey(name: 'retry_connections') double? 
retryConnections, @JsonKey(name: 'retry_cleanup') bool? retryCleanup, @JsonKey(name: 'fail_on_error') double? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'debug_errors') bool? debugErrors, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'log_level') String? logLevel, @JsonKey(name: 'log_level_transcript') String? logLevelTranscript, @JsonKey(name: 'log_format') String? logFormat, List? tags, Map? metadata, bool? trace, String? display, bool? score, Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, Object? approval, Object? solver, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args') Map? modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, @JsonKey(name: 'task_args') Map? taskArgs, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'model_cost_config') Map? modelCostConfig, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'bundle_dir') String? bundleDir, @JsonKey(name: 'bundle_overwrite') bool? bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, @JsonKey(name: 'eval_set_id') String? evalSetId, @JsonKey(name: 'eval_set_overrides') Map? evalSetOverrides, @JsonKey(name: 'task_defaults') Map? 
taskDefaults) $default,) {final _that = this; +switch (_that) { +case _Job(): +return $default(_that.logDir,_that.sandboxType,_that.maxConnections,_that.models,_that.variants,_that.taskPaths,_that.tasks,_that.saveExamples,_that.retryAttempts,_that.maxRetries,_that.retryWait,_that.retryConnections,_that.retryCleanup,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.debugErrors,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.logLevel,_that.logLevelTranscript,_that.logFormat,_that.tags,_that.metadata,_that.trace,_that.display,_that.score,_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.approval,_that.solver,_that.sandboxCleanup,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.taskArgs,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.modelCostConfig,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.bundleDir,_that.bundleOverwrite,_that.logDirAllowDirty,_that.evalSetId,_that.evalSetOverrides,_that.taskDefaults);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function(@JsonKey(name: 'log_dir') String logDir, @JsonKey(name: 'sandbox_type') String sandboxType, @JsonKey(name: 'max_connections') int maxConnections, List? models, Map>? variants, @JsonKey(name: 'task_paths') List? taskPaths, Map? tasks, @JsonKey(name: 'save_examples') bool saveExamples, @JsonKey(name: 'retry_attempts') int? retryAttempts, @JsonKey(name: 'max_retries') int? maxRetries, @JsonKey(name: 'retry_wait') double? retryWait, @JsonKey(name: 'retry_connections') double? retryConnections, @JsonKey(name: 'retry_cleanup') bool? retryCleanup, @JsonKey(name: 'fail_on_error') double? 
failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'retry_on_error') int? retryOnError, @JsonKey(name: 'debug_errors') bool? debugErrors, @JsonKey(name: 'max_samples') int? maxSamples, @JsonKey(name: 'max_tasks') int? maxTasks, @JsonKey(name: 'max_subprocesses') int? maxSubprocesses, @JsonKey(name: 'max_sandboxes') int? maxSandboxes, @JsonKey(name: 'log_level') String? logLevel, @JsonKey(name: 'log_level_transcript') String? logLevelTranscript, @JsonKey(name: 'log_format') String? logFormat, List? tags, Map? metadata, bool? trace, String? display, bool? score, Object? limit, @JsonKey(name: 'sample_id') Object? sampleId, @JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, Object? approval, Object? solver, @JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup, @JsonKey(name: 'model_base_url') String? modelBaseUrl, @JsonKey(name: 'model_args') Map? modelArgs, @JsonKey(name: 'model_roles') Map? modelRoles, @JsonKey(name: 'task_args') Map? taskArgs, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'model_cost_config') Map? modelCostConfig, @JsonKey(name: 'log_samples') bool? logSamples, @JsonKey(name: 'log_realtime') bool? logRealtime, @JsonKey(name: 'log_images') bool? logImages, @JsonKey(name: 'log_buffer') int? logBuffer, @JsonKey(name: 'log_shared') int? logShared, @JsonKey(name: 'bundle_dir') String? bundleDir, @JsonKey(name: 'bundle_overwrite') bool? bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty, @JsonKey(name: 'eval_set_id') String? evalSetId, @JsonKey(name: 'eval_set_overrides') Map? evalSetOverrides, @JsonKey(name: 'task_defaults') Map? taskDefaults)? 
$default,) {final _that = this; +switch (_that) { +case _Job() when $default != null: +return $default(_that.logDir,_that.sandboxType,_that.maxConnections,_that.models,_that.variants,_that.taskPaths,_that.tasks,_that.saveExamples,_that.retryAttempts,_that.maxRetries,_that.retryWait,_that.retryConnections,_that.retryCleanup,_that.failOnError,_that.continueOnFail,_that.retryOnError,_that.debugErrors,_that.maxSamples,_that.maxTasks,_that.maxSubprocesses,_that.maxSandboxes,_that.logLevel,_that.logLevelTranscript,_that.logFormat,_that.tags,_that.metadata,_that.trace,_that.display,_that.score,_that.limit,_that.sampleId,_that.sampleShuffle,_that.epochs,_that.approval,_that.solver,_that.sandboxCleanup,_that.modelBaseUrl,_that.modelArgs,_that.modelRoles,_that.taskArgs,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.modelCostConfig,_that.logSamples,_that.logRealtime,_that.logImages,_that.logBuffer,_that.logShared,_that.bundleDir,_that.bundleOverwrite,_that.logDirAllowDirty,_that.evalSetId,_that.evalSetOverrides,_that.taskDefaults);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Job implements Job { + const _Job({@JsonKey(name: 'log_dir') required this.logDir, @JsonKey(name: 'sandbox_type') this.sandboxType = 'local', @JsonKey(name: 'max_connections') this.maxConnections = 10, final List? models, final Map>? variants, @JsonKey(name: 'task_paths') final List? taskPaths, final Map? 
tasks, @JsonKey(name: 'save_examples') this.saveExamples = false, @JsonKey(name: 'retry_attempts') this.retryAttempts, @JsonKey(name: 'max_retries') this.maxRetries, @JsonKey(name: 'retry_wait') this.retryWait, @JsonKey(name: 'retry_connections') this.retryConnections, @JsonKey(name: 'retry_cleanup') this.retryCleanup, @JsonKey(name: 'fail_on_error') this.failOnError, @JsonKey(name: 'continue_on_fail') this.continueOnFail, @JsonKey(name: 'retry_on_error') this.retryOnError, @JsonKey(name: 'debug_errors') this.debugErrors, @JsonKey(name: 'max_samples') this.maxSamples, @JsonKey(name: 'max_tasks') this.maxTasks, @JsonKey(name: 'max_subprocesses') this.maxSubprocesses, @JsonKey(name: 'max_sandboxes') this.maxSandboxes, @JsonKey(name: 'log_level') this.logLevel, @JsonKey(name: 'log_level_transcript') this.logLevelTranscript, @JsonKey(name: 'log_format') this.logFormat, final List? tags, final Map? metadata, this.trace, this.display, this.score, this.limit, @JsonKey(name: 'sample_id') this.sampleId, @JsonKey(name: 'sample_shuffle') this.sampleShuffle, this.epochs, this.approval, this.solver, @JsonKey(name: 'sandbox_cleanup') this.sandboxCleanup, @JsonKey(name: 'model_base_url') this.modelBaseUrl, @JsonKey(name: 'model_args') final Map? modelArgs, @JsonKey(name: 'model_roles') final Map? modelRoles, @JsonKey(name: 'task_args') final Map? taskArgs, @JsonKey(name: 'message_limit') this.messageLimit, @JsonKey(name: 'token_limit') this.tokenLimit, @JsonKey(name: 'time_limit') this.timeLimit, @JsonKey(name: 'working_limit') this.workingLimit, @JsonKey(name: 'cost_limit') this.costLimit, @JsonKey(name: 'model_cost_config') final Map? 
modelCostConfig, @JsonKey(name: 'log_samples') this.logSamples, @JsonKey(name: 'log_realtime') this.logRealtime, @JsonKey(name: 'log_images') this.logImages, @JsonKey(name: 'log_buffer') this.logBuffer, @JsonKey(name: 'log_shared') this.logShared, @JsonKey(name: 'bundle_dir') this.bundleDir, @JsonKey(name: 'bundle_overwrite') this.bundleOverwrite, @JsonKey(name: 'log_dir_allow_dirty') this.logDirAllowDirty, @JsonKey(name: 'eval_set_id') this.evalSetId, @JsonKey(name: 'eval_set_overrides') final Map? evalSetOverrides, @JsonKey(name: 'task_defaults') final Map? taskDefaults}): _models = models,_variants = variants,_taskPaths = taskPaths,_tasks = tasks,_tags = tags,_metadata = metadata,_modelArgs = modelArgs,_modelRoles = modelRoles,_taskArgs = taskArgs,_modelCostConfig = modelCostConfig,_evalSetOverrides = evalSetOverrides,_taskDefaults = taskDefaults; + factory _Job.fromJson(Map json) => _$JobFromJson(json); + +// ------------------------------------------------------------------ +// Core job settings +// ------------------------------------------------------------------ +/// Directory to write evaluation logs to. +@override@JsonKey(name: 'log_dir') final String logDir; +/// Sandbox type: `'local'`, `'docker'`, or `'podman'`. +@override@JsonKey(name: 'sandbox_type') final String sandboxType; +/// Maximum concurrent API connections. +@override@JsonKey(name: 'max_connections') final int maxConnections; +/// Models to run. `null` means use defaults from registries. + final List? _models; +/// Models to run. `null` means use defaults from registries. +@override List? get models { + final value = _models; + if (value == null) return null; + if (_models is EqualUnmodifiableListView) return _models; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Named variant map. Keys are variant names, values are config dicts. +/// `null` means baseline only. + final Map>? _variants; +/// Named variant map. 
Keys are variant names, values are config dicts. +/// `null` means baseline only. +@override Map>? get variants { + final value = _variants; + if (value == null) return null; + if (_variants is EqualUnmodifiableMapView) return _variants; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Glob patterns for discovering task directories (relative to dataset root). + final List? _taskPaths; +/// Glob patterns for discovering task directories (relative to dataset root). +@override@JsonKey(name: 'task_paths') List? get taskPaths { + final value = _taskPaths; + if (value == null) return null; + if (_taskPaths is EqualUnmodifiableListView) return _taskPaths; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Per-task configurations with inline overrides. +/// `null` means run all tasks. + final Map? _tasks; +/// Per-task configurations with inline overrides. +/// `null` means run all tasks. +@override Map? get tasks { + final value = _tasks; + if (value == null) return null; + if (_tasks is EqualUnmodifiableMapView) return _tasks; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// If `true`, copy final workspace to `/examples/` after each sample. +@override@JsonKey(name: 'save_examples') final bool saveExamples; +// ------------------------------------------------------------------ +// Promoted eval_set() parameters (convenience top-level keys) +// ------------------------------------------------------------------ +/// Maximum retry attempts before giving up (defaults to 10). +@override@JsonKey(name: 'retry_attempts') final int? retryAttempts; +/// Maximum number of retry attempts for failed samples. +@override@JsonKey(name: 'max_retries') final int? maxRetries; +/// Time in seconds to wait between retry attempts (exponential backoff). +@override@JsonKey(name: 'retry_wait') final double? 
retryWait; +/// Reduce `max_connections` at this rate with each retry (default 1.0). +@override@JsonKey(name: 'retry_connections') final double? retryConnections; +/// Cleanup failed log files after retries (defaults to true). +@override@JsonKey(name: 'retry_cleanup') final bool? retryCleanup; +/// Fail on sample errors. +/// +/// `0.0–1.0` = fail if proportion exceeds threshold, +/// `>1` = fail if count exceeds threshold. +@override@JsonKey(name: 'fail_on_error') final double? failOnError; +/// Continue running even if `fail_on_error` condition is met. +@override@JsonKey(name: 'continue_on_fail') final bool? continueOnFail; +/// Number of times to retry samples on error (default: no retries). +@override@JsonKey(name: 'retry_on_error') final int? retryOnError; +/// Raise task errors for debugging (defaults to false). +@override@JsonKey(name: 'debug_errors') final bool? debugErrors; +/// Maximum samples to run in parallel (default is `max_connections`). +@override@JsonKey(name: 'max_samples') final int? maxSamples; +/// Maximum tasks to run in parallel. +@override@JsonKey(name: 'max_tasks') final int? maxTasks; +/// Maximum subprocesses to run in parallel. +@override@JsonKey(name: 'max_subprocesses') final int? maxSubprocesses; +/// Maximum sandboxes (per-provider) to run in parallel. +@override@JsonKey(name: 'max_sandboxes') final int? maxSandboxes; +/// Level for logging to the console (e.g. `"warning"`, `"info"`, `"debug"`). +@override@JsonKey(name: 'log_level') final String? logLevel; +/// Level for logging to the log file (defaults to `"info"`). +@override@JsonKey(name: 'log_level_transcript') final String? logLevelTranscript; +/// Format for writing log files (`"eval"` or `"json"`). +@override@JsonKey(name: 'log_format') final String? logFormat; +/// Tags to associate with this evaluation run. + final List? _tags; +/// Tags to associate with this evaluation run. +@override List? 
get tags { + final value = _tags; + if (value == null) return null; + if (_tags is EqualUnmodifiableListView) return _tags; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Metadata to associate with this evaluation run. + final Map? _metadata; +/// Metadata to associate with this evaluation run. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Trace message interactions with evaluated model to terminal. +@override final bool? trace; +/// Task display type (defaults to `"full"`). +@override final String? display; +/// Score output (defaults to true). +@override final bool? score; +/// Limit evaluated samples (int count or `[start, end]` range). +@override final Object? limit; +/// Evaluate specific sample(s) from the dataset. +@override@JsonKey(name: 'sample_id') final Object? sampleId; +/// Shuffle order of samples (pass a seed to make order deterministic). +@override@JsonKey(name: 'sample_shuffle') final Object? sampleShuffle; +/// Epochs to repeat samples for and optional score reducer function(s). +@override final Object? epochs; +/// Tool use approval policies (string or config dict). +@override final Object? approval; +/// Alternative solver(s) for evaluating task(s) (string or config dict). +@override final Object? solver; +/// Sandbox cleanup after task completes (defaults to true). +@override@JsonKey(name: 'sandbox_cleanup') final bool? sandboxCleanup; +/// Base URL for communicating with the model API. +@override@JsonKey(name: 'model_base_url') final String? modelBaseUrl; +/// Model creation arguments. + final Map? _modelArgs; +/// Model creation arguments. +@override@JsonKey(name: 'model_args') Map? 
get modelArgs { + final value = _modelArgs; + if (value == null) return null; + if (_modelArgs is EqualUnmodifiableMapView) return _modelArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Named roles for use in `get_model()`. + final Map? _modelRoles; +/// Named roles for use in `get_model()`. +@override@JsonKey(name: 'model_roles') Map? get modelRoles { + final value = _modelRoles; + if (value == null) return null; + if (_modelRoles is EqualUnmodifiableMapView) return _modelRoles; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Task creation arguments. + final Map? _taskArgs; +/// Task creation arguments. +@override@JsonKey(name: 'task_args') Map? get taskArgs { + final value = _taskArgs; + if (value == null) return null; + if (_taskArgs is EqualUnmodifiableMapView) return _taskArgs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Limit on total messages per sample. +@override@JsonKey(name: 'message_limit') final int? messageLimit; +/// Limit on total tokens per sample. +@override@JsonKey(name: 'token_limit') final int? tokenLimit; +/// Limit on clock time (in seconds) per sample. +@override@JsonKey(name: 'time_limit') final int? timeLimit; +/// Limit on working time (in seconds) per sample. +@override@JsonKey(name: 'working_limit') final int? workingLimit; +/// Limit on total cost (in dollars) per sample. +@override@JsonKey(name: 'cost_limit') final double? costLimit; +/// JSON file with model prices for cost tracking. + final Map? _modelCostConfig; +/// JSON file with model prices for cost tracking. +@override@JsonKey(name: 'model_cost_config') Map? 
get modelCostConfig { + final value = _modelCostConfig; + if (value == null) return null; + if (_modelCostConfig is EqualUnmodifiableMapView) return _modelCostConfig; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Log detailed samples and scores (defaults to true). +@override@JsonKey(name: 'log_samples') final bool? logSamples; +/// Log events in realtime (defaults to true). +@override@JsonKey(name: 'log_realtime') final bool? logRealtime; +/// Log base64-encoded images (defaults to false). +@override@JsonKey(name: 'log_images') final bool? logImages; +/// Number of samples to buffer before writing log file. +@override@JsonKey(name: 'log_buffer') final int? logBuffer; +/// Sync sample events for realtime viewing. +@override@JsonKey(name: 'log_shared') final int? logShared; +/// Directory to bundle logs and viewer into. +@override@JsonKey(name: 'bundle_dir') final String? bundleDir; +/// Overwrite files in `bundle_dir` (defaults to false). +@override@JsonKey(name: 'bundle_overwrite') final bool? bundleOverwrite; +/// Allow log directory to contain unrelated logs (defaults to false). +@override@JsonKey(name: 'log_dir_allow_dirty') final bool? logDirAllowDirty; +/// ID for the eval set. Generated if not specified. +@override@JsonKey(name: 'eval_set_id') final String? evalSetId; +// ------------------------------------------------------------------ +// Pass-through overrides +// ------------------------------------------------------------------ +/// Additional `eval_set()` kwargs not covered by top-level fields. +/// +/// Any valid `eval_set()` parameter can be specified here and will be +/// merged into the output JSON. Top-level fields take precedence. + final Map? _evalSetOverrides; +// ------------------------------------------------------------------ +// Pass-through overrides +// ------------------------------------------------------------------ +/// Additional `eval_set()` kwargs not covered by top-level fields. 
+/// +/// Any valid `eval_set()` parameter can be specified here and will be +/// merged into the output JSON. Top-level fields take precedence. +@override@JsonKey(name: 'eval_set_overrides') Map? get evalSetOverrides { + final value = _evalSetOverrides; + if (value == null) return null; + if (_evalSetOverrides is EqualUnmodifiableMapView) return _evalSetOverrides; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Default `Task` kwargs applied to every task in this job. +/// +/// Per-task overrides (from `task.yaml`) take precedence. + final Map? _taskDefaults; +/// Default `Task` kwargs applied to every task in this job. +/// +/// Per-task overrides (from `task.yaml`) take precedence. +@override@JsonKey(name: 'task_defaults') Map? get taskDefaults { + final value = _taskDefaults; + if (value == null) return null; + if (_taskDefaults is EqualUnmodifiableMapView) return _taskDefaults; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of Job +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$JobCopyWith<_Job> get copyWith => __$JobCopyWithImpl<_Job>(this, _$identity); + +@override +Map toJson() { + return _$JobToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Job&&(identical(other.logDir, logDir) || other.logDir == logDir)&&(identical(other.sandboxType, sandboxType) || other.sandboxType == sandboxType)&&(identical(other.maxConnections, maxConnections) || other.maxConnections == maxConnections)&&const DeepCollectionEquality().equals(other._models, _models)&&const DeepCollectionEquality().equals(other._variants, _variants)&&const DeepCollectionEquality().equals(other._taskPaths, _taskPaths)&&const DeepCollectionEquality().equals(other._tasks, _tasks)&&(identical(other.saveExamples, saveExamples) || other.saveExamples == saveExamples)&&(identical(other.retryAttempts, retryAttempts) || other.retryAttempts == retryAttempts)&&(identical(other.maxRetries, maxRetries) || other.maxRetries == maxRetries)&&(identical(other.retryWait, retryWait) || other.retryWait == retryWait)&&(identical(other.retryConnections, retryConnections) || other.retryConnections == retryConnections)&&(identical(other.retryCleanup, retryCleanup) || other.retryCleanup == retryCleanup)&&(identical(other.failOnError, failOnError) || other.failOnError == failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.retryOnError, retryOnError) || other.retryOnError == retryOnError)&&(identical(other.debugErrors, debugErrors) || other.debugErrors == debugErrors)&&(identical(other.maxSamples, maxSamples) || other.maxSamples == maxSamples)&&(identical(other.maxTasks, maxTasks) || other.maxTasks == maxTasks)&&(identical(other.maxSubprocesses, maxSubprocesses) || other.maxSubprocesses == maxSubprocesses)&&(identical(other.maxSandboxes, 
maxSandboxes) || other.maxSandboxes == maxSandboxes)&&(identical(other.logLevel, logLevel) || other.logLevel == logLevel)&&(identical(other.logLevelTranscript, logLevelTranscript) || other.logLevelTranscript == logLevelTranscript)&&(identical(other.logFormat, logFormat) || other.logFormat == logFormat)&&const DeepCollectionEquality().equals(other._tags, _tags)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&(identical(other.trace, trace) || other.trace == trace)&&(identical(other.display, display) || other.display == display)&&(identical(other.score, score) || other.score == score)&&const DeepCollectionEquality().equals(other.limit, limit)&&const DeepCollectionEquality().equals(other.sampleId, sampleId)&&const DeepCollectionEquality().equals(other.sampleShuffle, sampleShuffle)&&const DeepCollectionEquality().equals(other.epochs, epochs)&&const DeepCollectionEquality().equals(other.approval, approval)&&const DeepCollectionEquality().equals(other.solver, solver)&&(identical(other.sandboxCleanup, sandboxCleanup) || other.sandboxCleanup == sandboxCleanup)&&(identical(other.modelBaseUrl, modelBaseUrl) || other.modelBaseUrl == modelBaseUrl)&&const DeepCollectionEquality().equals(other._modelArgs, _modelArgs)&&const DeepCollectionEquality().equals(other._modelRoles, _modelRoles)&&const DeepCollectionEquality().equals(other._taskArgs, _taskArgs)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.costLimit, costLimit) || other.costLimit == costLimit)&&const DeepCollectionEquality().equals(other._modelCostConfig, _modelCostConfig)&&(identical(other.logSamples, logSamples) || other.logSamples == logSamples)&&(identical(other.logRealtime, logRealtime) || other.logRealtime == 
logRealtime)&&(identical(other.logImages, logImages) || other.logImages == logImages)&&(identical(other.logBuffer, logBuffer) || other.logBuffer == logBuffer)&&(identical(other.logShared, logShared) || other.logShared == logShared)&&(identical(other.bundleDir, bundleDir) || other.bundleDir == bundleDir)&&(identical(other.bundleOverwrite, bundleOverwrite) || other.bundleOverwrite == bundleOverwrite)&&(identical(other.logDirAllowDirty, logDirAllowDirty) || other.logDirAllowDirty == logDirAllowDirty)&&(identical(other.evalSetId, evalSetId) || other.evalSetId == evalSetId)&&const DeepCollectionEquality().equals(other._evalSetOverrides, _evalSetOverrides)&&const DeepCollectionEquality().equals(other._taskDefaults, _taskDefaults)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,logDir,sandboxType,maxConnections,const DeepCollectionEquality().hash(_models),const DeepCollectionEquality().hash(_variants),const DeepCollectionEquality().hash(_taskPaths),const DeepCollectionEquality().hash(_tasks),saveExamples,retryAttempts,maxRetries,retryWait,retryConnections,retryCleanup,failOnError,continueOnFail,retryOnError,debugErrors,maxSamples,maxTasks,maxSubprocesses,maxSandboxes,logLevel,logLevelTranscript,logFormat,const DeepCollectionEquality().hash(_tags),const DeepCollectionEquality().hash(_metadata),trace,display,score,const DeepCollectionEquality().hash(limit),const DeepCollectionEquality().hash(sampleId),const DeepCollectionEquality().hash(sampleShuffle),const DeepCollectionEquality().hash(epochs),const DeepCollectionEquality().hash(approval),const DeepCollectionEquality().hash(solver),sandboxCleanup,modelBaseUrl,const DeepCollectionEquality().hash(_modelArgs),const DeepCollectionEquality().hash(_modelRoles),const DeepCollectionEquality().hash(_taskArgs),messageLimit,tokenLimit,timeLimit,workingLimit,costLimit,const 
DeepCollectionEquality().hash(_modelCostConfig),logSamples,logRealtime,logImages,logBuffer,logShared,bundleDir,bundleOverwrite,logDirAllowDirty,evalSetId,const DeepCollectionEquality().hash(_evalSetOverrides),const DeepCollectionEquality().hash(_taskDefaults)]); + +@override +String toString() { + return 'Job(logDir: $logDir, sandboxType: $sandboxType, maxConnections: $maxConnections, models: $models, variants: $variants, taskPaths: $taskPaths, tasks: $tasks, saveExamples: $saveExamples, retryAttempts: $retryAttempts, maxRetries: $maxRetries, retryWait: $retryWait, retryConnections: $retryConnections, retryCleanup: $retryCleanup, failOnError: $failOnError, continueOnFail: $continueOnFail, retryOnError: $retryOnError, debugErrors: $debugErrors, maxSamples: $maxSamples, maxTasks: $maxTasks, maxSubprocesses: $maxSubprocesses, maxSandboxes: $maxSandboxes, logLevel: $logLevel, logLevelTranscript: $logLevelTranscript, logFormat: $logFormat, tags: $tags, metadata: $metadata, trace: $trace, display: $display, score: $score, limit: $limit, sampleId: $sampleId, sampleShuffle: $sampleShuffle, epochs: $epochs, approval: $approval, solver: $solver, sandboxCleanup: $sandboxCleanup, modelBaseUrl: $modelBaseUrl, modelArgs: $modelArgs, modelRoles: $modelRoles, taskArgs: $taskArgs, messageLimit: $messageLimit, tokenLimit: $tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, costLimit: $costLimit, modelCostConfig: $modelCostConfig, logSamples: $logSamples, logRealtime: $logRealtime, logImages: $logImages, logBuffer: $logBuffer, logShared: $logShared, bundleDir: $bundleDir, bundleOverwrite: $bundleOverwrite, logDirAllowDirty: $logDirAllowDirty, evalSetId: $evalSetId, evalSetOverrides: $evalSetOverrides, taskDefaults: $taskDefaults)'; +} + + +} + +/// @nodoc +abstract mixin class _$JobCopyWith<$Res> implements $JobCopyWith<$Res> { + factory _$JobCopyWith(_Job value, $Res Function(_Job) _then) = __$JobCopyWithImpl; +@override @useResult +$Res call({ +@JsonKey(name: 
'log_dir') String logDir,@JsonKey(name: 'sandbox_type') String sandboxType,@JsonKey(name: 'max_connections') int maxConnections, List? models, Map>? variants,@JsonKey(name: 'task_paths') List? taskPaths, Map? tasks,@JsonKey(name: 'save_examples') bool saveExamples,@JsonKey(name: 'retry_attempts') int? retryAttempts,@JsonKey(name: 'max_retries') int? maxRetries,@JsonKey(name: 'retry_wait') double? retryWait,@JsonKey(name: 'retry_connections') double? retryConnections,@JsonKey(name: 'retry_cleanup') bool? retryCleanup,@JsonKey(name: 'fail_on_error') double? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'retry_on_error') int? retryOnError,@JsonKey(name: 'debug_errors') bool? debugErrors,@JsonKey(name: 'max_samples') int? maxSamples,@JsonKey(name: 'max_tasks') int? maxTasks,@JsonKey(name: 'max_subprocesses') int? maxSubprocesses,@JsonKey(name: 'max_sandboxes') int? maxSandboxes,@JsonKey(name: 'log_level') String? logLevel,@JsonKey(name: 'log_level_transcript') String? logLevelTranscript,@JsonKey(name: 'log_format') String? logFormat, List? tags, Map? metadata, bool? trace, String? display, bool? score, Object? limit,@JsonKey(name: 'sample_id') Object? sampleId,@JsonKey(name: 'sample_shuffle') Object? sampleShuffle, Object? epochs, Object? approval, Object? solver,@JsonKey(name: 'sandbox_cleanup') bool? sandboxCleanup,@JsonKey(name: 'model_base_url') String? modelBaseUrl,@JsonKey(name: 'model_args') Map? modelArgs,@JsonKey(name: 'model_roles') Map? modelRoles,@JsonKey(name: 'task_args') Map? taskArgs,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? workingLimit,@JsonKey(name: 'cost_limit') double? costLimit,@JsonKey(name: 'model_cost_config') Map? modelCostConfig,@JsonKey(name: 'log_samples') bool? logSamples,@JsonKey(name: 'log_realtime') bool? logRealtime,@JsonKey(name: 'log_images') bool? 
logImages,@JsonKey(name: 'log_buffer') int? logBuffer,@JsonKey(name: 'log_shared') int? logShared,@JsonKey(name: 'bundle_dir') String? bundleDir,@JsonKey(name: 'bundle_overwrite') bool? bundleOverwrite,@JsonKey(name: 'log_dir_allow_dirty') bool? logDirAllowDirty,@JsonKey(name: 'eval_set_id') String? evalSetId,@JsonKey(name: 'eval_set_overrides') Map? evalSetOverrides,@JsonKey(name: 'task_defaults') Map? taskDefaults +}); + + + + +} +/// @nodoc +class __$JobCopyWithImpl<$Res> + implements _$JobCopyWith<$Res> { + __$JobCopyWithImpl(this._self, this._then); + + final _Job _self; + final $Res Function(_Job) _then; + +/// Create a copy of Job +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? logDir = null,Object? sandboxType = null,Object? maxConnections = null,Object? models = freezed,Object? variants = freezed,Object? taskPaths = freezed,Object? tasks = freezed,Object? saveExamples = null,Object? retryAttempts = freezed,Object? maxRetries = freezed,Object? retryWait = freezed,Object? retryConnections = freezed,Object? retryCleanup = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? retryOnError = freezed,Object? debugErrors = freezed,Object? maxSamples = freezed,Object? maxTasks = freezed,Object? maxSubprocesses = freezed,Object? maxSandboxes = freezed,Object? logLevel = freezed,Object? logLevelTranscript = freezed,Object? logFormat = freezed,Object? tags = freezed,Object? metadata = freezed,Object? trace = freezed,Object? display = freezed,Object? score = freezed,Object? limit = freezed,Object? sampleId = freezed,Object? sampleShuffle = freezed,Object? epochs = freezed,Object? approval = freezed,Object? solver = freezed,Object? sandboxCleanup = freezed,Object? modelBaseUrl = freezed,Object? modelArgs = freezed,Object? modelRoles = freezed,Object? taskArgs = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? 
workingLimit = freezed,Object? costLimit = freezed,Object? modelCostConfig = freezed,Object? logSamples = freezed,Object? logRealtime = freezed,Object? logImages = freezed,Object? logBuffer = freezed,Object? logShared = freezed,Object? bundleDir = freezed,Object? bundleOverwrite = freezed,Object? logDirAllowDirty = freezed,Object? evalSetId = freezed,Object? evalSetOverrides = freezed,Object? taskDefaults = freezed,}) { + return _then(_Job( +logDir: null == logDir ? _self.logDir : logDir // ignore: cast_nullable_to_non_nullable +as String,sandboxType: null == sandboxType ? _self.sandboxType : sandboxType // ignore: cast_nullable_to_non_nullable +as String,maxConnections: null == maxConnections ? _self.maxConnections : maxConnections // ignore: cast_nullable_to_non_nullable +as int,models: freezed == models ? _self._models : models // ignore: cast_nullable_to_non_nullable +as List?,variants: freezed == variants ? _self._variants : variants // ignore: cast_nullable_to_non_nullable +as Map>?,taskPaths: freezed == taskPaths ? _self._taskPaths : taskPaths // ignore: cast_nullable_to_non_nullable +as List?,tasks: freezed == tasks ? _self._tasks : tasks // ignore: cast_nullable_to_non_nullable +as Map?,saveExamples: null == saveExamples ? _self.saveExamples : saveExamples // ignore: cast_nullable_to_non_nullable +as bool,retryAttempts: freezed == retryAttempts ? _self.retryAttempts : retryAttempts // ignore: cast_nullable_to_non_nullable +as int?,maxRetries: freezed == maxRetries ? _self.maxRetries : maxRetries // ignore: cast_nullable_to_non_nullable +as int?,retryWait: freezed == retryWait ? _self.retryWait : retryWait // ignore: cast_nullable_to_non_nullable +as double?,retryConnections: freezed == retryConnections ? _self.retryConnections : retryConnections // ignore: cast_nullable_to_non_nullable +as double?,retryCleanup: freezed == retryCleanup ? 
_self.retryCleanup : retryCleanup // ignore: cast_nullable_to_non_nullable +as bool?,failOnError: freezed == failOnError ? _self.failOnError : failOnError // ignore: cast_nullable_to_non_nullable +as double?,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,retryOnError: freezed == retryOnError ? _self.retryOnError : retryOnError // ignore: cast_nullable_to_non_nullable +as int?,debugErrors: freezed == debugErrors ? _self.debugErrors : debugErrors // ignore: cast_nullable_to_non_nullable +as bool?,maxSamples: freezed == maxSamples ? _self.maxSamples : maxSamples // ignore: cast_nullable_to_non_nullable +as int?,maxTasks: freezed == maxTasks ? _self.maxTasks : maxTasks // ignore: cast_nullable_to_non_nullable +as int?,maxSubprocesses: freezed == maxSubprocesses ? _self.maxSubprocesses : maxSubprocesses // ignore: cast_nullable_to_non_nullable +as int?,maxSandboxes: freezed == maxSandboxes ? _self.maxSandboxes : maxSandboxes // ignore: cast_nullable_to_non_nullable +as int?,logLevel: freezed == logLevel ? _self.logLevel : logLevel // ignore: cast_nullable_to_non_nullable +as String?,logLevelTranscript: freezed == logLevelTranscript ? _self.logLevelTranscript : logLevelTranscript // ignore: cast_nullable_to_non_nullable +as String?,logFormat: freezed == logFormat ? _self.logFormat : logFormat // ignore: cast_nullable_to_non_nullable +as String?,tags: freezed == tags ? _self._tags : tags // ignore: cast_nullable_to_non_nullable +as List?,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,trace: freezed == trace ? _self.trace : trace // ignore: cast_nullable_to_non_nullable +as bool?,display: freezed == display ? _self.display : display // ignore: cast_nullable_to_non_nullable +as String?,score: freezed == score ? _self.score : score // ignore: cast_nullable_to_non_nullable +as bool?,limit: freezed == limit ? 
_self.limit : limit ,sampleId: freezed == sampleId ? _self.sampleId : sampleId ,sampleShuffle: freezed == sampleShuffle ? _self.sampleShuffle : sampleShuffle ,epochs: freezed == epochs ? _self.epochs : epochs ,approval: freezed == approval ? _self.approval : approval ,solver: freezed == solver ? _self.solver : solver ,sandboxCleanup: freezed == sandboxCleanup ? _self.sandboxCleanup : sandboxCleanup // ignore: cast_nullable_to_non_nullable +as bool?,modelBaseUrl: freezed == modelBaseUrl ? _self.modelBaseUrl : modelBaseUrl // ignore: cast_nullable_to_non_nullable +as String?,modelArgs: freezed == modelArgs ? _self._modelArgs : modelArgs // ignore: cast_nullable_to_non_nullable +as Map?,modelRoles: freezed == modelRoles ? _self._modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,taskArgs: freezed == taskArgs ? _self._taskArgs : taskArgs // ignore: cast_nullable_to_non_nullable +as Map?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,costLimit: freezed == costLimit ? _self.costLimit : costLimit // ignore: cast_nullable_to_non_nullable +as double?,modelCostConfig: freezed == modelCostConfig ? _self._modelCostConfig : modelCostConfig // ignore: cast_nullable_to_non_nullable +as Map?,logSamples: freezed == logSamples ? _self.logSamples : logSamples // ignore: cast_nullable_to_non_nullable +as bool?,logRealtime: freezed == logRealtime ? _self.logRealtime : logRealtime // ignore: cast_nullable_to_non_nullable +as bool?,logImages: freezed == logImages ? 
_self.logImages : logImages // ignore: cast_nullable_to_non_nullable +as bool?,logBuffer: freezed == logBuffer ? _self.logBuffer : logBuffer // ignore: cast_nullable_to_non_nullable +as int?,logShared: freezed == logShared ? _self.logShared : logShared // ignore: cast_nullable_to_non_nullable +as int?,bundleDir: freezed == bundleDir ? _self.bundleDir : bundleDir // ignore: cast_nullable_to_non_nullable +as String?,bundleOverwrite: freezed == bundleOverwrite ? _self.bundleOverwrite : bundleOverwrite // ignore: cast_nullable_to_non_nullable +as bool?,logDirAllowDirty: freezed == logDirAllowDirty ? _self.logDirAllowDirty : logDirAllowDirty // ignore: cast_nullable_to_non_nullable +as bool?,evalSetId: freezed == evalSetId ? _self.evalSetId : evalSetId // ignore: cast_nullable_to_non_nullable +as String?,evalSetOverrides: freezed == evalSetOverrides ? _self._evalSetOverrides : evalSetOverrides // ignore: cast_nullable_to_non_nullable +as Map?,taskDefaults: freezed == taskDefaults ? _self._taskDefaults : taskDefaults // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + + +} + + +/// @nodoc +mixin _$JobTask { + +/// Task identifier matching a task directory name in `tasks/`. + String get id;/// Only run these sample IDs. Mutually exclusive with [excludeSamples]. +@JsonKey(name: 'include_samples') List? get includeSamples;/// Exclude these sample IDs. Mutually exclusive with [includeSamples]. +@JsonKey(name: 'exclude_samples') List? get excludeSamples;/// Override system message for this task. +@JsonKey(name: 'system_message') String? get systemMessage; +/// Create a copy of JobTask +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$JobTaskCopyWith get copyWith => _$JobTaskCopyWithImpl(this as JobTask, _$identity); + + /// Serializes this JobTask to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is JobTask&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other.includeSamples, includeSamples)&&const DeepCollectionEquality().equals(other.excludeSamples, excludeSamples)&&(identical(other.systemMessage, systemMessage) || other.systemMessage == systemMessage)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(includeSamples),const DeepCollectionEquality().hash(excludeSamples),systemMessage); + +@override +String toString() { + return 'JobTask(id: $id, includeSamples: $includeSamples, excludeSamples: $excludeSamples, systemMessage: $systemMessage)'; +} + + +} + +/// @nodoc +abstract mixin class $JobTaskCopyWith<$Res> { + factory $JobTaskCopyWith(JobTask value, $Res Function(JobTask) _then) = _$JobTaskCopyWithImpl; +@useResult +$Res call({ + String id,@JsonKey(name: 'include_samples') List? includeSamples,@JsonKey(name: 'exclude_samples') List? excludeSamples,@JsonKey(name: 'system_message') String? systemMessage +}); + + + + +} +/// @nodoc +class _$JobTaskCopyWithImpl<$Res> + implements $JobTaskCopyWith<$Res> { + _$JobTaskCopyWithImpl(this._self, this._then); + + final JobTask _self; + final $Res Function(JobTask) _then; + +/// Create a copy of JobTask +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? id = null,Object? includeSamples = freezed,Object? excludeSamples = freezed,Object? systemMessage = freezed,}) { + return _then(_self.copyWith( +id: null == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String,includeSamples: freezed == includeSamples ? _self.includeSamples : includeSamples // ignore: cast_nullable_to_non_nullable +as List?,excludeSamples: freezed == excludeSamples ? 
_self.excludeSamples : excludeSamples // ignore: cast_nullable_to_non_nullable +as List?,systemMessage: freezed == systemMessage ? _self.systemMessage : systemMessage // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [JobTask]. +extension JobTaskPatterns on JobTask { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _JobTask value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _JobTask() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _JobTask value) $default,){ +final _that = this; +switch (_that) { +case _JobTask(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _JobTask value)? $default,){ +final _that = this; +switch (_that) { +case _JobTask() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String id, @JsonKey(name: 'include_samples') List? includeSamples, @JsonKey(name: 'exclude_samples') List? excludeSamples, @JsonKey(name: 'system_message') String? systemMessage)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _JobTask() when $default != null: +return $default(_that.id,_that.includeSamples,_that.excludeSamples,_that.systemMessage);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String id, @JsonKey(name: 'include_samples') List? includeSamples, @JsonKey(name: 'exclude_samples') List? excludeSamples, @JsonKey(name: 'system_message') String? systemMessage) $default,) {final _that = this; +switch (_that) { +case _JobTask(): +return $default(_that.id,_that.includeSamples,_that.excludeSamples,_that.systemMessage);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String id, @JsonKey(name: 'include_samples') List? includeSamples, @JsonKey(name: 'exclude_samples') List? excludeSamples, @JsonKey(name: 'system_message') String? systemMessage)? 
$default,) {final _that = this; +switch (_that) { +case _JobTask() when $default != null: +return $default(_that.id,_that.includeSamples,_that.excludeSamples,_that.systemMessage);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _JobTask implements JobTask { + const _JobTask({required this.id, @JsonKey(name: 'include_samples') final List? includeSamples, @JsonKey(name: 'exclude_samples') final List? excludeSamples, @JsonKey(name: 'system_message') this.systemMessage}): _includeSamples = includeSamples,_excludeSamples = excludeSamples; + factory _JobTask.fromJson(Map json) => _$JobTaskFromJson(json); + +/// Task identifier matching a task directory name in `tasks/`. +@override final String id; +/// Only run these sample IDs. Mutually exclusive with [excludeSamples]. + final List? _includeSamples; +/// Only run these sample IDs. Mutually exclusive with [excludeSamples]. +@override@JsonKey(name: 'include_samples') List? get includeSamples { + final value = _includeSamples; + if (value == null) return null; + if (_includeSamples is EqualUnmodifiableListView) return _includeSamples; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Exclude these sample IDs. Mutually exclusive with [includeSamples]. + final List? _excludeSamples; +/// Exclude these sample IDs. Mutually exclusive with [includeSamples]. +@override@JsonKey(name: 'exclude_samples') List? get excludeSamples { + final value = _excludeSamples; + if (value == null) return null; + if (_excludeSamples is EqualUnmodifiableListView) return _excludeSamples; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Override system message for this task. +@override@JsonKey(name: 'system_message') final String? systemMessage; + +/// Create a copy of JobTask +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$JobTaskCopyWith<_JobTask> get copyWith => __$JobTaskCopyWithImpl<_JobTask>(this, _$identity); + +@override +Map toJson() { + return _$JobTaskToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _JobTask&&(identical(other.id, id) || other.id == id)&&const DeepCollectionEquality().equals(other._includeSamples, _includeSamples)&&const DeepCollectionEquality().equals(other._excludeSamples, _excludeSamples)&&(identical(other.systemMessage, systemMessage) || other.systemMessage == systemMessage)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,id,const DeepCollectionEquality().hash(_includeSamples),const DeepCollectionEquality().hash(_excludeSamples),systemMessage); + +@override +String toString() { + return 'JobTask(id: $id, includeSamples: $includeSamples, excludeSamples: $excludeSamples, systemMessage: $systemMessage)'; +} + + +} + +/// @nodoc +abstract mixin class _$JobTaskCopyWith<$Res> implements $JobTaskCopyWith<$Res> { + factory _$JobTaskCopyWith(_JobTask value, $Res Function(_JobTask) _then) = __$JobTaskCopyWithImpl; +@override @useResult +$Res call({ + String id,@JsonKey(name: 'include_samples') List? includeSamples,@JsonKey(name: 'exclude_samples') List? excludeSamples,@JsonKey(name: 'system_message') String? systemMessage +}); + + + + +} +/// @nodoc +class __$JobTaskCopyWithImpl<$Res> + implements _$JobTaskCopyWith<$Res> { + __$JobTaskCopyWithImpl(this._self, this._then); + + final _JobTask _self; + final $Res Function(_JobTask) _then; + +/// Create a copy of JobTask +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? id = null,Object? includeSamples = freezed,Object? excludeSamples = freezed,Object? 
systemMessage = freezed,}) { + return _then(_JobTask( +id: null == id ? _self.id : id // ignore: cast_nullable_to_non_nullable +as String,includeSamples: freezed == includeSamples ? _self._includeSamples : includeSamples // ignore: cast_nullable_to_non_nullable +as List?,excludeSamples: freezed == excludeSamples ? _self._excludeSamples : excludeSamples // ignore: cast_nullable_to_non_nullable +as List?,systemMessage: freezed == systemMessage ? _self.systemMessage : systemMessage // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/job.g.dart b/packages/eval_config/lib/src/models/job.g.dart new file mode 100644 index 0000000..f62e5b3 --- /dev/null +++ b/packages/eval_config/lib/src/models/job.g.dart @@ -0,0 +1,153 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'job.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_Job _$JobFromJson(Map json) => _Job( + logDir: json['log_dir'] as String, + sandboxType: json['sandbox_type'] as String? ?? 'local', + maxConnections: (json['max_connections'] as num?)?.toInt() ?? 10, + models: (json['models'] as List?)?.map((e) => e as String).toList(), + variants: (json['variants'] as Map?)?.map( + (k, e) => MapEntry(k, e as Map), + ), + taskPaths: (json['task_paths'] as List?) + ?.map((e) => e as String) + .toList(), + tasks: (json['tasks'] as Map?)?.map( + (k, e) => MapEntry(k, JobTask.fromJson(e as Map)), + ), + saveExamples: json['save_examples'] as bool? ?? 
false, + retryAttempts: (json['retry_attempts'] as num?)?.toInt(), + maxRetries: (json['max_retries'] as num?)?.toInt(), + retryWait: (json['retry_wait'] as num?)?.toDouble(), + retryConnections: (json['retry_connections'] as num?)?.toDouble(), + retryCleanup: json['retry_cleanup'] as bool?, + failOnError: (json['fail_on_error'] as num?)?.toDouble(), + continueOnFail: json['continue_on_fail'] as bool?, + retryOnError: (json['retry_on_error'] as num?)?.toInt(), + debugErrors: json['debug_errors'] as bool?, + maxSamples: (json['max_samples'] as num?)?.toInt(), + maxTasks: (json['max_tasks'] as num?)?.toInt(), + maxSubprocesses: (json['max_subprocesses'] as num?)?.toInt(), + maxSandboxes: (json['max_sandboxes'] as num?)?.toInt(), + logLevel: json['log_level'] as String?, + logLevelTranscript: json['log_level_transcript'] as String?, + logFormat: json['log_format'] as String?, + tags: (json['tags'] as List?)?.map((e) => e as String).toList(), + metadata: json['metadata'] as Map?, + trace: json['trace'] as bool?, + display: json['display'] as String?, + score: json['score'] as bool?, + limit: json['limit'], + sampleId: json['sample_id'], + sampleShuffle: json['sample_shuffle'], + epochs: json['epochs'], + approval: json['approval'], + solver: json['solver'], + sandboxCleanup: json['sandbox_cleanup'] as bool?, + modelBaseUrl: json['model_base_url'] as String?, + modelArgs: json['model_args'] as Map?, + modelRoles: (json['model_roles'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ), + taskArgs: json['task_args'] as Map?, + messageLimit: (json['message_limit'] as num?)?.toInt(), + tokenLimit: (json['token_limit'] as num?)?.toInt(), + timeLimit: (json['time_limit'] as num?)?.toInt(), + workingLimit: (json['working_limit'] as num?)?.toInt(), + costLimit: (json['cost_limit'] as num?)?.toDouble(), + modelCostConfig: json['model_cost_config'] as Map?, + logSamples: json['log_samples'] as bool?, + logRealtime: json['log_realtime'] as bool?, + logImages: 
json['log_images'] as bool?, + logBuffer: (json['log_buffer'] as num?)?.toInt(), + logShared: (json['log_shared'] as num?)?.toInt(), + bundleDir: json['bundle_dir'] as String?, + bundleOverwrite: json['bundle_overwrite'] as bool?, + logDirAllowDirty: json['log_dir_allow_dirty'] as bool?, + evalSetId: json['eval_set_id'] as String?, + evalSetOverrides: json['eval_set_overrides'] as Map?, + taskDefaults: json['task_defaults'] as Map?, +); + +Map _$JobToJson(_Job instance) => { + 'log_dir': instance.logDir, + 'sandbox_type': instance.sandboxType, + 'max_connections': instance.maxConnections, + 'models': instance.models, + 'variants': instance.variants, + 'task_paths': instance.taskPaths, + 'tasks': instance.tasks?.map((k, e) => MapEntry(k, e.toJson())), + 'save_examples': instance.saveExamples, + 'retry_attempts': instance.retryAttempts, + 'max_retries': instance.maxRetries, + 'retry_wait': instance.retryWait, + 'retry_connections': instance.retryConnections, + 'retry_cleanup': instance.retryCleanup, + 'fail_on_error': instance.failOnError, + 'continue_on_fail': instance.continueOnFail, + 'retry_on_error': instance.retryOnError, + 'debug_errors': instance.debugErrors, + 'max_samples': instance.maxSamples, + 'max_tasks': instance.maxTasks, + 'max_subprocesses': instance.maxSubprocesses, + 'max_sandboxes': instance.maxSandboxes, + 'log_level': instance.logLevel, + 'log_level_transcript': instance.logLevelTranscript, + 'log_format': instance.logFormat, + 'tags': instance.tags, + 'metadata': instance.metadata, + 'trace': instance.trace, + 'display': instance.display, + 'score': instance.score, + 'limit': instance.limit, + 'sample_id': instance.sampleId, + 'sample_shuffle': instance.sampleShuffle, + 'epochs': instance.epochs, + 'approval': instance.approval, + 'solver': instance.solver, + 'sandbox_cleanup': instance.sandboxCleanup, + 'model_base_url': instance.modelBaseUrl, + 'model_args': instance.modelArgs, + 'model_roles': instance.modelRoles, + 'task_args': 
instance.taskArgs, + 'message_limit': instance.messageLimit, + 'token_limit': instance.tokenLimit, + 'time_limit': instance.timeLimit, + 'working_limit': instance.workingLimit, + 'cost_limit': instance.costLimit, + 'model_cost_config': instance.modelCostConfig, + 'log_samples': instance.logSamples, + 'log_realtime': instance.logRealtime, + 'log_images': instance.logImages, + 'log_buffer': instance.logBuffer, + 'log_shared': instance.logShared, + 'bundle_dir': instance.bundleDir, + 'bundle_overwrite': instance.bundleOverwrite, + 'log_dir_allow_dirty': instance.logDirAllowDirty, + 'eval_set_id': instance.evalSetId, + 'eval_set_overrides': instance.evalSetOverrides, + 'task_defaults': instance.taskDefaults, +}; + +_JobTask _$JobTaskFromJson(Map json) => _JobTask( + id: json['id'] as String, + includeSamples: (json['include_samples'] as List?) + ?.map((e) => e as String) + .toList(), + excludeSamples: (json['exclude_samples'] as List?) + ?.map((e) => e as String) + .toList(), + systemMessage: json['system_message'] as String?, +); + +Map _$JobTaskToJson(_JobTask instance) => { + 'id': instance.id, + 'include_samples': instance.includeSamples, + 'exclude_samples': instance.excludeSamples, + 'system_message': instance.systemMessage, +}; diff --git a/packages/eval_config/lib/src/models/models.dart b/packages/eval_config/lib/src/models/models.dart new file mode 100644 index 0000000..5b590fb --- /dev/null +++ b/packages/eval_config/lib/src/models/models.dart @@ -0,0 +1,13 @@ +// Config models (eval runner input configuration) +export 'context_file.dart'; +export 'job.dart'; +export 'variant.dart'; + +// Inspect AI models (mirrors the Python Inspect AI API types) +export 'dataset.dart'; +export 'eval_log.dart'; +export 'eval_set.dart'; +export 'field_spec.dart'; +export 'sample.dart'; +export 'task.dart'; +export 'task_info.dart'; diff --git a/packages/eval_config/lib/src/models/sample.dart b/packages/eval_config/lib/src/models/sample.dart new file mode 100644 index 
0000000..42ea852 --- /dev/null +++ b/packages/eval_config/lib/src/models/sample.dart @@ -0,0 +1,49 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +part 'sample.freezed.dart'; +part 'sample.g.dart'; + +/// Dart representation of Inspect AI's `Sample` class. +/// +/// A sample for an evaluation task. +/// +/// See [`Sample`](https://inspect.aisi.org.uk/reference/inspect_ai.dataset.html#sample). +@freezed +sealed class Sample with _$Sample { + const factory Sample({ + /// The input to be submitted to the model. + /// + /// Can be a simple string or a list of `ChatMessage` objects. + required Object input, + + /// List of available answer choices (used only for multiple-choice evals). + List? choices, + + /// Ideal target output. + /// + /// May be a literal value or narrative text to be used by a model grader. + /// Can be a single string or a list of strings. + @Default('') Object target, + + /// Unique identifier for the sample. + Object? id, + + /// Arbitrary metadata associated with the sample. + Map? metadata, + + /// Sandbox environment type and optional config file. + Object? sandbox, + + /// Files that go along with the sample (copied to `SandboxEnvironment`). + /// + /// Keys are destination paths, values are source paths, inline text, + /// or inline binary (base64-encoded data URLs). + Map? files, + + /// Setup script to run for sample (run within default + /// `SandboxEnvironment`). + String? 
setup, + }) = _Sample; + + factory Sample.fromJson(Map json) => _$SampleFromJson(json); +} diff --git a/packages/eval_config/lib/src/models/sample.freezed.dart b/packages/eval_config/lib/src/models/sample.freezed.dart new file mode 100644 index 0000000..c97f955 --- /dev/null +++ b/packages/eval_config/lib/src/models/sample.freezed.dart @@ -0,0 +1,348 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'sample.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$Sample { + +/// The input to be submitted to the model. +/// +/// Can be a simple string or a list of `ChatMessage` objects. + Object get input;/// List of available answer choices (used only for multiple-choice evals). + List? get choices;/// Ideal target output. +/// +/// May be a literal value or narrative text to be used by a model grader. +/// Can be a single string or a list of strings. + Object get target;/// Unique identifier for the sample. + Object? get id;/// Arbitrary metadata associated with the sample. + Map? get metadata;/// Sandbox environment type and optional config file. + Object? get sandbox;/// Files that go along with the sample (copied to `SandboxEnvironment`). +/// +/// Keys are destination paths, values are source paths, inline text, +/// or inline binary (base64-encoded data URLs). + Map? get files;/// Setup script to run for sample (run within default +/// `SandboxEnvironment`). 
+ String? get setup; +/// Create a copy of Sample +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$SampleCopyWith get copyWith => _$SampleCopyWithImpl(this as Sample, _$identity); + + /// Serializes this Sample to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Sample&&const DeepCollectionEquality().equals(other.input, input)&&const DeepCollectionEquality().equals(other.choices, choices)&&const DeepCollectionEquality().equals(other.target, target)&&const DeepCollectionEquality().equals(other.id, id)&&const DeepCollectionEquality().equals(other.metadata, metadata)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&const DeepCollectionEquality().equals(other.files, files)&&(identical(other.setup, setup) || other.setup == setup)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(input),const DeepCollectionEquality().hash(choices),const DeepCollectionEquality().hash(target),const DeepCollectionEquality().hash(id),const DeepCollectionEquality().hash(metadata),const DeepCollectionEquality().hash(sandbox),const DeepCollectionEquality().hash(files),setup); + +@override +String toString() { + return 'Sample(input: $input, choices: $choices, target: $target, id: $id, metadata: $metadata, sandbox: $sandbox, files: $files, setup: $setup)'; +} + + +} + +/// @nodoc +abstract mixin class $SampleCopyWith<$Res> { + factory $SampleCopyWith(Sample value, $Res Function(Sample) _then) = _$SampleCopyWithImpl; +@useResult +$Res call({ + Object input, List? choices, Object target, Object? id, Map? metadata, Object? sandbox, Map? files, String? 
setup +}); + + + + +} +/// @nodoc +class _$SampleCopyWithImpl<$Res> + implements $SampleCopyWith<$Res> { + _$SampleCopyWithImpl(this._self, this._then); + + final Sample _self; + final $Res Function(Sample) _then; + +/// Create a copy of Sample +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? input = null,Object? choices = freezed,Object? target = null,Object? id = freezed,Object? metadata = freezed,Object? sandbox = freezed,Object? files = freezed,Object? setup = freezed,}) { + return _then(_self.copyWith( +input: null == input ? _self.input : input ,choices: freezed == choices ? _self.choices : choices // ignore: cast_nullable_to_non_nullable +as List?,target: null == target ? _self.target : target ,id: freezed == id ? _self.id : id ,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,files: freezed == files ? _self.files : files // ignore: cast_nullable_to_non_nullable +as Map?,setup: freezed == setup ? _self.setup : setup // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Sample]. +extension SamplePatterns on Sample { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Sample value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Sample() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Sample value) $default,){ +final _that = this; +switch (_that) { +case _Sample(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Sample value)? $default,){ +final _that = this; +switch (_that) { +case _Sample() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( Object input, List? choices, Object target, Object? id, Map? metadata, Object? sandbox, Map? files, String? setup)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Sample() when $default != null: +return $default(_that.input,_that.choices,_that.target,_that.id,_that.metadata,_that.sandbox,_that.files,_that.setup);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( Object input, List? choices, Object target, Object? id, Map? metadata, Object? sandbox, Map? files, String? 
setup) $default,) {final _that = this; +switch (_that) { +case _Sample(): +return $default(_that.input,_that.choices,_that.target,_that.id,_that.metadata,_that.sandbox,_that.files,_that.setup);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( Object input, List? choices, Object target, Object? id, Map? metadata, Object? sandbox, Map? files, String? setup)? $default,) {final _that = this; +switch (_that) { +case _Sample() when $default != null: +return $default(_that.input,_that.choices,_that.target,_that.id,_that.metadata,_that.sandbox,_that.files,_that.setup);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Sample implements Sample { + const _Sample({required this.input, final List? choices, this.target = '', this.id, final Map? metadata, this.sandbox, final Map? files, this.setup}): _choices = choices,_metadata = metadata,_files = files; + factory _Sample.fromJson(Map json) => _$SampleFromJson(json); + +/// The input to be submitted to the model. +/// +/// Can be a simple string or a list of `ChatMessage` objects. +@override final Object input; +/// List of available answer choices (used only for multiple-choice evals). + final List? _choices; +/// List of available answer choices (used only for multiple-choice evals). +@override List? get choices { + final value = _choices; + if (value == null) return null; + if (_choices is EqualUnmodifiableListView) return _choices; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(value); +} + +/// Ideal target output. +/// +/// May be a literal value or narrative text to be used by a model grader. +/// Can be a single string or a list of strings. +@override@JsonKey() final Object target; +/// Unique identifier for the sample. 
+@override final Object? id; +/// Arbitrary metadata associated with the sample. + final Map? _metadata; +/// Arbitrary metadata associated with the sample. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Sandbox environment type and optional config file. +@override final Object? sandbox; +/// Files that go along with the sample (copied to `SandboxEnvironment`). +/// +/// Keys are destination paths, values are source paths, inline text, +/// or inline binary (base64-encoded data URLs). + final Map? _files; +/// Files that go along with the sample (copied to `SandboxEnvironment`). +/// +/// Keys are destination paths, values are source paths, inline text, +/// or inline binary (base64-encoded data URLs). +@override Map? get files { + final value = _files; + if (value == null) return null; + if (_files is EqualUnmodifiableMapView) return _files; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Setup script to run for sample (run within default +/// `SandboxEnvironment`). +@override final String? setup; + +/// Create a copy of Sample +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$SampleCopyWith<_Sample> get copyWith => __$SampleCopyWithImpl<_Sample>(this, _$identity); + +@override +Map toJson() { + return _$SampleToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Sample&&const DeepCollectionEquality().equals(other.input, input)&&const DeepCollectionEquality().equals(other._choices, _choices)&&const DeepCollectionEquality().equals(other.target, target)&&const DeepCollectionEquality().equals(other.id, id)&&const DeepCollectionEquality().equals(other._metadata, _metadata)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&const DeepCollectionEquality().equals(other._files, _files)&&(identical(other.setup, setup) || other.setup == setup)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,const DeepCollectionEquality().hash(input),const DeepCollectionEquality().hash(_choices),const DeepCollectionEquality().hash(target),const DeepCollectionEquality().hash(id),const DeepCollectionEquality().hash(_metadata),const DeepCollectionEquality().hash(sandbox),const DeepCollectionEquality().hash(_files),setup); + +@override +String toString() { + return 'Sample(input: $input, choices: $choices, target: $target, id: $id, metadata: $metadata, sandbox: $sandbox, files: $files, setup: $setup)'; +} + + +} + +/// @nodoc +abstract mixin class _$SampleCopyWith<$Res> implements $SampleCopyWith<$Res> { + factory _$SampleCopyWith(_Sample value, $Res Function(_Sample) _then) = __$SampleCopyWithImpl; +@override @useResult +$Res call({ + Object input, List? choices, Object target, Object? id, Map? metadata, Object? sandbox, Map? files, String? 
setup +}); + + + + +} +/// @nodoc +class __$SampleCopyWithImpl<$Res> + implements _$SampleCopyWith<$Res> { + __$SampleCopyWithImpl(this._self, this._then); + + final _Sample _self; + final $Res Function(_Sample) _then; + +/// Create a copy of Sample +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? input = null,Object? choices = freezed,Object? target = null,Object? id = freezed,Object? metadata = freezed,Object? sandbox = freezed,Object? files = freezed,Object? setup = freezed,}) { + return _then(_Sample( +input: null == input ? _self.input : input ,choices: freezed == choices ? _self._choices : choices // ignore: cast_nullable_to_non_nullable +as List?,target: null == target ? _self.target : target ,id: freezed == id ? _self.id : id ,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,files: freezed == files ? _self._files : files // ignore: cast_nullable_to_non_nullable +as Map?,setup: freezed == setup ? _self.setup : setup // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/sample.g.dart b/packages/eval_config/lib/src/models/sample.g.dart new file mode 100644 index 0000000..1073a43 --- /dev/null +++ b/packages/eval_config/lib/src/models/sample.g.dart @@ -0,0 +1,33 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'sample.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_Sample _$SampleFromJson(Map json) => _Sample( + input: json['input'] as Object, + choices: (json['choices'] as List?) + ?.map((e) => e as String) + .toList(), + target: json['target'] as Object? ?? 
'', + id: json['id'], + metadata: json['metadata'] as Map?, + sandbox: json['sandbox'], + files: (json['files'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ), + setup: json['setup'] as String?, +); + +Map _$SampleToJson(_Sample instance) => { + 'input': instance.input, + 'choices': instance.choices, + 'target': instance.target, + 'id': instance.id, + 'metadata': instance.metadata, + 'sandbox': instance.sandbox, + 'files': instance.files, + 'setup': instance.setup, +}; diff --git a/packages/eval_config/lib/src/models/task.dart b/packages/eval_config/lib/src/models/task.dart new file mode 100644 index 0000000..1b11434 --- /dev/null +++ b/packages/eval_config/lib/src/models/task.dart @@ -0,0 +1,126 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; +import 'package:eval_config/src/models/models.dart'; + +part 'task.freezed.dart'; +part 'task.g.dart'; + +/// Dart representation of Inspect AI's `Task` class. +/// +/// Models the configuration accepted by the +/// [`Task.__init__`](https://inspect.aisi.org.uk/reference/inspect_ai.html#task) +/// constructor. +@freezed +sealed class Task with _$Task { + const factory Task({ + /// Dataset to evaluate. + /// + /// A `Dataset`, a sequence of `Sample` objects, or `null`. + Dataset? dataset, + + /// Setup step (always run even when the main solver is replaced). + Object? setup, + + /// Solver or list of solvers. Defaults to `generate()`. + Object? solver, + + /// Optional cleanup function for task. + /// + /// Called after all solvers and scorers have run for each sample + /// (including if an exception occurs during the run). + Object? cleanup, + + /// Scorer used to evaluate model output. + Object? scorer, + + /// Alternative metrics (overrides the metrics provided by the scorer). + Object? metrics, + + /// Default model for task (optional, defaults to the eval model). + String? model, + + /// Model generation config for default model. + Object? config, + + /// Named roles for use in `get_model()`. 
+ @JsonKey(name: 'model_roles') Map? modelRoles, + + /// Sandbox environment type (or a shorthand spec). + Object? sandbox, + + /// Tool use approval policies. + Object? approval, + + /// Epochs to repeat samples for and optional score reducer function(s). + Object? epochs, + + /// Fail on sample errors. + /// + /// `true` = fail on first error (default), `false` = never fail, + /// `0.0–1.0` = fail if proportion exceeds threshold, + /// `>1` = fail if count exceeds threshold. + @JsonKey(name: 'fail_on_error') Object? failOnError, + + /// Continue running if the `fail_on_error` condition is met. + @JsonKey(name: 'continue_on_fail') bool? continueOnFail, + + /// Limit on total messages per sample. + @JsonKey(name: 'message_limit') int? messageLimit, + + /// Limit on total tokens per sample. + @JsonKey(name: 'token_limit') int? tokenLimit, + + /// Limit on clock time (in seconds) per sample. + @JsonKey(name: 'time_limit') int? timeLimit, + + /// Limit on working time (in seconds) per sample. + /// + /// Working time includes model generation, tool calls, etc. but does not + /// include waiting on retries or shared resources. + @JsonKey(name: 'working_limit') int? workingLimit, + + /// Limit on total cost (in dollars) per sample. + @JsonKey(name: 'cost_limit') double? costLimit, + + /// Early stopping callbacks. + @JsonKey(name: 'early_stopping') Object? earlyStopping, + + /// Task display name (e.g. for plotting). + /// + /// Defaults to the registered task name. + @JsonKey(name: 'display_name') String? displayName, + + /// Task function identifier for Mode 1 hydration. + /// + /// When present, the Python runner uses this to look up a pre-built + /// `@task` function (e.g. `"flutter_code_gen"` or + /// `"eval_runner.runner.tasks.flutter_code_gen"`). + /// When absent, the runner hydrates directly from JSON (Mode 2 — future). + @JsonKey(name: 'task_func') String? taskFunc, + + /// Task name. 
+ /// + /// Automatically determined based on the registered name if not specified. + String? name, + + /// Version of task (to distinguish evolutions of the task spec). + @Default(0) Object version, + + /// Additional metadata to associate with the task. + Map? metadata, + }) = _Task; + + factory Task.fromJson(Map json) => _$TaskFromJson(json); +} + +class TaskMetadata { + final String taskFunc; + final Map additional; + + TaskMetadata(this.taskFunc, this.additional); + + Map toJson() { + return { + 'taskFunc': taskFunc, + }; + } +} diff --git a/packages/eval_config/lib/src/models/task.freezed.dart b/packages/eval_config/lib/src/models/task.freezed.dart new file mode 100644 index 0000000..da32b22 --- /dev/null +++ b/packages/eval_config/lib/src/models/task.freezed.dart @@ -0,0 +1,455 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'task.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$Task { + +/// Dataset to evaluate. +/// +/// A `Dataset`, a sequence of `Sample` objects, or `null`. + Dataset? get dataset;/// Setup step (always run even when the main solver is replaced). + Object? get setup;/// Solver or list of solvers. Defaults to `generate()`. + Object? get solver;/// Optional cleanup function for task. +/// +/// Called after all solvers and scorers have run for each sample +/// (including if an exception occurs during the run). + Object? 
get cleanup;/// Scorer used to evaluate model output. + Object? get scorer;/// Alternative metrics (overrides the metrics provided by the scorer). + Object? get metrics;/// Default model for task (optional, defaults to the eval model). + String? get model;/// Model generation config for default model. + Object? get config;/// Named roles for use in `get_model()`. +@JsonKey(name: 'model_roles') Map? get modelRoles;/// Sandbox environment type (or a shorthand spec). + Object? get sandbox;/// Tool use approval policies. + Object? get approval;/// Epochs to repeat samples for and optional score reducer function(s). + Object? get epochs;/// Fail on sample errors. +/// +/// `true` = fail on first error (default), `false` = never fail, +/// `0.0–1.0` = fail if proportion exceeds threshold, +/// `>1` = fail if count exceeds threshold. +@JsonKey(name: 'fail_on_error') Object? get failOnError;/// Continue running if the `fail_on_error` condition is met. +@JsonKey(name: 'continue_on_fail') bool? get continueOnFail;/// Limit on total messages per sample. +@JsonKey(name: 'message_limit') int? get messageLimit;/// Limit on total tokens per sample. +@JsonKey(name: 'token_limit') int? get tokenLimit;/// Limit on clock time (in seconds) per sample. +@JsonKey(name: 'time_limit') int? get timeLimit;/// Limit on working time (in seconds) per sample. +/// +/// Working time includes model generation, tool calls, etc. but does not +/// include waiting on retries or shared resources. +@JsonKey(name: 'working_limit') int? get workingLimit;/// Limit on total cost (in dollars) per sample. +@JsonKey(name: 'cost_limit') double? get costLimit;/// Early stopping callbacks. +@JsonKey(name: 'early_stopping') Object? get earlyStopping;/// Task display name (e.g. for plotting). +/// +/// Defaults to the registered task name. +@JsonKey(name: 'display_name') String? get displayName;//TODO: This should be added as metadata +/// Task function identifier for Mode 1 hydration. 
+/// +/// When present, the Python runner uses this to look up a pre-built +/// `@task` function (e.g. `"flutter_code_gen"` or +/// `"eval_runner.runner.tasks.flutter_code_gen"`). +/// When absent, the runner hydrates directly from JSON (Mode 2 — future). +@JsonKey(name: 'task_func') String? get taskFunc;/// Task name. +/// +/// Automatically determined based on the registered name if not specified. + String? get name;/// Version of task (to distinguish evolutions of the task spec). + Object get version;/// Additional metadata to associate with the task. + Map? get metadata; +/// Create a copy of Task +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$TaskCopyWith get copyWith => _$TaskCopyWithImpl(this as Task, _$identity); + + /// Serializes this Task to a JSON map. + Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Task&&(identical(other.dataset, dataset) || other.dataset == dataset)&&const DeepCollectionEquality().equals(other.setup, setup)&&const DeepCollectionEquality().equals(other.solver, solver)&&const DeepCollectionEquality().equals(other.cleanup, cleanup)&&const DeepCollectionEquality().equals(other.scorer, scorer)&&const DeepCollectionEquality().equals(other.metrics, metrics)&&(identical(other.model, model) || other.model == model)&&const DeepCollectionEquality().equals(other.config, config)&&const DeepCollectionEquality().equals(other.modelRoles, modelRoles)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&const DeepCollectionEquality().equals(other.approval, approval)&&const DeepCollectionEquality().equals(other.epochs, epochs)&&const DeepCollectionEquality().equals(other.failOnError, failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.messageLimit, messageLimit) || 
other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.costLimit, costLimit) || other.costLimit == costLimit)&&const DeepCollectionEquality().equals(other.earlyStopping, earlyStopping)&&(identical(other.displayName, displayName) || other.displayName == displayName)&&(identical(other.taskFunc, taskFunc) || other.taskFunc == taskFunc)&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.version, version)&&const DeepCollectionEquality().equals(other.metadata, metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,dataset,const DeepCollectionEquality().hash(setup),const DeepCollectionEquality().hash(solver),const DeepCollectionEquality().hash(cleanup),const DeepCollectionEquality().hash(scorer),const DeepCollectionEquality().hash(metrics),model,const DeepCollectionEquality().hash(config),const DeepCollectionEquality().hash(modelRoles),const DeepCollectionEquality().hash(sandbox),const DeepCollectionEquality().hash(approval),const DeepCollectionEquality().hash(epochs),const DeepCollectionEquality().hash(failOnError),continueOnFail,messageLimit,tokenLimit,timeLimit,workingLimit,costLimit,const DeepCollectionEquality().hash(earlyStopping),displayName,taskFunc,name,const DeepCollectionEquality().hash(version),const DeepCollectionEquality().hash(metadata)]); + +@override +String toString() { + return 'Task(dataset: $dataset, setup: $setup, solver: $solver, cleanup: $cleanup, scorer: $scorer, metrics: $metrics, model: $model, config: $config, modelRoles: $modelRoles, sandbox: $sandbox, approval: $approval, epochs: $epochs, failOnError: $failOnError, continueOnFail: $continueOnFail, messageLimit: $messageLimit, tokenLimit: 
$tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, costLimit: $costLimit, earlyStopping: $earlyStopping, displayName: $displayName, taskFunc: $taskFunc, name: $name, version: $version, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class $TaskCopyWith<$Res> { + factory $TaskCopyWith(Task value, $Res Function(Task) _then) = _$TaskCopyWithImpl; +@useResult +$Res call({ + Dataset? dataset, Object? setup, Object? solver, Object? cleanup, Object? scorer, Object? metrics, String? model, Object? config,@JsonKey(name: 'model_roles') Map? modelRoles, Object? sandbox, Object? approval, Object? epochs,@JsonKey(name: 'fail_on_error') Object? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? workingLimit,@JsonKey(name: 'cost_limit') double? costLimit,@JsonKey(name: 'early_stopping') Object? earlyStopping,@JsonKey(name: 'display_name') String? displayName,@JsonKey(name: 'task_func') String? taskFunc, String? name, Object version, Map? metadata +}); + + +$DatasetCopyWith<$Res>? get dataset; + +} +/// @nodoc +class _$TaskCopyWithImpl<$Res> + implements $TaskCopyWith<$Res> { + _$TaskCopyWithImpl(this._self, this._then); + + final Task _self; + final $Res Function(Task) _then; + +/// Create a copy of Task +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? dataset = freezed,Object? setup = freezed,Object? solver = freezed,Object? cleanup = freezed,Object? scorer = freezed,Object? metrics = freezed,Object? model = freezed,Object? config = freezed,Object? modelRoles = freezed,Object? sandbox = freezed,Object? approval = freezed,Object? epochs = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? 
timeLimit = freezed,Object? workingLimit = freezed,Object? costLimit = freezed,Object? earlyStopping = freezed,Object? displayName = freezed,Object? taskFunc = freezed,Object? name = freezed,Object? version = null,Object? metadata = freezed,}) { + return _then(_self.copyWith( +dataset: freezed == dataset ? _self.dataset : dataset // ignore: cast_nullable_to_non_nullable +as Dataset?,setup: freezed == setup ? _self.setup : setup ,solver: freezed == solver ? _self.solver : solver ,cleanup: freezed == cleanup ? _self.cleanup : cleanup ,scorer: freezed == scorer ? _self.scorer : scorer ,metrics: freezed == metrics ? _self.metrics : metrics ,model: freezed == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String?,config: freezed == config ? _self.config : config ,modelRoles: freezed == modelRoles ? _self.modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,approval: freezed == approval ? _self.approval : approval ,epochs: freezed == epochs ? _self.epochs : epochs ,failOnError: freezed == failOnError ? _self.failOnError : failOnError ,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,costLimit: freezed == costLimit ? _self.costLimit : costLimit // ignore: cast_nullable_to_non_nullable +as double?,earlyStopping: freezed == earlyStopping ? _self.earlyStopping : earlyStopping ,displayName: freezed == displayName ? 
_self.displayName : displayName // ignore: cast_nullable_to_non_nullable +as String?,taskFunc: freezed == taskFunc ? _self.taskFunc : taskFunc // ignore: cast_nullable_to_non_nullable +as String?,name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,version: null == version ? _self.version : version ,metadata: freezed == metadata ? _self.metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} +/// Create a copy of Task +/// with the given fields replaced by the non-null parameter values. +@override +@pragma('vm:prefer-inline') +$DatasetCopyWith<$Res>? get dataset { + if (_self.dataset == null) { + return null; + } + + return $DatasetCopyWith<$Res>(_self.dataset!, (value) { + return _then(_self.copyWith(dataset: value)); + }); +} +} + + +/// Adds pattern-matching-related methods to [Task]. +extension TaskPatterns on Task { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Task value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Task() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Task value) $default,){ +final _that = this; +switch (_that) { +case _Task(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Task value)? $default,){ +final _that = this; +switch (_that) { +case _Task() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( Dataset? dataset, Object? setup, Object? solver, Object? cleanup, Object? scorer, Object? metrics, String? model, Object? config, @JsonKey(name: 'model_roles') Map? modelRoles, Object? sandbox, Object? approval, Object? epochs, @JsonKey(name: 'fail_on_error') Object? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'early_stopping') Object? earlyStopping, @JsonKey(name: 'display_name') String? displayName, @JsonKey(name: 'task_func') String? taskFunc, String? name, Object version, Map? metadata)? 
$default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Task() when $default != null: +return $default(_that.dataset,_that.setup,_that.solver,_that.cleanup,_that.scorer,_that.metrics,_that.model,_that.config,_that.modelRoles,_that.sandbox,_that.approval,_that.epochs,_that.failOnError,_that.continueOnFail,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.earlyStopping,_that.displayName,_that.taskFunc,_that.name,_that.version,_that.metadata);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( Dataset? dataset, Object? setup, Object? solver, Object? cleanup, Object? scorer, Object? metrics, String? model, Object? config, @JsonKey(name: 'model_roles') Map? modelRoles, Object? sandbox, Object? approval, Object? epochs, @JsonKey(name: 'fail_on_error') Object? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'early_stopping') Object? earlyStopping, @JsonKey(name: 'display_name') String? displayName, @JsonKey(name: 'task_func') String? taskFunc, String? name, Object version, Map? 
metadata) $default,) {final _that = this; +switch (_that) { +case _Task(): +return $default(_that.dataset,_that.setup,_that.solver,_that.cleanup,_that.scorer,_that.metrics,_that.model,_that.config,_that.modelRoles,_that.sandbox,_that.approval,_that.epochs,_that.failOnError,_that.continueOnFail,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.earlyStopping,_that.displayName,_that.taskFunc,_that.name,_that.version,_that.metadata);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( Dataset? dataset, Object? setup, Object? solver, Object? cleanup, Object? scorer, Object? metrics, String? model, Object? config, @JsonKey(name: 'model_roles') Map? modelRoles, Object? sandbox, Object? approval, Object? epochs, @JsonKey(name: 'fail_on_error') Object? failOnError, @JsonKey(name: 'continue_on_fail') bool? continueOnFail, @JsonKey(name: 'message_limit') int? messageLimit, @JsonKey(name: 'token_limit') int? tokenLimit, @JsonKey(name: 'time_limit') int? timeLimit, @JsonKey(name: 'working_limit') int? workingLimit, @JsonKey(name: 'cost_limit') double? costLimit, @JsonKey(name: 'early_stopping') Object? earlyStopping, @JsonKey(name: 'display_name') String? displayName, @JsonKey(name: 'task_func') String? taskFunc, String? name, Object version, Map? metadata)? 
$default,) {final _that = this; +switch (_that) { +case _Task() when $default != null: +return $default(_that.dataset,_that.setup,_that.solver,_that.cleanup,_that.scorer,_that.metrics,_that.model,_that.config,_that.modelRoles,_that.sandbox,_that.approval,_that.epochs,_that.failOnError,_that.continueOnFail,_that.messageLimit,_that.tokenLimit,_that.timeLimit,_that.workingLimit,_that.costLimit,_that.earlyStopping,_that.displayName,_that.taskFunc,_that.name,_that.version,_that.metadata);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Task implements Task { + const _Task({this.dataset, this.setup, this.solver, this.cleanup, this.scorer, this.metrics, this.model, this.config, @JsonKey(name: 'model_roles') final Map? modelRoles, this.sandbox, this.approval, this.epochs, @JsonKey(name: 'fail_on_error') this.failOnError, @JsonKey(name: 'continue_on_fail') this.continueOnFail, @JsonKey(name: 'message_limit') this.messageLimit, @JsonKey(name: 'token_limit') this.tokenLimit, @JsonKey(name: 'time_limit') this.timeLimit, @JsonKey(name: 'working_limit') this.workingLimit, @JsonKey(name: 'cost_limit') this.costLimit, @JsonKey(name: 'early_stopping') this.earlyStopping, @JsonKey(name: 'display_name') this.displayName, @JsonKey(name: 'task_func') this.taskFunc, this.name, this.version = 0, final Map? metadata}): _modelRoles = modelRoles,_metadata = metadata; + factory _Task.fromJson(Map json) => _$TaskFromJson(json); + +/// Dataset to evaluate. +/// +/// A `Dataset`, a sequence of `Sample` objects, or `null`. +@override final Dataset? dataset; +/// Setup step (always run even when the main solver is replaced). +@override final Object? setup; +/// Solver or list of solvers. Defaults to `generate()`. +@override final Object? solver; +/// Optional cleanup function for task. +/// +/// Called after all solvers and scorers have run for each sample +/// (including if an exception occurs during the run). +@override final Object? 
cleanup; +/// Scorer used to evaluate model output. +@override final Object? scorer; +/// Alternative metrics (overrides the metrics provided by the scorer). +@override final Object? metrics; +/// Default model for task (optional, defaults to the eval model). +@override final String? model; +/// Model generation config for default model. +@override final Object? config; +/// Named roles for use in `get_model()`. + final Map? _modelRoles; +/// Named roles for use in `get_model()`. +@override@JsonKey(name: 'model_roles') Map? get modelRoles { + final value = _modelRoles; + if (value == null) return null; + if (_modelRoles is EqualUnmodifiableMapView) return _modelRoles; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + +/// Sandbox environment type (or a shorthand spec). +@override final Object? sandbox; +/// Tool use approval policies. +@override final Object? approval; +/// Epochs to repeat samples for and optional score reducer function(s). +@override final Object? epochs; +/// Fail on sample errors. +/// +/// `true` = fail on first error (default), `false` = never fail, +/// `0.0–1.0` = fail if proportion exceeds threshold, +/// `>1` = fail if count exceeds threshold. +@override@JsonKey(name: 'fail_on_error') final Object? failOnError; +/// Continue running if the `fail_on_error` condition is met. +@override@JsonKey(name: 'continue_on_fail') final bool? continueOnFail; +/// Limit on total messages per sample. +@override@JsonKey(name: 'message_limit') final int? messageLimit; +/// Limit on total tokens per sample. +@override@JsonKey(name: 'token_limit') final int? tokenLimit; +/// Limit on clock time (in seconds) per sample. +@override@JsonKey(name: 'time_limit') final int? timeLimit; +/// Limit on working time (in seconds) per sample. +/// +/// Working time includes model generation, tool calls, etc. but does not +/// include waiting on retries or shared resources. +@override@JsonKey(name: 'working_limit') final int? 
workingLimit; +/// Limit on total cost (in dollars) per sample. +@override@JsonKey(name: 'cost_limit') final double? costLimit; +/// Early stopping callbacks. +@override@JsonKey(name: 'early_stopping') final Object? earlyStopping; +/// Task display name (e.g. for plotting). +/// +/// Defaults to the registered task name. +@override@JsonKey(name: 'display_name') final String? displayName; +//TODO: This should be added as metadata +/// Task function identifier for Mode 1 hydration. +/// +/// When present, the Python runner uses this to look up a pre-built +/// `@task` function (e.g. `"flutter_code_gen"` or +/// `"eval_runner.runner.tasks.flutter_code_gen"`). +/// When absent, the runner hydrates directly from JSON (Mode 2 — future). +@override@JsonKey(name: 'task_func') final String? taskFunc; +/// Task name. +/// +/// Automatically determined based on the registered name if not specified. +@override final String? name; +/// Version of task (to distinguish evolutions of the task spec). +@override@JsonKey() final Object version; +/// Additional metadata to associate with the task. + final Map? _metadata; +/// Additional metadata to associate with the task. +@override Map? get metadata { + final value = _metadata; + if (value == null) return null; + if (_metadata is EqualUnmodifiableMapView) return _metadata; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(value); +} + + +/// Create a copy of Task +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$TaskCopyWith<_Task> get copyWith => __$TaskCopyWithImpl<_Task>(this, _$identity); + +@override +Map toJson() { + return _$TaskToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Task&&(identical(other.dataset, dataset) || other.dataset == dataset)&&const DeepCollectionEquality().equals(other.setup, setup)&&const DeepCollectionEquality().equals(other.solver, solver)&&const DeepCollectionEquality().equals(other.cleanup, cleanup)&&const DeepCollectionEquality().equals(other.scorer, scorer)&&const DeepCollectionEquality().equals(other.metrics, metrics)&&(identical(other.model, model) || other.model == model)&&const DeepCollectionEquality().equals(other.config, config)&&const DeepCollectionEquality().equals(other._modelRoles, _modelRoles)&&const DeepCollectionEquality().equals(other.sandbox, sandbox)&&const DeepCollectionEquality().equals(other.approval, approval)&&const DeepCollectionEquality().equals(other.epochs, epochs)&&const DeepCollectionEquality().equals(other.failOnError, failOnError)&&(identical(other.continueOnFail, continueOnFail) || other.continueOnFail == continueOnFail)&&(identical(other.messageLimit, messageLimit) || other.messageLimit == messageLimit)&&(identical(other.tokenLimit, tokenLimit) || other.tokenLimit == tokenLimit)&&(identical(other.timeLimit, timeLimit) || other.timeLimit == timeLimit)&&(identical(other.workingLimit, workingLimit) || other.workingLimit == workingLimit)&&(identical(other.costLimit, costLimit) || other.costLimit == costLimit)&&const DeepCollectionEquality().equals(other.earlyStopping, earlyStopping)&&(identical(other.displayName, displayName) || other.displayName == displayName)&&(identical(other.taskFunc, taskFunc) || other.taskFunc == taskFunc)&&(identical(other.name, name) || other.name == name)&&const 
DeepCollectionEquality().equals(other.version, version)&&const DeepCollectionEquality().equals(other._metadata, _metadata)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hashAll([runtimeType,dataset,const DeepCollectionEquality().hash(setup),const DeepCollectionEquality().hash(solver),const DeepCollectionEquality().hash(cleanup),const DeepCollectionEquality().hash(scorer),const DeepCollectionEquality().hash(metrics),model,const DeepCollectionEquality().hash(config),const DeepCollectionEquality().hash(_modelRoles),const DeepCollectionEquality().hash(sandbox),const DeepCollectionEquality().hash(approval),const DeepCollectionEquality().hash(epochs),const DeepCollectionEquality().hash(failOnError),continueOnFail,messageLimit,tokenLimit,timeLimit,workingLimit,costLimit,const DeepCollectionEquality().hash(earlyStopping),displayName,taskFunc,name,const DeepCollectionEquality().hash(version),const DeepCollectionEquality().hash(_metadata)]); + +@override +String toString() { + return 'Task(dataset: $dataset, setup: $setup, solver: $solver, cleanup: $cleanup, scorer: $scorer, metrics: $metrics, model: $model, config: $config, modelRoles: $modelRoles, sandbox: $sandbox, approval: $approval, epochs: $epochs, failOnError: $failOnError, continueOnFail: $continueOnFail, messageLimit: $messageLimit, tokenLimit: $tokenLimit, timeLimit: $timeLimit, workingLimit: $workingLimit, costLimit: $costLimit, earlyStopping: $earlyStopping, displayName: $displayName, taskFunc: $taskFunc, name: $name, version: $version, metadata: $metadata)'; +} + + +} + +/// @nodoc +abstract mixin class _$TaskCopyWith<$Res> implements $TaskCopyWith<$Res> { + factory _$TaskCopyWith(_Task value, $Res Function(_Task) _then) = __$TaskCopyWithImpl; +@override @useResult +$Res call({ + Dataset? dataset, Object? setup, Object? solver, Object? cleanup, Object? scorer, Object? metrics, String? model, Object? config,@JsonKey(name: 'model_roles') Map? 
modelRoles, Object? sandbox, Object? approval, Object? epochs,@JsonKey(name: 'fail_on_error') Object? failOnError,@JsonKey(name: 'continue_on_fail') bool? continueOnFail,@JsonKey(name: 'message_limit') int? messageLimit,@JsonKey(name: 'token_limit') int? tokenLimit,@JsonKey(name: 'time_limit') int? timeLimit,@JsonKey(name: 'working_limit') int? workingLimit,@JsonKey(name: 'cost_limit') double? costLimit,@JsonKey(name: 'early_stopping') Object? earlyStopping,@JsonKey(name: 'display_name') String? displayName,@JsonKey(name: 'task_func') String? taskFunc, String? name, Object version, Map? metadata +}); + + +@override $DatasetCopyWith<$Res>? get dataset; + +} +/// @nodoc +class __$TaskCopyWithImpl<$Res> + implements _$TaskCopyWith<$Res> { + __$TaskCopyWithImpl(this._self, this._then); + + final _Task _self; + final $Res Function(_Task) _then; + +/// Create a copy of Task +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? dataset = freezed,Object? setup = freezed,Object? solver = freezed,Object? cleanup = freezed,Object? scorer = freezed,Object? metrics = freezed,Object? model = freezed,Object? config = freezed,Object? modelRoles = freezed,Object? sandbox = freezed,Object? approval = freezed,Object? epochs = freezed,Object? failOnError = freezed,Object? continueOnFail = freezed,Object? messageLimit = freezed,Object? tokenLimit = freezed,Object? timeLimit = freezed,Object? workingLimit = freezed,Object? costLimit = freezed,Object? earlyStopping = freezed,Object? displayName = freezed,Object? taskFunc = freezed,Object? name = freezed,Object? version = null,Object? metadata = freezed,}) { + return _then(_Task( +dataset: freezed == dataset ? _self.dataset : dataset // ignore: cast_nullable_to_non_nullable +as Dataset?,setup: freezed == setup ? _self.setup : setup ,solver: freezed == solver ? _self.solver : solver ,cleanup: freezed == cleanup ? _self.cleanup : cleanup ,scorer: freezed == scorer ? 
_self.scorer : scorer ,metrics: freezed == metrics ? _self.metrics : metrics ,model: freezed == model ? _self.model : model // ignore: cast_nullable_to_non_nullable +as String?,config: freezed == config ? _self.config : config ,modelRoles: freezed == modelRoles ? _self._modelRoles : modelRoles // ignore: cast_nullable_to_non_nullable +as Map?,sandbox: freezed == sandbox ? _self.sandbox : sandbox ,approval: freezed == approval ? _self.approval : approval ,epochs: freezed == epochs ? _self.epochs : epochs ,failOnError: freezed == failOnError ? _self.failOnError : failOnError ,continueOnFail: freezed == continueOnFail ? _self.continueOnFail : continueOnFail // ignore: cast_nullable_to_non_nullable +as bool?,messageLimit: freezed == messageLimit ? _self.messageLimit : messageLimit // ignore: cast_nullable_to_non_nullable +as int?,tokenLimit: freezed == tokenLimit ? _self.tokenLimit : tokenLimit // ignore: cast_nullable_to_non_nullable +as int?,timeLimit: freezed == timeLimit ? _self.timeLimit : timeLimit // ignore: cast_nullable_to_non_nullable +as int?,workingLimit: freezed == workingLimit ? _self.workingLimit : workingLimit // ignore: cast_nullable_to_non_nullable +as int?,costLimit: freezed == costLimit ? _self.costLimit : costLimit // ignore: cast_nullable_to_non_nullable +as double?,earlyStopping: freezed == earlyStopping ? _self.earlyStopping : earlyStopping ,displayName: freezed == displayName ? _self.displayName : displayName // ignore: cast_nullable_to_non_nullable +as String?,taskFunc: freezed == taskFunc ? _self.taskFunc : taskFunc // ignore: cast_nullable_to_non_nullable +as String?,name: freezed == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String?,version: null == version ? _self.version : version ,metadata: freezed == metadata ? _self._metadata : metadata // ignore: cast_nullable_to_non_nullable +as Map?, + )); +} + +/// Create a copy of Task +/// with the given fields replaced by the non-null parameter values. 
+@override +@pragma('vm:prefer-inline') +$DatasetCopyWith<$Res>? get dataset { + if (_self.dataset == null) { + return null; + } + + return $DatasetCopyWith<$Res>(_self.dataset!, (value) { + return _then(_self.copyWith(dataset: value)); + }); +} +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/task.g.dart b/packages/eval_config/lib/src/models/task.g.dart new file mode 100644 index 0000000..9906b3a --- /dev/null +++ b/packages/eval_config/lib/src/models/task.g.dart @@ -0,0 +1,67 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'task.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_Task _$TaskFromJson(Map json) => _Task( + dataset: json['dataset'] == null + ? null + : Dataset.fromJson(json['dataset'] as Map), + setup: json['setup'], + solver: json['solver'], + cleanup: json['cleanup'], + scorer: json['scorer'], + metrics: json['metrics'], + model: json['model'] as String?, + config: json['config'], + modelRoles: (json['model_roles'] as Map?)?.map( + (k, e) => MapEntry(k, e as String), + ), + sandbox: json['sandbox'], + approval: json['approval'], + epochs: json['epochs'], + failOnError: json['fail_on_error'], + continueOnFail: json['continue_on_fail'] as bool?, + messageLimit: (json['message_limit'] as num?)?.toInt(), + tokenLimit: (json['token_limit'] as num?)?.toInt(), + timeLimit: (json['time_limit'] as num?)?.toInt(), + workingLimit: (json['working_limit'] as num?)?.toInt(), + costLimit: (json['cost_limit'] as num?)?.toDouble(), + earlyStopping: json['early_stopping'], + displayName: json['display_name'] as String?, + taskFunc: json['task_func'] as String?, + name: json['name'] as String?, + version: json['version'] as Object? ?? 
0, + metadata: json['metadata'] as Map?, +); + +Map _$TaskToJson(_Task instance) => { + 'dataset': instance.dataset?.toJson(), + 'setup': instance.setup, + 'solver': instance.solver, + 'cleanup': instance.cleanup, + 'scorer': instance.scorer, + 'metrics': instance.metrics, + 'model': instance.model, + 'config': instance.config, + 'model_roles': instance.modelRoles, + 'sandbox': instance.sandbox, + 'approval': instance.approval, + 'epochs': instance.epochs, + 'fail_on_error': instance.failOnError, + 'continue_on_fail': instance.continueOnFail, + 'message_limit': instance.messageLimit, + 'token_limit': instance.tokenLimit, + 'time_limit': instance.timeLimit, + 'working_limit': instance.workingLimit, + 'cost_limit': instance.costLimit, + 'early_stopping': instance.earlyStopping, + 'display_name': instance.displayName, + 'task_func': instance.taskFunc, + 'name': instance.name, + 'version': instance.version, + 'metadata': instance.metadata, +}; diff --git a/packages/eval_config/lib/src/models/task_info.dart b/packages/eval_config/lib/src/models/task_info.dart new file mode 100644 index 0000000..0d30a51 --- /dev/null +++ b/packages/eval_config/lib/src/models/task_info.dart @@ -0,0 +1,26 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +part 'task_info.freezed.dart'; +part 'task_info.g.dart'; + +/// Dart representation of Inspect AI's `TaskInfo` class. +/// +/// Task information including file path, name, and attributes. +/// +/// See [`TaskInfo`](https://inspect.aisi.org.uk/reference/inspect_ai.html#taskinfo). +@freezed +sealed class TaskInfo with _$TaskInfo { + const factory TaskInfo({ + /// File path where the task was loaded from. + required String file, + + /// Task name (defaults to the function name). + required String name, + + /// Task attributes (arguments passed to `@task`). 
+ @Default({}) Map attribs, + }) = _TaskInfo; + + factory TaskInfo.fromJson(Map json) => + _$TaskInfoFromJson(json); +} diff --git a/packages/eval_config/lib/src/models/task_info.freezed.dart b/packages/eval_config/lib/src/models/task_info.freezed.dart new file mode 100644 index 0000000..d26db48 --- /dev/null +++ b/packages/eval_config/lib/src/models/task_info.freezed.dart @@ -0,0 +1,290 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'task_info.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$TaskInfo { + +/// File path where the task was loaded from. + String get file;/// Task name (defaults to the function name). + String get name;/// Task attributes (arguments passed to `@task`). + Map get attribs; +/// Create a copy of TaskInfo +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$TaskInfoCopyWith get copyWith => _$TaskInfoCopyWithImpl(this as TaskInfo, _$identity); + + /// Serializes this TaskInfo to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is TaskInfo&&(identical(other.file, file) || other.file == file)&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.attribs, attribs)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,file,name,const DeepCollectionEquality().hash(attribs)); + +@override +String toString() { + return 'TaskInfo(file: $file, name: $name, attribs: $attribs)'; +} + + +} + +/// @nodoc +abstract mixin class $TaskInfoCopyWith<$Res> { + factory $TaskInfoCopyWith(TaskInfo value, $Res Function(TaskInfo) _then) = _$TaskInfoCopyWithImpl; +@useResult +$Res call({ + String file, String name, Map attribs +}); + + + + +} +/// @nodoc +class _$TaskInfoCopyWithImpl<$Res> + implements $TaskInfoCopyWith<$Res> { + _$TaskInfoCopyWithImpl(this._self, this._then); + + final TaskInfo _self; + final $Res Function(TaskInfo) _then; + +/// Create a copy of TaskInfo +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? file = null,Object? name = null,Object? attribs = null,}) { + return _then(_self.copyWith( +file: null == file ? _self.file : file // ignore: cast_nullable_to_non_nullable +as String,name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,attribs: null == attribs ? _self.attribs : attribs // ignore: cast_nullable_to_non_nullable +as Map, + )); +} + +} + + +/// Adds pattern-matching-related methods to [TaskInfo]. +extension TaskInfoPatterns on TaskInfo { +/// A variant of `map` that fallback to returning `orElse`. 
+/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _TaskInfo value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _TaskInfo() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _TaskInfo value) $default,){ +final _that = this; +switch (_that) { +case _TaskInfo(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _TaskInfo value)? $default,){ +final _that = this; +switch (_that) { +case _TaskInfo() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String file, String name, Map attribs)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _TaskInfo() when $default != null: +return $default(_that.file,_that.name,_that.attribs);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. 
+/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String file, String name, Map attribs) $default,) {final _that = this; +switch (_that) { +case _TaskInfo(): +return $default(_that.file,_that.name,_that.attribs);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? Function( String file, String name, Map attribs)? $default,) {final _that = this; +switch (_that) { +case _TaskInfo() when $default != null: +return $default(_that.file,_that.name,_that.attribs);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _TaskInfo implements TaskInfo { + const _TaskInfo({required this.file, required this.name, final Map attribs = const {}}): _attribs = attribs; + factory _TaskInfo.fromJson(Map json) => _$TaskInfoFromJson(json); + +/// File path where the task was loaded from. +@override final String file; +/// Task name (defaults to the function name). +@override final String name; +/// Task attributes (arguments passed to `@task`). + final Map _attribs; +/// Task attributes (arguments passed to `@task`). +@override@JsonKey() Map get attribs { + if (_attribs is EqualUnmodifiableMapView) return _attribs; + // ignore: implicit_dynamic_type + return EqualUnmodifiableMapView(_attribs); +} + + +/// Create a copy of TaskInfo +/// with the given fields replaced by the non-null parameter values. 
+@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$TaskInfoCopyWith<_TaskInfo> get copyWith => __$TaskInfoCopyWithImpl<_TaskInfo>(this, _$identity); + +@override +Map toJson() { + return _$TaskInfoToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _TaskInfo&&(identical(other.file, file) || other.file == file)&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other._attribs, _attribs)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,file,name,const DeepCollectionEquality().hash(_attribs)); + +@override +String toString() { + return 'TaskInfo(file: $file, name: $name, attribs: $attribs)'; +} + + +} + +/// @nodoc +abstract mixin class _$TaskInfoCopyWith<$Res> implements $TaskInfoCopyWith<$Res> { + factory _$TaskInfoCopyWith(_TaskInfo value, $Res Function(_TaskInfo) _then) = __$TaskInfoCopyWithImpl; +@override @useResult +$Res call({ + String file, String name, Map attribs +}); + + + + +} +/// @nodoc +class __$TaskInfoCopyWithImpl<$Res> + implements _$TaskInfoCopyWith<$Res> { + __$TaskInfoCopyWithImpl(this._self, this._then); + + final _TaskInfo _self; + final $Res Function(_TaskInfo) _then; + +/// Create a copy of TaskInfo +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? file = null,Object? name = null,Object? attribs = null,}) { + return _then(_TaskInfo( +file: null == file ? _self.file : file // ignore: cast_nullable_to_non_nullable +as String,name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,attribs: null == attribs ? 
_self._attribs : attribs // ignore: cast_nullable_to_non_nullable +as Map, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/task_info.g.dart b/packages/eval_config/lib/src/models/task_info.g.dart new file mode 100644 index 0000000..84a1d15 --- /dev/null +++ b/packages/eval_config/lib/src/models/task_info.g.dart @@ -0,0 +1,19 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'task_info.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_TaskInfo _$TaskInfoFromJson(Map json) => _TaskInfo( + file: json['file'] as String, + name: json['name'] as String, + attribs: json['attribs'] as Map? ?? const {}, +); + +Map _$TaskInfoToJson(_TaskInfo instance) => { + 'file': instance.file, + 'name': instance.name, + 'attribs': instance.attribs, +}; diff --git a/packages/eval_config/lib/src/models/variant.dart b/packages/eval_config/lib/src/models/variant.dart new file mode 100644 index 0000000..82afa37 --- /dev/null +++ b/packages/eval_config/lib/src/models/variant.dart @@ -0,0 +1,60 @@ +import 'package:freezed_annotation/freezed_annotation.dart'; + +import 'context_file.dart'; + +part 'variant.freezed.dart'; +part 'variant.g.dart'; + +/// A configuration variant for running evaluations. +/// +/// Variants define different testing configurations to compare model +/// performance with and without specific tooling or context. 
+/// +/// Features are implied by field presence — no explicit feature list needed: +/// - [contextFiles] populated → context injection enabled +/// - [mcpServers] populated → MCP tools enabled +/// - [skillPaths] populated → agent skills enabled +/// - all empty → baseline variant +/// +/// Example YAML: +/// ```yaml +/// variants: +/// baseline: {} +/// context_only: +/// context_files: [./context_files/flutter.md] +/// full: +/// context_files: [./context_files/flutter.md] +/// mcp_servers: [dart] +/// skills: [./skills/flutter_docs_ui] +/// ``` +@freezed +sealed class Variant with _$Variant { + const factory Variant({ + /// User-defined variant name from the job file. + @Default('baseline') String name, + + /// Loaded context files (paths resolved by config resolver). + @JsonKey(name: 'context_files') @Default([]) List contextFiles, + + /// MCP server keys to enable (e.g., `['dart']`). + @JsonKey(name: 'mcp_servers') @Default([]) List mcpServers, + + /// Resolved paths to agent skill directories. + /// Each directory must contain a `SKILL.md` file. + @JsonKey(name: 'skill_paths') @Default([]) List skillPaths, + + /// Flutter SDK channel to use (e.g., `'stable'`, `'beta'`, `'main'`). + /// `null` means use the default (stable) image from the job's sandbox. + @JsonKey(name: 'flutter_channel') String? flutterChannel, + }) = _Variant; + + const Variant._(); + + factory Variant.fromJson(Map json) => + _$VariantFromJson(json); + + /// Human-readable label for this variant. + /// + /// Alias for [name], preserved for backward compatibility. 
+ String get label => name; +} diff --git a/packages/eval_config/lib/src/models/variant.freezed.dart b/packages/eval_config/lib/src/models/variant.freezed.dart new file mode 100644 index 0000000..9fe224c --- /dev/null +++ b/packages/eval_config/lib/src/models/variant.freezed.dart @@ -0,0 +1,319 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: unused_element, deprecated_member_use, deprecated_member_use_from_same_package, use_function_type_syntax_for_parameters, unnecessary_const, avoid_init_to_null, invalid_override_different_default_values_named, prefer_expression_function_bodies, annotate_overrides, invalid_annotation_target, unnecessary_question_mark + +part of 'variant.dart'; + +// ************************************************************************** +// FreezedGenerator +// ************************************************************************** + +// dart format off +T _$identity(T value) => value; + +/// @nodoc +mixin _$Variant { + +/// User-defined variant name from the job file. + String get name;/// Loaded context files (paths resolved by config resolver). +@JsonKey(name: 'context_files') List get contextFiles;/// MCP server keys to enable (e.g., `['dart']`). +@JsonKey(name: 'mcp_servers') List get mcpServers;/// Resolved paths to agent skill directories. +/// Each directory must contain a `SKILL.md` file. +@JsonKey(name: 'skill_paths') List get skillPaths;/// Flutter SDK channel to use (e.g., `'stable'`, `'beta'`, `'main'`). +/// `null` means use the default (stable) image from the job's sandbox. +@JsonKey(name: 'flutter_channel') String? get flutterChannel; +/// Create a copy of Variant +/// with the given fields replaced by the non-null parameter values. +@JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +$VariantCopyWith get copyWith => _$VariantCopyWithImpl(this as Variant, _$identity); + + /// Serializes this Variant to a JSON map. 
+ Map toJson(); + + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is Variant&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other.contextFiles, contextFiles)&&const DeepCollectionEquality().equals(other.mcpServers, mcpServers)&&const DeepCollectionEquality().equals(other.skillPaths, skillPaths)&&(identical(other.flutterChannel, flutterChannel) || other.flutterChannel == flutterChannel)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(contextFiles),const DeepCollectionEquality().hash(mcpServers),const DeepCollectionEquality().hash(skillPaths),flutterChannel); + +@override +String toString() { + return 'Variant(name: $name, contextFiles: $contextFiles, mcpServers: $mcpServers, skillPaths: $skillPaths, flutterChannel: $flutterChannel)'; +} + + +} + +/// @nodoc +abstract mixin class $VariantCopyWith<$Res> { + factory $VariantCopyWith(Variant value, $Res Function(Variant) _then) = _$VariantCopyWithImpl; +@useResult +$Res call({ + String name,@JsonKey(name: 'context_files') List contextFiles,@JsonKey(name: 'mcp_servers') List mcpServers,@JsonKey(name: 'skill_paths') List skillPaths,@JsonKey(name: 'flutter_channel') String? flutterChannel +}); + + + + +} +/// @nodoc +class _$VariantCopyWithImpl<$Res> + implements $VariantCopyWith<$Res> { + _$VariantCopyWithImpl(this._self, this._then); + + final Variant _self; + final $Res Function(Variant) _then; + +/// Create a copy of Variant +/// with the given fields replaced by the non-null parameter values. +@pragma('vm:prefer-inline') @override $Res call({Object? name = null,Object? contextFiles = null,Object? mcpServers = null,Object? skillPaths = null,Object? flutterChannel = freezed,}) { + return _then(_self.copyWith( +name: null == name ? 
_self.name : name // ignore: cast_nullable_to_non_nullable +as String,contextFiles: null == contextFiles ? _self.contextFiles : contextFiles // ignore: cast_nullable_to_non_nullable +as List,mcpServers: null == mcpServers ? _self.mcpServers : mcpServers // ignore: cast_nullable_to_non_nullable +as List,skillPaths: null == skillPaths ? _self.skillPaths : skillPaths // ignore: cast_nullable_to_non_nullable +as List,flutterChannel: freezed == flutterChannel ? _self.flutterChannel : flutterChannel // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + +} + + +/// Adds pattern-matching-related methods to [Variant]. +extension VariantPatterns on Variant { +/// A variant of `map` that fallback to returning `orElse`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeMap(TResult Function( _Variant value)? $default,{required TResult orElse(),}){ +final _that = this; +switch (_that) { +case _Variant() when $default != null: +return $default(_that);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// Callbacks receives the raw object, upcasted. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case final Subclass2 value: +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult map(TResult Function( _Variant value) $default,){ +final _that = this; +switch (_that) { +case _Variant(): +return $default(_that);} +} +/// A variant of `map` that fallback to returning `null`. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case final Subclass value: +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? mapOrNull(TResult? Function( _Variant value)? 
$default,){ +final _that = this; +switch (_that) { +case _Variant() when $default != null: +return $default(_that);case _: + return null; + +} +} +/// A variant of `when` that fallback to an `orElse` callback. +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return orElse(); +/// } +/// ``` + +@optionalTypeArgs TResult maybeWhen(TResult Function( String name, @JsonKey(name: 'context_files') List contextFiles, @JsonKey(name: 'mcp_servers') List mcpServers, @JsonKey(name: 'skill_paths') List skillPaths, @JsonKey(name: 'flutter_channel') String? flutterChannel)? $default,{required TResult orElse(),}) {final _that = this; +switch (_that) { +case _Variant() when $default != null: +return $default(_that.name,_that.contextFiles,_that.mcpServers,_that.skillPaths,_that.flutterChannel);case _: + return orElse(); + +} +} +/// A `switch`-like method, using callbacks. +/// +/// As opposed to `map`, this offers destructuring. +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case Subclass2(:final field2): +/// return ...; +/// } +/// ``` + +@optionalTypeArgs TResult when(TResult Function( String name, @JsonKey(name: 'context_files') List contextFiles, @JsonKey(name: 'mcp_servers') List mcpServers, @JsonKey(name: 'skill_paths') List skillPaths, @JsonKey(name: 'flutter_channel') String? flutterChannel) $default,) {final _that = this; +switch (_that) { +case _Variant(): +return $default(_that.name,_that.contextFiles,_that.mcpServers,_that.skillPaths,_that.flutterChannel);} +} +/// A variant of `when` that fallback to returning `null` +/// +/// It is equivalent to doing: +/// ```dart +/// switch (sealedClass) { +/// case Subclass(:final field): +/// return ...; +/// case _: +/// return null; +/// } +/// ``` + +@optionalTypeArgs TResult? whenOrNull(TResult? 
Function( String name, @JsonKey(name: 'context_files') List contextFiles, @JsonKey(name: 'mcp_servers') List mcpServers, @JsonKey(name: 'skill_paths') List skillPaths, @JsonKey(name: 'flutter_channel') String? flutterChannel)? $default,) {final _that = this; +switch (_that) { +case _Variant() when $default != null: +return $default(_that.name,_that.contextFiles,_that.mcpServers,_that.skillPaths,_that.flutterChannel);case _: + return null; + +} +} + +} + +/// @nodoc +@JsonSerializable() + +class _Variant extends Variant { + const _Variant({this.name = 'baseline', @JsonKey(name: 'context_files') final List contextFiles = const [], @JsonKey(name: 'mcp_servers') final List mcpServers = const [], @JsonKey(name: 'skill_paths') final List skillPaths = const [], @JsonKey(name: 'flutter_channel') this.flutterChannel}): _contextFiles = contextFiles,_mcpServers = mcpServers,_skillPaths = skillPaths,super._(); + factory _Variant.fromJson(Map json) => _$VariantFromJson(json); + +/// User-defined variant name from the job file. +@override@JsonKey() final String name; +/// Loaded context files (paths resolved by config resolver). + final List _contextFiles; +/// Loaded context files (paths resolved by config resolver). +@override@JsonKey(name: 'context_files') List get contextFiles { + if (_contextFiles is EqualUnmodifiableListView) return _contextFiles; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_contextFiles); +} + +/// MCP server keys to enable (e.g., `['dart']`). + final List _mcpServers; +/// MCP server keys to enable (e.g., `['dart']`). +@override@JsonKey(name: 'mcp_servers') List get mcpServers { + if (_mcpServers is EqualUnmodifiableListView) return _mcpServers; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_mcpServers); +} + +/// Resolved paths to agent skill directories. +/// Each directory must contain a `SKILL.md` file. + final List _skillPaths; +/// Resolved paths to agent skill directories. 
+/// Each directory must contain a `SKILL.md` file. +@override@JsonKey(name: 'skill_paths') List get skillPaths { + if (_skillPaths is EqualUnmodifiableListView) return _skillPaths; + // ignore: implicit_dynamic_type + return EqualUnmodifiableListView(_skillPaths); +} + +/// Flutter SDK channel to use (e.g., `'stable'`, `'beta'`, `'main'`). +/// `null` means use the default (stable) image from the job's sandbox. +@override@JsonKey(name: 'flutter_channel') final String? flutterChannel; + +/// Create a copy of Variant +/// with the given fields replaced by the non-null parameter values. +@override @JsonKey(includeFromJson: false, includeToJson: false) +@pragma('vm:prefer-inline') +_$VariantCopyWith<_Variant> get copyWith => __$VariantCopyWithImpl<_Variant>(this, _$identity); + +@override +Map toJson() { + return _$VariantToJson(this, ); +} + +@override +bool operator ==(Object other) { + return identical(this, other) || (other.runtimeType == runtimeType&&other is _Variant&&(identical(other.name, name) || other.name == name)&&const DeepCollectionEquality().equals(other._contextFiles, _contextFiles)&&const DeepCollectionEquality().equals(other._mcpServers, _mcpServers)&&const DeepCollectionEquality().equals(other._skillPaths, _skillPaths)&&(identical(other.flutterChannel, flutterChannel) || other.flutterChannel == flutterChannel)); +} + +@JsonKey(includeFromJson: false, includeToJson: false) +@override +int get hashCode => Object.hash(runtimeType,name,const DeepCollectionEquality().hash(_contextFiles),const DeepCollectionEquality().hash(_mcpServers),const DeepCollectionEquality().hash(_skillPaths),flutterChannel); + +@override +String toString() { + return 'Variant(name: $name, contextFiles: $contextFiles, mcpServers: $mcpServers, skillPaths: $skillPaths, flutterChannel: $flutterChannel)'; +} + + +} + +/// @nodoc +abstract mixin class _$VariantCopyWith<$Res> implements $VariantCopyWith<$Res> { + factory _$VariantCopyWith(_Variant value, $Res Function(_Variant) _then) = 
__$VariantCopyWithImpl; +@override @useResult +$Res call({ + String name,@JsonKey(name: 'context_files') List contextFiles,@JsonKey(name: 'mcp_servers') List mcpServers,@JsonKey(name: 'skill_paths') List skillPaths,@JsonKey(name: 'flutter_channel') String? flutterChannel +}); + + + + +} +/// @nodoc +class __$VariantCopyWithImpl<$Res> + implements _$VariantCopyWith<$Res> { + __$VariantCopyWithImpl(this._self, this._then); + + final _Variant _self; + final $Res Function(_Variant) _then; + +/// Create a copy of Variant +/// with the given fields replaced by the non-null parameter values. +@override @pragma('vm:prefer-inline') $Res call({Object? name = null,Object? contextFiles = null,Object? mcpServers = null,Object? skillPaths = null,Object? flutterChannel = freezed,}) { + return _then(_Variant( +name: null == name ? _self.name : name // ignore: cast_nullable_to_non_nullable +as String,contextFiles: null == contextFiles ? _self._contextFiles : contextFiles // ignore: cast_nullable_to_non_nullable +as List,mcpServers: null == mcpServers ? _self._mcpServers : mcpServers // ignore: cast_nullable_to_non_nullable +as List,skillPaths: null == skillPaths ? _self._skillPaths : skillPaths // ignore: cast_nullable_to_non_nullable +as List,flutterChannel: freezed == flutterChannel ? 
_self.flutterChannel : flutterChannel // ignore: cast_nullable_to_non_nullable +as String?, + )); +} + + +} + +// dart format on diff --git a/packages/eval_config/lib/src/models/variant.g.dart b/packages/eval_config/lib/src/models/variant.g.dart new file mode 100644 index 0000000..3ed7ff4 --- /dev/null +++ b/packages/eval_config/lib/src/models/variant.g.dart @@ -0,0 +1,35 @@ +// GENERATED CODE - DO NOT MODIFY BY HAND + +part of 'variant.dart'; + +// ************************************************************************** +// JsonSerializableGenerator +// ************************************************************************** + +_Variant _$VariantFromJson(Map json) => _Variant( + name: json['name'] as String? ?? 'baseline', + contextFiles: + (json['context_files'] as List?) + ?.map((e) => ContextFile.fromJson(e as Map)) + .toList() ?? + const [], + mcpServers: + (json['mcp_servers'] as List?) + ?.map((e) => e as String) + .toList() ?? + const [], + skillPaths: + (json['skill_paths'] as List?) + ?.map((e) => e as String) + .toList() ?? + const [], + flutterChannel: json['flutter_channel'] as String?, +); + +Map _$VariantToJson(_Variant instance) => { + 'name': instance.name, + 'context_files': instance.contextFiles.map((e) => e.toJson()).toList(), + 'mcp_servers': instance.mcpServers, + 'skill_paths': instance.skillPaths, + 'flutter_channel': instance.flutterChannel, +}; diff --git a/packages/eval_config/lib/src/parsed_task.dart b/packages/eval_config/lib/src/parsed_task.dart new file mode 100644 index 0000000..21ce5e3 --- /dev/null +++ b/packages/eval_config/lib/src/parsed_task.dart @@ -0,0 +1,168 @@ +import 'models/models.dart'; + +/// Default system message used when no override is provided. +const kDefaultSystemMessage = + 'You are a helpful assistant with deep expertise in Dart and Flutter ' + 'development. 
Answer questions clearly and accurately, providing ' + 'examples when helpful.'; + +/// Lightweight intermediate type used during parsing and resolution. +/// +/// Groups samples with task-level config (variant, sandbox, etc.) before +/// the resolver produces the final [Task] objects. This replaces the +/// former `TaskConfig` model-package class. +class ParsedTask { + final String id; + final String taskFunc; + final List samples; + final Variant variant; + final String sandboxType; + final String? systemMessage; + final List? allowedVariants; + final bool saveExamples; + final String? examplesDir; + + // ------------------------------------------------------------------ + // Task-level settings (from task.yaml) + // ------------------------------------------------------------------ + + /// Default model for this task. + final String? model; + + /// Model generation config. + final Map? config; + + /// Named roles for use in `get_model()`. + final Map? modelRoles; + + /// Sandbox environment type (or a shorthand spec). + final Object? sandbox; + + /// Tool use approval policies. + final Object? approval; + + /// Epochs to repeat samples for. + final Object? epochs; + + /// Fail on sample errors. + final Object? failOnError; + + /// Continue running if the `fail_on_error` condition is met. + final bool? continueOnFail; + + /// Limit on total messages per sample. + final int? messageLimit; + + /// Limit on total tokens per sample. + final int? tokenLimit; + + /// Limit on clock time (in seconds) per sample. + final int? timeLimit; + + /// Limit on working time (in seconds) per sample. + final int? workingLimit; + + /// Limit on total cost (in dollars) per sample. + final double? costLimit; + + /// Early stopping callbacks. + final Object? earlyStopping; + + /// Task display name (e.g. for plotting). + final String? displayName; + + /// Version of task. + final Object? version; + + /// Additional metadata to associate with the task. + final Map? 
metadata; + + const ParsedTask({ + required this.id, + required this.taskFunc, + required this.samples, + required this.variant, + this.sandboxType = 'local', + this.systemMessage, + this.allowedVariants, + this.saveExamples = false, + this.examplesDir, + // Task-level settings + this.model, + this.config, + this.modelRoles, + this.sandbox, + this.approval, + this.epochs, + this.failOnError, + this.continueOnFail, + this.messageLimit, + this.tokenLimit, + this.timeLimit, + this.workingLimit, + this.costLimit, + this.earlyStopping, + this.displayName, + this.version, + this.metadata, + }); + + /// Create a copy with overrides. + ParsedTask copyWith({ + String? id, + String? taskFunc, + List? samples, + Variant? variant, + String? sandboxType, + String? systemMessage, + List? allowedVariants, + bool? saveExamples, + String? examplesDir, + String? model, + Map? config, + Map? modelRoles, + Object? sandbox, + Object? approval, + Object? epochs, + Object? failOnError, + bool? continueOnFail, + int? messageLimit, + int? tokenLimit, + int? timeLimit, + int? workingLimit, + double? costLimit, + Object? earlyStopping, + String? displayName, + Object? version, + Map? metadata, + }) { + return ParsedTask( + id: id ?? this.id, + taskFunc: taskFunc ?? this.taskFunc, + samples: samples ?? this.samples, + variant: variant ?? this.variant, + sandboxType: sandboxType ?? this.sandboxType, + systemMessage: systemMessage ?? this.systemMessage, + allowedVariants: allowedVariants ?? this.allowedVariants, + saveExamples: saveExamples ?? this.saveExamples, + examplesDir: examplesDir ?? this.examplesDir, + model: model ?? this.model, + config: config ?? this.config, + modelRoles: modelRoles ?? this.modelRoles, + sandbox: sandbox ?? this.sandbox, + approval: approval ?? this.approval, + epochs: epochs ?? this.epochs, + failOnError: failOnError ?? this.failOnError, + continueOnFail: continueOnFail ?? this.continueOnFail, + messageLimit: messageLimit ?? 
this.messageLimit, + tokenLimit: tokenLimit ?? this.tokenLimit, + timeLimit: timeLimit ?? this.timeLimit, + workingLimit: workingLimit ?? this.workingLimit, + costLimit: costLimit ?? this.costLimit, + earlyStopping: earlyStopping ?? this.earlyStopping, + displayName: displayName ?? this.displayName, + version: version ?? this.version, + metadata: metadata ?? this.metadata, + ); + } +} diff --git a/packages/eval_config/lib/src/parsers/json_parser.dart b/packages/eval_config/lib/src/parsers/json_parser.dart new file mode 100644 index 0000000..89d9668 --- /dev/null +++ b/packages/eval_config/lib/src/parsers/json_parser.dart @@ -0,0 +1,228 @@ +import '../models/models.dart'; + +import '../parsed_task.dart'; +import 'parser.dart'; + +/// Parses config from pre-parsed `Map` data. +/// +/// Useful for programmatic config construction (web UI, tests) +/// without touching the filesystem. +class JsonParser extends Parser { + @override + List parseTasks(String datasetRoot) { + // JSON parser expects data to be provided directly, not from filesystem. + // For now, return empty — callers should use parseTasksFromMaps() instead. + return []; + } + + /// Parse task configs from pre-parsed maps. + /// + /// Each map should have the same structure as a task.yaml file. + List parseTasksFromMaps(List> taskMaps) { + return taskMaps.map((data) { + final taskId = data['id'] as String; + final taskFunc = (data['func'] as String?) ?? taskId; + final systemMessage = data['system_message'] as String?; + final allowedVariants = (data['allowed_variants'] as List?) + ?.cast(); + + // Parse samples from inline data (no file I/O) + final samplesRaw = data['samples']; + final samples = []; + if (samplesRaw is Map) { + final inlineDefs = + (samplesRaw['inline'] as List?)?.cast>() ?? 
+ const []; + for (final def in inlineDefs) { + if (def.isEmpty) continue; + + // Validate required fields + for (final field in ['id', 'input', 'target']) { + if (!def.containsKey(field)) { + throw FormatException( + "Sample '${def['id'] ?? 'unknown'}' missing required " + "field: $field", + ); + } + } + + // Normalize tags + final rawTags = def['tags']; + final List tags; + if (rawTags is String) { + tags = rawTags.split(',').map((t) => t.trim()).toList(); + } else if (rawTags is List) { + tags = rawTags.cast(); + } else { + tags = []; + } + + // Parse sample-level fields + final choices = (def['choices'] as List?)?.cast(); + final sampleSandbox = def['sandbox']; + final setup = def['setup'] as String?; + final files = def['files'] is Map + ? Map.from(def['files'] as Map) + : null; + + samples.add( + Sample( + id: def['id'] as String, + input: def['input'] as String, + target: def['target'] as String, + metadata: { + ...Map.from( + def['metadata'] as Map? ?? {}, + ), + 'difficulty': def['difficulty'] as String? ?? 'medium', + 'tags': tags, + }, + choices: choices, + sandbox: sampleSandbox, + setup: setup, + files: files, + ), + ); + } + } + + // Parse Task-level settings + final model = data['model'] as String?; + final config = data['config'] is Map + ? Map.from(data['config'] as Map) + : null; + final modelRoles = data['model_roles'] is Map + ? 
Map.from(data['model_roles'] as Map) + : null; + final sandbox = data['sandbox']; + final approval = data['approval']; + final epochs = data['epochs']; + final failOnError = data['fail_on_error']; + final continueOnFail = data['continue_on_fail'] as bool?; + final messageLimit = data['message_limit'] as int?; + final tokenLimit = data['token_limit'] as int?; + final timeLimit = data['time_limit'] as int?; + final workingLimit = data['working_limit'] as int?; + final costLimit = (data['cost_limit'] as num?)?.toDouble(); + final earlyStopping = data['early_stopping']; + final displayName = data['display_name'] as String?; + final version = data['version']; + final taskMetadata = data['metadata'] is Map + ? Map.from(data['metadata'] as Map) + : null; + + return ParsedTask( + id: taskId, + taskFunc: taskFunc, + variant: const Variant(), + samples: samples, + systemMessage: systemMessage, + allowedVariants: allowedVariants, + // Task-level settings + model: model, + config: config, + modelRoles: modelRoles, + sandbox: sandbox, + approval: approval, + epochs: epochs, + failOnError: failOnError, + continueOnFail: continueOnFail, + messageLimit: messageLimit, + tokenLimit: tokenLimit, + timeLimit: timeLimit, + workingLimit: workingLimit, + costLimit: costLimit, + earlyStopping: earlyStopping, + displayName: displayName, + version: version, + metadata: taskMetadata, + ); + }).toList(); + } + + @override + Job parseJob(String jobPath, String datasetRoot) { + // JSON parser expects data to be provided directly. + // Callers should use parseJobFromMap() instead. + throw UnsupportedError( + 'JsonParser.parseJob() requires a file path. ' + 'Use parseJobFromMap() for pre-parsed data.', + ); + } + + /// Parse a job from a pre-parsed map. + Job parseJobFromMap(Map data) { + return Job( + logDir: (data['log_dir'] as String?) ?? '', + sandboxType: (data['sandbox_type'] as String?) ?? 'local', + maxConnections: (data['max_connections'] as int?) ?? 
10, + models: (data['models'] as List?)?.cast(), + saveExamples: data['save_examples'] == true, + // Promoted eval_set() fields + retryAttempts: data['retry_attempts'] as int?, + maxRetries: data['max_retries'] as int?, + retryWait: (data['retry_wait'] as num?)?.toDouble(), + retryConnections: (data['retry_connections'] as num?)?.toDouble(), + retryCleanup: data['retry_cleanup'] as bool?, + failOnError: (data['fail_on_error'] as num?)?.toDouble(), + continueOnFail: data['continue_on_fail'] as bool?, + retryOnError: data['retry_on_error'] as int?, + debugErrors: data['debug_errors'] as bool?, + maxSamples: data['max_samples'] as int?, + maxTasks: data['max_tasks'] as int?, + maxSubprocesses: data['max_subprocesses'] as int?, + maxSandboxes: data['max_sandboxes'] as int?, + logLevel: data['log_level'] as String?, + logLevelTranscript: data['log_level_transcript'] as String?, + logFormat: data['log_format'] as String?, + tags: (data['tags'] as List?)?.cast(), + metadata: data['metadata'] is Map + ? Map.from(data['metadata'] as Map) + : null, + trace: data['trace'] as bool?, + display: data['display'] as String?, + score: data['score'] as bool?, + limit: data['limit'], + sampleId: data['sample_id'], + sampleShuffle: data['sample_shuffle'], + epochs: data['epochs'], + approval: data['approval'], + solver: data['solver'], + sandboxCleanup: data['sandbox_cleanup'] as bool?, + modelBaseUrl: data['model_base_url'] as String?, + modelArgs: data['model_args'] is Map + ? Map.from(data['model_args'] as Map) + : null, + modelRoles: data['model_roles'] is Map + ? Map.from(data['model_roles'] as Map) + : null, + taskArgs: data['task_args'] is Map + ? Map.from(data['task_args'] as Map) + : null, + messageLimit: data['message_limit'] as int?, + tokenLimit: data['token_limit'] as int?, + timeLimit: data['time_limit'] as int?, + workingLimit: data['working_limit'] as int?, + costLimit: (data['cost_limit'] as num?)?.toDouble(), + modelCostConfig: data['model_cost_config'] is Map + ? 
Map.from(data['model_cost_config'] as Map) + : null, + logSamples: data['log_samples'] as bool?, + logRealtime: data['log_realtime'] as bool?, + logImages: data['log_images'] as bool?, + logBuffer: data['log_buffer'] as int?, + logShared: data['log_shared'] as int?, + bundleDir: data['bundle_dir'] as String?, + bundleOverwrite: data['bundle_overwrite'] as bool?, + logDirAllowDirty: data['log_dir_allow_dirty'] as bool?, + evalSetId: data['eval_set_id'] as String?, + // Pass-through sections + evalSetOverrides: data['eval_set_overrides'] is Map + ? Map.from(data['eval_set_overrides'] as Map) + : null, + taskDefaults: data['task_defaults'] is Map + ? Map.from(data['task_defaults'] as Map) + : null, + ); + } +} diff --git a/packages/eval_config/lib/src/parsers/parser.dart b/packages/eval_config/lib/src/parsers/parser.dart new file mode 100644 index 0000000..5ba0159 --- /dev/null +++ b/packages/eval_config/lib/src/parsers/parser.dart @@ -0,0 +1,25 @@ +import '../models/models.dart'; + +import '../parsed_task.dart'; + +/// Abstract base for config parsers. +/// +/// Parsers are responsible for turning raw configuration data (YAML files, +/// JSON maps, etc.) into domain model objects ([ParsedTask], [Job]). +/// +/// Concrete implementations: +/// - [YamlParser] — reads `.yaml` files from the filesystem +/// - [JsonParser] — accepts pre-parsed `Map` data +abstract class Parser { + /// Parse all task configs from a dataset root directory. + /// + /// The dataset root is expected to contain a `tasks/` subdirectory + /// with per-task YAML/JSON files. + List parseTasks(String datasetRoot); + + /// Parse a job config. + /// + /// [jobPath] identifies the job (file path for YAML, key for JSON). + /// [datasetRoot] is the dataset root for resolving relative paths. 
+ Job parseJob(String jobPath, String datasetRoot); +} diff --git a/packages/eval_config/lib/src/parsers/yaml_parser.dart b/packages/eval_config/lib/src/parsers/yaml_parser.dart new file mode 100644 index 0000000..3ea236c --- /dev/null +++ b/packages/eval_config/lib/src/parsers/yaml_parser.dart @@ -0,0 +1,605 @@ +import 'dart:io'; + +import 'package:glob/glob.dart'; +import 'package:glob/list_local_fs.dart'; +import '../models/models.dart'; +import 'package:path/path.dart' as p; +import 'package:yaml/yaml.dart'; + +import '../parsed_task.dart'; +import '../utils/yaml_utils.dart'; +import 'parser.dart'; + +/// Default log directory (relative to dataset root). +const _kDefaultLogsDir = '../logs'; + +/// Parses YAML config files from the filesystem into domain objects. +/// +/// Reads `tasks/*/task.yaml` files for task configs and job YAML files +/// for job configs. +class YamlParser extends Parser { + // ------------------------------------------------------------------ + // Task parsing + // ------------------------------------------------------------------ + + @override + List parseTasks(String datasetRoot) { + final tasksDir = Directory(p.join(datasetRoot, 'tasks')); + if (!tasksDir.existsSync()) return []; + + final taskConfigs = []; + + final taskDirs = tasksDir.listSync().whereType().toList() + ..sort((a, b) => a.path.compareTo(b.path)); + + for (final taskDir in taskDirs) { + final taskFile = File(p.join(taskDir.path, 'task.yaml')); + if (taskFile.existsSync()) { + taskConfigs.addAll(_loadTaskFile(taskFile.path, datasetRoot)); + } + } + + return taskConfigs; + } + + /// Load a single task.yaml file into a [ParsedTask]. + /// + /// Returns a single-element list (variant expansion happens later). + List _loadTaskFile(String taskPath, String datasetRoot) { + final data = readYamlFileAsMap(taskPath); + final taskDir = p.dirname(taskPath); + + final taskId = (data['id'] as String?) ?? p.basename(taskDir); + final taskFunc = (data['func'] as String?) ?? 
taskId; + + final taskWorkspaceRaw = data['workspace']; + final taskTestsRaw = data['tests']; + final systemMessage = data['system_message'] as String?; + + // Pre-resolve task-level paths to absolute + final taskWorkspace = _preResolveToAbs(taskWorkspaceRaw, taskDir); + final taskTests = _preResolveToAbs(taskTestsRaw, taskDir); + + // Optional whitelist of variant names + final allowedVariants = (data['allowed_variants'] as List?)?.cast(); + + // Parse samples section + final samplesRaw = data['samples']; + if (samplesRaw is! Map) { + throw FormatException( + "Task '$taskId': 'samples' must be a dict with 'inline' and/or " + "'paths' keys, got ${samplesRaw.runtimeType}", + ); + } + final samplesMap = Map.from(samplesRaw); + final samples = _loadSamplesSection( + samplesMap, + datasetRoot, + taskWorkspace, + taskTests, + taskDir, + ); + + // Parse Task-level settings + final model = data['model'] as String?; + final config = _asMap(data['config']); + final modelRoles = _asStringMap(data['model_roles']); + final sandbox = data['sandbox']; + final approval = data['approval']; + final epochs = data['epochs']; + final failOnError = data['fail_on_error']; + final continueOnFail = data['continue_on_fail'] as bool?; + final messageLimit = data['message_limit'] as int?; + final tokenLimit = data['token_limit'] as int?; + final timeLimit = data['time_limit'] as int?; + final workingLimit = data['working_limit'] as int?; + final costLimit = (data['cost_limit'] as num?)?.toDouble(); + final earlyStopping = data['early_stopping']; + final displayName = data['display_name'] as String?; + final version = data['version']; + final taskMetadata = _asMap(data['metadata']); + + return [ + ParsedTask( + id: taskId, + taskFunc: taskFunc, + variant: const Variant(), // placeholder baseline + samples: samples, + systemMessage: systemMessage, + allowedVariants: allowedVariants, + // Task-level settings + model: model, + config: config, + modelRoles: modelRoles, + sandbox: sandbox, + 
approval: approval, + epochs: epochs, + failOnError: failOnError, + continueOnFail: continueOnFail, + messageLimit: messageLimit, + tokenLimit: tokenLimit, + timeLimit: timeLimit, + workingLimit: workingLimit, + costLimit: costLimit, + earlyStopping: earlyStopping, + displayName: displayName, + version: version, + metadata: taskMetadata, + ), + ]; + } + + // ------------------------------------------------------------------ + // Sample loading + // ------------------------------------------------------------------ + + /// Load samples from the `paths` and `inline` subsections. + List _loadSamplesSection( + Map samplesMap, + String datasetRoot, + Object? taskWorkspace, + Object? taskTests, + String taskDir, + ) { + final pathPatterns = + (samplesMap['paths'] as List?)?.cast() ?? const []; + final inlineDefs = + (samplesMap['inline'] as List?)?.cast>() ?? + const []; + + final samples = []; + + // Load from path patterns (glob-expanded) + for (final pattern in pathPatterns) { + List matchedFiles; + if (_isGlob(pattern)) { + matchedFiles = _expandGlobFiles(taskDir, pattern); + } else { + final candidate = p.normalize(p.join(taskDir, pattern)); + matchedFiles = File(candidate).existsSync() ? [candidate] : []; + } + + if (matchedFiles.isEmpty) { + throw FileSystemException( + 'No sample files matched pattern: $pattern', + ); + } + + samples.addAll( + _loadSamplesFromFiles( + matchedFiles, + datasetRoot, + taskWorkspace, + taskTests, + ), + ); + } + + // Load inline definitions + for (final def in inlineDefs) { + if (def.isEmpty) continue; + samples.add( + _resolveSample(def, taskDir, datasetRoot, taskWorkspace, taskTests), + ); + } + + return samples; + } + + /// Load samples from external YAML files. + List _loadSamplesFromFiles( + List sampleFiles, + String datasetRoot, + Object? taskWorkspace, + Object? taskTests, + ) { + final samples = []; + + for (final filePath in sampleFiles) { + final fullPath = p.isAbsolute(filePath) + ? 
filePath + : p.join(datasetRoot, filePath); + if (!File(fullPath).existsSync()) { + throw FileSystemException('Sample file not found', fullPath); + } + + final sampleDir = p.dirname(fullPath); + final content = File(fullPath).readAsStringSync(); + + // Support multi-document YAML (--- separated) + final docs = content.split(RegExp(r'^---\s*$', multiLine: true)); + for (final doc in docs) { + if (doc.trim().isEmpty) continue; + final data = convertYamlToObject(loadYaml(doc)) as Map; + samples.add( + _resolveSample( + data, + sampleDir, + datasetRoot, + taskWorkspace, + taskTests, + ), + ); + } + } + + return samples; + } + + // ------------------------------------------------------------------ + // Sample resolution + // ------------------------------------------------------------------ + + /// Resolve a single sample dict into a [Sample]. + /// + /// Validates required fields and normalises tags (formerly done by + /// `SampleConfig.fromYaml`). + Sample _resolveSample( + Map doc, + String baseDir, + String datasetRoot, + Object? taskWorkspace, + Object? taskTests, + ) { + // --- Validate required fields --- + for (final field in ['id', 'input', 'target']) { + if (!doc.containsKey(field)) { + throw FormatException( + "Sample '${doc['id'] ?? 'unknown'}' missing required field: $field", + ); + } + } + + final sampleWorkspace = doc['workspace']; + final sampleTests = doc['tests']; + + // Sample-level overrides task-level + final effectiveWorkspace = sampleWorkspace ?? taskWorkspace; + + String? workspace; + String? workspaceGit; + String? workspaceGitRef; + + if (effectiveWorkspace != null) { + if (effectiveWorkspace is Map && effectiveWorkspace.containsKey('git')) { + workspaceGit = effectiveWorkspace['git'] as String?; + workspaceGitRef = effectiveWorkspace['ref'] as String?; + } else { + final resolveDir = sampleWorkspace != null ? baseDir : datasetRoot; + workspace = _resolveResourcePath(effectiveWorkspace, resolveDir); + } + } + + String? 
tests; + if (sampleTests != null) { + tests = _resolveResourcePath(sampleTests, baseDir); + } else if (taskTests != null) { + tests = _resolveResourcePath(taskTests, datasetRoot); + } + + // --- Normalize tags --- + final rawTags = doc['tags']; + final List tags; + if (rawTags is String) { + tags = rawTags.split(',').map((t) => t.trim()).toList(); + } else if (rawTags is List) { + tags = rawTags.cast(); + } else { + tags = []; + } + + // Build metadata with domain-specific fields + final metadata = { + ...Map.from(doc['metadata'] as Map? ?? {}), + 'difficulty': doc['difficulty'] as String? ?? 'medium', + 'tags': tags, + 'workspace': ?workspace, + 'tests': ?tests, + 'workspace_git': ?workspaceGit, + 'workspace_git_ref': ?workspaceGitRef, + }; + + // Parse sample-level fields + final choices = (doc['choices'] as List?)?.cast(); + final sampleSandbox = doc['sandbox']; + final setup = doc['setup'] as String?; + final files = _asStringMap(doc['files']); + + return Sample( + id: doc['id'] as String, + input: doc['input'] as String, + target: doc['target'] as String, + metadata: metadata, + choices: choices, + sandbox: sampleSandbox, + files: files, + setup: setup, + ); + } + + // ------------------------------------------------------------------ + // Job parsing + // ------------------------------------------------------------------ + + @override + Job parseJob(String jobPath, String datasetRoot) { + if (!File(jobPath).existsSync()) { + throw FileSystemException('Job file not found', jobPath); + } + + final data = readYamlFileAsMap(jobPath); + + final logsDir = (data['logs_dir'] as String?) ?? _kDefaultLogsDir; + final sandboxType = (data['sandbox_type'] as String?) ?? 'local'; + final maxConnections = (data['max_connections'] as int?) ?? 10; + + // Resolve log directory with timestamp + final logDir = _resolveLogDir(logsDir, datasetRoot); + + // Parse task filters + List? taskPaths; + Map? 
tasks; + final tasksRaw = data['tasks'] as Map?; + if (tasksRaw != null) { + taskPaths = (tasksRaw['paths'] as List?)?.cast(); + final inlineTasks = tasksRaw['inline'] as Map?; + if (inlineTasks != null) { + tasks = {}; + for (final entry in inlineTasks.entries) { + tasks[entry.key] = JobTask.fromYaml( + entry.key, + entry.value as Map?, + ); + } + } + } + + // Parse variants + Map>? variants; + final variantsRaw = data['variants']; + if (variantsRaw is Map) { + variants = {}; + for (final entry in variantsRaw.entries) { + final key = entry.key.toString(); + final value = entry.value; + if (value is Map) { + variants[key] = Map.from(value); + } else { + variants[key] = {}; + } + } + } + + return Job( + logDir: logDir, + sandboxType: sandboxType, + maxConnections: maxConnections, + models: (data['models'] as List?)?.cast(), + variants: variants, + taskPaths: taskPaths, + tasks: tasks, + saveExamples: data['save_examples'] == true, + // Promoted eval_set() fields + retryAttempts: data['retry_attempts'] as int?, + maxRetries: data['max_retries'] as int?, + retryWait: (data['retry_wait'] as num?)?.toDouble(), + retryConnections: (data['retry_connections'] as num?)?.toDouble(), + retryCleanup: data['retry_cleanup'] as bool?, + failOnError: (data['fail_on_error'] as num?)?.toDouble(), + continueOnFail: data['continue_on_fail'] as bool?, + retryOnError: data['retry_on_error'] as int?, + debugErrors: data['debug_errors'] as bool?, + maxSamples: data['max_samples'] as int?, + maxTasks: data['max_tasks'] as int?, + maxSubprocesses: data['max_subprocesses'] as int?, + maxSandboxes: data['max_sandboxes'] as int?, + logLevel: data['log_level'] as String?, + logLevelTranscript: data['log_level_transcript'] as String?, + logFormat: data['log_format'] as String?, + tags: (data['tags'] as List?)?.cast(), + metadata: _asMap(data['metadata']), + trace: data['trace'] as bool?, + display: data['display'] as String?, + score: data['score'] as bool?, + limit: data['limit'], + sampleId: 
data['sample_id'], + sampleShuffle: data['sample_shuffle'], + epochs: data['epochs'], + approval: data['approval'], + solver: data['solver'], + sandboxCleanup: data['sandbox_cleanup'] as bool?, + modelBaseUrl: data['model_base_url'] as String?, + modelArgs: _asObjectMap(data['model_args']), + modelRoles: _asStringMap(data['model_roles']), + taskArgs: _asObjectMap(data['task_args']), + messageLimit: data['message_limit'] as int?, + tokenLimit: data['token_limit'] as int?, + timeLimit: data['time_limit'] as int?, + workingLimit: data['working_limit'] as int?, + costLimit: (data['cost_limit'] as num?)?.toDouble(), + modelCostConfig: _asObjectMap(data['model_cost_config']), + logSamples: data['log_samples'] as bool?, + logRealtime: data['log_realtime'] as bool?, + logImages: data['log_images'] as bool?, + logBuffer: data['log_buffer'] as int?, + logShared: data['log_shared'] as int?, + bundleDir: data['bundle_dir'] as String?, + bundleOverwrite: data['bundle_overwrite'] as bool?, + logDirAllowDirty: data['log_dir_allow_dirty'] as bool?, + evalSetId: data['eval_set_id'] as String?, + // Pass-through sections + evalSetOverrides: _asMap(data['eval_set_overrides']), + taskDefaults: _asMap(data['task_defaults']), + ); + } + + /// Create a [Job] with default settings (when no job file is provided). + Job createDefaultJob(String baseDir) { + return Job( + logDir: _resolveLogDir(_kDefaultLogsDir, baseDir), + sandboxType: 'local', + maxConnections: 10, + ); + } + + // ------------------------------------------------------------------ + // Type conversion helpers + // ------------------------------------------------------------------ + + /// Safely cast a YAML value to `Map?`. + static Map? _asMap(Object? value) { + if (value is Map) return Map.from(value); + return null; + } + + /// Safely cast a YAML value to `Map?`. + static Map? _asStringMap(Object? value) { + if (value is Map) return Map.from(value); + return null; + } + + /// Safely cast a YAML value to `Map?`. 
+ static Map? _asObjectMap(Object? value) { + if (value is Map) return Map.from(value); + return null; + } + + // ------------------------------------------------------------------ + // Path resolution helpers + // ------------------------------------------------------------------ + + /// Pre-resolve a task-level resource to an absolute path. + Object? _preResolveToAbs(Object? resource, String taskDir) { + if (resource == null) return null; + + if (resource is String) { + if (resource.startsWith('./') || + resource.startsWith('../') || + resource.startsWith('/')) { + return {'path': p.normalize(p.join(taskDir, resource))}; + } + return resource; + } + + if (resource is Map) { + if (resource.containsKey('path')) { + final pathVal = resource['path'] as String; + return { + ...resource, + 'path': p.normalize(p.join(taskDir, pathVal)), + }; + } + return resource; + } + + return resource; + } + + /// Resolve a workspace/tests resource reference to an absolute path string. + String? _resolveResourcePath(Object? resource, String baseDir) { + if (resource == null) return null; + + if (resource is String) { + if (resource.startsWith('./') || + resource.startsWith('../') || + resource.startsWith('/')) { + return p.normalize(p.join(baseDir, resource)); + } + return null; + } + + if (resource is Map) { + if (resource.containsKey('path')) { + return p.normalize(p.join(baseDir, resource['path'] as String)); + } + } + + return null; + } + + // ------------------------------------------------------------------ + // Log dir helpers + // ------------------------------------------------------------------ + + /// Resolve log directory with a timestamp subfolder. 
+ String _resolveLogDir(String logsDir, String baseDir) { + final now = DateTime.now().toUtc(); + final timestamp = + '${now.year}-${_pad(now.month)}-${_pad(now.day)}' + '_${_pad(now.hour)}-${_pad(now.minute)}-${_pad(now.second)}'; + return p.normalize(p.join(baseDir, logsDir, timestamp)); + } + + static String _pad(int n) => n.toString().padLeft(2, '0'); + + // ------------------------------------------------------------------ + // Glob helpers + // ------------------------------------------------------------------ + + static bool _isGlob(String pattern) => + pattern.contains('*') || pattern.contains('?') || pattern.contains('['); + + /// Expand a glob pattern relative to [baseDir], returning matching files. + static List _expandGlobFiles(String baseDir, String pattern) { + final glob = Glob(pattern); + return glob + .listSync(root: baseDir) + .whereType() + .where( + (f) => + f.path.endsWith('.yaml') || + f.path.endsWith('.yml') || + f.path.endsWith('.md'), + ) + .map((f) => p.normalize(f.path)) + .toList() + ..sort(); + } +} + +/// Find a job file by name or path. +/// +/// Looks in `jobs/` directory first, then treats [job] as a relative/absolute +/// path. +/// +/// Throws [FileSystemException] if the job file is not found. +String findJobFile(String datasetRoot, String job) { + // Check if it's a path (contains / or ends with .yaml) + if (job.contains('/') || job.endsWith('.yaml')) { + final jobPath = p.isAbsolute(job) ? job : p.join(datasetRoot, job); + if (!File(jobPath).existsSync()) { + throw FileSystemException('Job file not found', jobPath); + } + return p.normalize(jobPath); + } + + // Look in jobs/ directory + final jobsDir = Directory(p.join(datasetRoot, 'jobs')); + if (!jobsDir.existsSync()) { + throw FileSystemException( + 'Jobs directory not found. 
' + 'Create it or specify a full path to the job file.', + jobsDir.path, + ); + } + + // Try with .yaml extension + final withExt = File(p.join(jobsDir.path, '$job.yaml')); + if (withExt.existsSync()) return p.normalize(withExt.path); + + // Try without extension (maybe they included it) + final withoutExt = File(p.join(jobsDir.path, job)); + if (withoutExt.existsSync()) return p.normalize(withoutExt.path); + + // List available jobs for helpful error message + final available = jobsDir + .listSync() + .whereType() + .where((f) => f.path.endsWith('.yaml')) + .map((f) => p.basenameWithoutExtension(f.path)) + .toList(); + throw FileSystemException( + "Job '$job' not found in ${jobsDir.path}. " + 'Available jobs: ${available.isEmpty ? '(none)' : available}', + ); +} diff --git a/packages/eval_config/lib/src/resolvers/eval_set_resolver.dart b/packages/eval_config/lib/src/resolvers/eval_set_resolver.dart new file mode 100644 index 0000000..d308d68 --- /dev/null +++ b/packages/eval_config/lib/src/resolvers/eval_set_resolver.dart @@ -0,0 +1,623 @@ +import 'dart:io'; + +import 'package:glob/glob.dart'; +import 'package:glob/list_local_fs.dart'; +import '../models/models.dart'; +import 'package:path/path.dart' as p; + +import '../parsed_task.dart'; + +/// Default models used when a job doesn't specify its own. +const List kDefaultModels = [ + 'anthropic/claude-haiku-4-5', + 'anthropic/claude-sonnet-4-5', + 'anthropic/claude-opus-4-6', + 'google/gemini-2.5-flash', + 'google/gemini-3-pro-preview', + 'google/gemini-3-flash-preview', + 'openai/gpt-5-mini', + 'openai/gpt-5-nano', + 'openai/gpt-5', + 'openai/gpt-5-pro', +]; + +/// Available sandbox configurations. 
/// Registry of named sandbox definitions.
/// Each entry maps a registry key to a `{name, path}` pair, where `path`
/// is relative to the dataset root.
///
/// NOTE(review): generic type arguments in this file were stripped by the
/// extraction that produced the patch; they have been reconstructed from
/// usage — confirm against the original source.
const Map<String, Map<String, String>> kSandboxRegistry = {
  'podman': {'name': 'podman', 'path': './sandboxes/podman/compose.yaml'},
  'podman-beta': {
    'name': 'podman',
    'path': './sandboxes/podman/compose-beta.yaml',
  },
  'podman-main': {
    'name': 'podman',
    'path': './sandboxes/podman/compose-main.yaml',
  },
};

/// Maps Flutter SDK channel names to sandbox registry keys.
const Map<String, String> kSdkChannels = {
  'stable': 'podman',
  'beta': 'podman-beta',
  'main': 'podman-main',
};

/// Resolves parsed task configs and job into fully-resolved
/// [EvalSet] objects ready for JSON serialization.
///
/// This is the resolution engine. It:
/// 1. Resolves models, sandboxes, and variants
/// 2. Expands task × variant combinations into [Task] entries
/// 3. Groups by flutter_channel (one [EvalSet] per group)
/// 4. Propagates job-level and task-level settings to the output
class EvalSetResolver {
  /// Resolve task configs and job into [EvalSet] objects.
  ///
  /// Groups by flutter_channel so each gets its own sandbox.
  List<EvalSet> resolve(
    List<ParsedTask> datasetTasks,
    Job job,
    String datasetRoot,
  ) {
    final models = _resolveModels(job);
    final sandboxTypeStr = job.sandboxType;
    final expandedTasks = _expandTaskConfigs(
      datasetTasks,
      job,
      sandboxTypeStr,
      datasetRoot,
    );

    // Group by flutter channel (null key = no channel override)
    final groups = <String?, List<ParsedTask>>{};
    for (final tc in expandedTasks) {
      final key = tc.variant.flutterChannel;
      (groups[key] ??= []).add(tc);
    }

    return [
      for (final entry in groups.entries)
        _buildEvalSet(
          taskConfigs: entry.value,
          logDir: job.logDir,
          models: models,
          sandbox: _resolveSandbox(
            datasetRoot,
            job,
            flutterChannel: entry.key,
          ),
          job: job,
        ),
    ];
  }

  // ------------------------------------------------------------------
  // EvalSet building
  // ------------------------------------------------------------------

  /// Build an [EvalSet] from resolved [ParsedTask]s.
  ///
  /// This is where [ParsedTask]s (internal) get converted to
  /// [Task]s (output format).
  EvalSet _buildEvalSet({
    required List<ParsedTask> taskConfigs,
    required String logDir,
    required List<String> models,
    required Object sandbox,
    required Job job,
  }) {
    final inspectTasks = <Task>[];
    final isContainer =
        job.sandboxType.isNotEmpty && job.sandboxType != 'local';

    // Parse task_defaults from the job
    final taskDefaults = job.taskDefaults ?? {};

    for (final tc in taskConfigs) {
      // Enrich each sample with task-level metadata
      final inspectSamples = <Sample>[];
      for (final sample in tc.samples) {
        final enriched = <String, Object?>{...?sample.metadata};

        if (tc.saveExamples) {
          enriched['save_examples'] = true;
          if (tc.examplesDir != null) {
            enriched['examples_dir'] = tc.examplesDir;
            enriched['task_variant'] = '${tc.id}:${tc.variant.name}';
          }
        }

        // Build files + setup for sandbox provisioning
        Map<String, String>? files = sample.files;
        String? setup = sample.setup;
        final workspace = sample.metadata?['workspace'] as String?;
        final workspaceGit = sample.metadata?['workspace_git'] as String?;
        final workspaceGitRef =
            sample.metadata?['workspace_git_ref'] as String?;

        // Container sandboxes mount the workspace at /workspace and run
        // `flutter pub get` unless the sample supplies its own setup.
        if (workspace != null && isContainer) {
          files = {...?files, '/workspace': workspace};
          setup = setup ?? 'cd /workspace && flutter pub get';
          enriched['workspace'] = '/workspace';
        }
        if (workspaceGit != null) {
          enriched['workspace_git'] = workspaceGit;
          if (workspaceGitRef != null) {
            enriched['workspace_git_ref'] = workspaceGitRef;
          }
        }

        inspectSamples.add(
          Sample(
            id: sample.id,
            input: sample.input,
            target: sample.target,
            metadata: enriched,
            choices: sample.choices,
            sandbox: sample.sandbox,
            files: files,
            setup: setup,
          ),
        );
      }

      final dataset = Dataset(
        samples: inspectSamples,
        name: '${tc.id}:${tc.variant.name}',
      );

      // Build task metadata (variant config, system message, etc.)
      // The two `variant_config` guards are mutually exclusive (one
      // requires contextFiles non-empty, the other empty), so the key
      // is emitted at most once.
      final metadata = <String, Object?>{
        'variant': tc.variant.name,
        if (tc.variant.contextFiles.isNotEmpty)
          'variant_config': {
            'context_files': tc.variant.contextFiles
                .map(
                  (cf) => {
                    'title': cf.metadata.title,
                    'version': cf.metadata.version,
                    'content': cf.content,
                  },
                )
                .toList(),
            'mcp_servers': tc.variant.mcpServers,
            'skill_paths': tc.variant.skillPaths,
          },
        if (tc.variant.contextFiles.isEmpty &&
            (tc.variant.mcpServers.isNotEmpty ||
                tc.variant.skillPaths.isNotEmpty))
          'variant_config': {
            'mcp_servers': tc.variant.mcpServers,
            'skill_paths': tc.variant.skillPaths,
          },
        if (tc.systemMessage != null) 'system_message': tc.systemMessage,
        if (tc.saveExamples) 'save_examples': true,
        if (tc.examplesDir != null) 'examples_dir': tc.examplesDir,
        // Merge any task-level metadata from YAML
        ...?tc.metadata,
      };

      // Determine sandbox for this task
      Object? taskSandbox;
      if (tc.sandbox != null) {
        // Task-level sandbox override
        taskSandbox = tc.sandbox;
      } else if (tc.sandboxType.isNotEmpty && tc.sandboxType != 'local') {
        taskSandbox = _serializeSandbox(sandbox);
      }

      // Resolve task-level settings with precedence:
      // task.yaml > task_defaults > hardcoded defaults
      final resolvedTimeLimit =
          tc.timeLimit ??
          taskDefaults['time_limit'] as int? ??
          (job.sandboxType != 'local' ? 300 : null);
      final resolvedMessageLimit =
          tc.messageLimit ?? taskDefaults['message_limit'] as int?;
      final resolvedTokenLimit =
          tc.tokenLimit ?? taskDefaults['token_limit'] as int?;
      final resolvedWorkingLimit =
          tc.workingLimit ?? taskDefaults['working_limit'] as int?;
      final resolvedCostLimit =
          tc.costLimit ?? (taskDefaults['cost_limit'] as num?)?.toDouble();
      final resolvedEpochs = tc.epochs ?? taskDefaults['epochs'];
      final resolvedFailOnError =
          tc.failOnError ?? taskDefaults['fail_on_error'];
      final resolvedContinueOnFail =
          tc.continueOnFail ?? taskDefaults['continue_on_fail'] as bool?;
      final resolvedModel = tc.model ?? taskDefaults['model'] as String?;
      final resolvedConfig = tc.config ?? taskDefaults['config'];
      final resolvedApproval = tc.approval ?? taskDefaults['approval'];
      final resolvedEarlyStopping =
          tc.earlyStopping ?? taskDefaults['early_stopping'];
      final resolvedDisplayName =
          tc.displayName ?? taskDefaults['display_name'] as String?;
      final resolvedVersion = tc.version ?? taskDefaults['version'];
      final resolvedModelRoles =
          tc.modelRoles ??
          (taskDefaults['model_roles'] as Map<String, String>?);

      inspectTasks.add(
        Task(
          name: '${tc.id}:${tc.variant.name}',
          taskFunc: tc.taskFunc,
          dataset: dataset,
          sandbox: taskSandbox,
          metadata: metadata,
          model: resolvedModel,
          config: resolvedConfig,
          modelRoles: resolvedModelRoles,
          approval: resolvedApproval,
          epochs: resolvedEpochs,
          failOnError: resolvedFailOnError,
          continueOnFail: resolvedContinueOnFail,
          messageLimit: resolvedMessageLimit,
          tokenLimit: resolvedTokenLimit,
          timeLimit: resolvedTimeLimit,
          workingLimit: resolvedWorkingLimit,
          costLimit: resolvedCostLimit,
          earlyStopping: resolvedEarlyStopping,
          displayName: resolvedDisplayName,
          version: resolvedVersion ?? 0,
        ),
      );
    }

    // Build the EvalSet with all job-level parameters.
    // Start with any eval_set_overrides, then apply explicit fields
    // (explicit job fields win over overrides, which win over defaults).
    final overrides = job.evalSetOverrides ?? {};

    return EvalSet(
      tasks: inspectTasks,
      logDir: logDir,
      model: models,
      sandbox: _serializeSandbox(sandbox),
      // Retry settings
      retryAttempts:
          job.retryAttempts ?? overrides['retry_attempts'] as int? ?? 10,
      retryWait:
          job.retryWait ?? (overrides['retry_wait'] as num?)?.toDouble() ?? 60,
      retryConnections:
          job.retryConnections ??
          (overrides['retry_connections'] as num?)?.toDouble() ??
          0.5,
      retryCleanup: job.retryCleanup ?? overrides['retry_cleanup'] as bool?,
      retryOnError:
          job.retryOnError ??
          job.maxRetries ??
          overrides['retry_on_error'] as int?,
      // Error handling
      failOnError:
          job.failOnError ??
          (overrides['fail_on_error'] as num?)?.toDouble() ??
          0.05,
      continueOnFail:
          job.continueOnFail ?? overrides['continue_on_fail'] as bool?,
      debugErrors: job.debugErrors ?? overrides['debug_errors'] as bool?,
      // Concurrency
      maxSamples: job.maxSamples ?? overrides['max_samples'] as int?,
      maxTasks: job.maxTasks ?? overrides['max_tasks'] as int?,
      maxSubprocesses:
          job.maxSubprocesses ?? overrides['max_subprocesses'] as int?,
      maxSandboxes: job.maxSandboxes ?? overrides['max_sandboxes'] as int?,
      // Logging
      logLevel: job.logLevel ?? overrides['log_level'] as String? ?? 'info',
      logLevelTranscript:
          job.logLevelTranscript ??
          overrides['log_level_transcript'] as String?,
      logFormat: job.logFormat ?? overrides['log_format'] as String? ?? 'json',
      logSamples: job.logSamples ?? overrides['log_samples'] as bool?,
      logRealtime: job.logRealtime ?? overrides['log_realtime'] as bool?,
      logImages: job.logImages ?? overrides['log_images'] as bool?,
      logBuffer: job.logBuffer ?? overrides['log_buffer'] as int?,
      logShared: job.logShared ?? overrides['log_shared'] as int?,
      logDirAllowDirty:
          job.logDirAllowDirty ?? overrides['log_dir_allow_dirty'] as bool?,
      // Model config
      modelBaseUrl: job.modelBaseUrl ?? overrides['model_base_url'] as String?,
      modelArgs:
          job.modelArgs ??
          (overrides['model_args'] as Map<String, Object?>?) ??
          const {},
      modelRoles:
          job.modelRoles ?? overrides['model_roles'] as Map<String, String>?,
      taskArgs:
          job.taskArgs ??
          (overrides['task_args'] as Map<String, Object?>?) ??
          const {},
      modelCostConfig:
          job.modelCostConfig ??
          overrides['model_cost_config'] as Map<String, Object?>?,
      // Sandbox
      sandboxCleanup:
          job.sandboxCleanup ?? overrides['sandbox_cleanup'] as bool?,
      // Sample control
      limit: job.limit ?? overrides['limit'],
      sampleId: job.sampleId ?? overrides['sample_id'],
      sampleShuffle: job.sampleShuffle ?? overrides['sample_shuffle'],
      epochs: job.epochs ?? overrides['epochs'],
      // Misc
      tags: job.tags ?? (overrides['tags'] as List?)?.cast<String>(),
      metadata:
          job.metadata ?? overrides['metadata'] as Map<String, dynamic>?,
      trace: job.trace ?? overrides['trace'] as bool?,
      display: job.display ?? overrides['display'] as String?,
      approval: job.approval ?? overrides['approval'],
      solver: job.solver ?? overrides['solver'],
      score: job.score ?? overrides['score'] as bool? ?? true,
      // Limits
      messageLimit: job.messageLimit ?? overrides['message_limit'] as int?,
      tokenLimit: job.tokenLimit ?? overrides['token_limit'] as int?,
      timeLimit: job.timeLimit ?? overrides['time_limit'] as int?,
      workingLimit: job.workingLimit ?? overrides['working_limit'] as int?,
      costLimit: job.costLimit ?? (overrides['cost_limit'] as num?)?.toDouble(),
      // Bundling
      bundleDir: job.bundleDir ?? overrides['bundle_dir'] as String?,
      bundleOverwrite:
          job.bundleOverwrite ??
          overrides['bundle_overwrite'] as bool? ??
          false,
      evalSetId: job.evalSetId ?? overrides['eval_set_id'] as String?,
    );
  }

  // ------------------------------------------------------------------
  // Model resolution
  // ------------------------------------------------------------------

  /// Resolve which models to run. Job overrides default.
  List<String> _resolveModels(Job job) {
    if (job.models != null && job.models!.isNotEmpty) return job.models!;
    return List.of(kDefaultModels);
  }

  // ------------------------------------------------------------------
  // Sandbox resolution
  // ------------------------------------------------------------------

  /// Resolve sandbox spec for a given config.
  ///
  /// Returns either `"local"` or a `Map` with `type` and `path` keys.
  Object _resolveSandbox(
    String datasetRoot,
    Job job, {
    String? flutterChannel,
  }) {
    final sandboxType = job.sandboxType;
    if (sandboxType.isEmpty || sandboxType == 'local') return 'local';

    // Channel override → look up channel-specific sandbox
    if (flutterChannel != null && kSdkChannels.containsKey(flutterChannel)) {
      final registryKey = kSdkChannels[flutterChannel]!;
      if (kSandboxRegistry.containsKey(registryKey)) {
        final def = kSandboxRegistry[registryKey]!;
        var sandboxPath = def['path']!;
        if (!p.isAbsolute(sandboxPath)) {
          sandboxPath = p.normalize(p.join(datasetRoot, sandboxPath));
        }
        return {'type': def['name']!, 'path': sandboxPath};
      }
    }

    // Named sandbox from registry
    if (kSandboxRegistry.containsKey(sandboxType)) {
      final def = kSandboxRegistry[sandboxType]!;
      var sandboxPath = def['path']!;
      if (!p.isAbsolute(sandboxPath)) {
        sandboxPath = p.normalize(p.join(datasetRoot, sandboxPath));
      }
      return {'type': def['name']!, 'path': sandboxPath};
    }

    // Unknown sandbox type falls back to local.
    return 'local';
  }

  // ------------------------------------------------------------------
  // Task × variant expansion
  // ------------------------------------------------------------------

  /// Expand task × variant combinations.
  List<ParsedTask> _expandTaskConfigs(
    List<ParsedTask> datasetTasks,
    Job job,
    String sandboxType,
    String datasetRoot,
  ) {
    // No variants declared → a single implicit baseline.
    final jobVariants = job.variants ?? {'baseline': <String, Object?>{}};
    final expanded = <ParsedTask>[];

    for (final taskConfig in datasetTasks) {
      final taskId = taskConfig.id;

      // Filter by job.tasks
      if (job.tasks != null && !job.tasks!.containsKey(taskId)) continue;

      // Determine effective variants (intersection with allowed_variants)
      final effectiveVariants = <String, Map<String, Object?>>{};
      for (final entry in jobVariants.entries) {
        if (taskConfig.allowedVariants == null ||
            taskConfig.allowedVariants!.contains(entry.key)) {
          effectiveVariants[entry.key] = entry.value;
        }
      }

      // Get job-level task overrides
      final jobTask = (job.tasks != null && job.tasks!.containsKey(taskId))
          ? job.tasks![taskId]
          : null;

      // Apply sample filtering
      var samples = taskConfig.samples;
      if (jobTask != null) {
        if (jobTask.includeSamples != null) {
          samples = samples
              .where((s) => jobTask.includeSamples!.contains(s.id))
              .toList();
        }
        if (jobTask.excludeSamples != null) {
          samples = samples
              .where((s) => !jobTask.excludeSamples!.contains(s.id))
              .toList();
        }
      }

      // Apply system_message override
      var systemMessage = taskConfig.systemMessage;
      if (jobTask?.systemMessage != null) {
        systemMessage = jobTask!.systemMessage;
      }

      // Create one ParsedTask per effective variant
      for (final entry in effectiveVariants.entries) {
        final variant = _resolveVariant(entry.key, entry.value, datasetRoot);

        // Compute examples_dir from job log_dir
        String? examplesDir;
        if (job.saveExamples) {
          examplesDir = p.join(job.logDir, 'examples');
        }

        expanded.add(
          taskConfig.copyWith(
            samples: samples,
            variant: variant,
            sandboxType: sandboxType,
            systemMessage: systemMessage,
            allowedVariants: null,
            saveExamples: job.saveExamples,
            examplesDir: examplesDir,
          ),
        );
      }
    }

    return expanded;
  }

  // ------------------------------------------------------------------
  // Variant resolution
  // ------------------------------------------------------------------

  /// Resolve a variant dict into a fully-resolved [Variant].
  ///
  /// Loads context files and validates skill directories (each must
  /// contain a `SKILL.md`); glob patterns that match nothing throw so
  /// typos fail loudly.
  Variant _resolveVariant(
    String name,
    Map<String, Object?> vDef,
    String datasetRoot,
  ) {
    if (vDef.isEmpty) return Variant(name: name);

    // Load context files (with glob support)
    final contextFiles = <ContextFile>[];
    final cfPaths =
        (vDef['context_files'] as List?)?.cast<String>() ?? const [];
    for (final cfPath in cfPaths) {
      if (_isGlob(cfPath)) {
        final matched = _expandGlobFiles(datasetRoot, cfPath);
        if (matched.isEmpty) {
          throw FileSystemException(
            'No context files matched pattern: $cfPath',
          );
        }
        for (final f in matched) {
          contextFiles.add(ContextFile.load(f));
        }
      } else {
        final fullPath = p.normalize(p.join(datasetRoot, cfPath));
        contextFiles.add(ContextFile.load(fullPath));
      }
    }

    // Resolve skill paths (with glob support); both 'skills' and
    // 'skill_paths' keys are accepted.
    final skillPaths = <String>[];
    final rawSkills =
        ((vDef['skills'] as List?) ?? (vDef['skill_paths'] as List?) ?? [])
            .cast<String>();
    for (final skillPathStr in rawSkills) {
      if (_isGlob(skillPathStr)) {
        final matched = _expandGlobDirs(datasetRoot, skillPathStr);
        final validDirs = matched
            .where((d) => File(p.join(d, 'SKILL.md')).existsSync())
            .toList();
        if (validDirs.isEmpty) {
          throw FileSystemException(
            'No skill directories matched pattern: $skillPathStr',
          );
        }
        skillPaths.addAll(validDirs);
      } else {
        final skillDir = p.normalize(p.join(datasetRoot, skillPathStr));
        if (!Directory(skillDir).existsSync()) {
          throw FileSystemException('Skill directory not found', skillDir);
        }
        if (!File(p.join(skillDir, 'SKILL.md')).existsSync()) {
          throw FileSystemException(
            'SKILL.md not found in $skillDir. '
            'Each skill directory must contain a SKILL.md file.',
          );
        }
        skillPaths.add(skillDir);
      }
    }

    return Variant(
      name: name,
      contextFiles: contextFiles,
      mcpServers: (vDef['mcp_servers'] as List?)?.cast<String>() ?? [],
      skillPaths: skillPaths,
      flutterChannel: vDef['flutter_channel'] as String?,
    );
  }

  // ------------------------------------------------------------------
  // Serialization helpers
  // ------------------------------------------------------------------

  /// Serialize sandbox to eval_set()-compatible format.
  ///
  /// eval_set() accepts sandbox as:
  /// - `null` for no sandbox
  /// - `"type"` for simple types
  /// - `("type", "path")` which maps to a JSON list `["type", "path"]`
  dynamic _serializeSandbox(Object sandbox) {
    if (sandbox is String) return sandbox == 'local' ? null : sandbox;
    if (sandbox is Map) {
      final type = sandbox['type'] as String;
      final path = sandbox['path'] as String;
      return [type, path];
    }
    return null;
  }

  // ------------------------------------------------------------------
  // Glob helpers
  // ------------------------------------------------------------------

  /// True when [pattern] contains any glob metacharacter.
  static bool _isGlob(String pattern) =>
      pattern.contains('*') || pattern.contains('?') || pattern.contains('[');

  /// Expand a glob pattern relative to [baseDir], returning matching files.
  /// Only `.yaml`/`.yml`/`.md` files are kept; results are sorted for
  /// determinism.
  static List<String> _expandGlobFiles(String baseDir, String pattern) {
    final glob = Glob(pattern);
    return glob
        .listSync(root: baseDir)
        .whereType<File>()
        .where(
          (f) =>
              f.path.endsWith('.yaml') ||
              f.path.endsWith('.yml') ||
              f.path.endsWith('.md'),
        )
        .map((f) => p.normalize(f.path))
        .toList()
      ..sort();
  }

  /// Expand a glob pattern relative to [baseDir], returning matching dirs.
  static List<String> _expandGlobDirs(String baseDir, String pattern) {
    final glob = Glob(pattern);
    return glob
        .listSync(root: baseDir)
        .whereType<Directory>()
        .map((d) => p.normalize(d.path))
        .toList()
      ..sort();
  }
}
diff --git a/packages/eval_config/lib/src/runner_config_exception.dart b/packages/eval_config/lib/src/runner_config_exception.dart
new file mode 100644
index 0000000..164631b
--- /dev/null
+++ b/packages/eval_config/lib/src/runner_config_exception.dart
/// Exception thrown when runner config resolution fails.
///
/// This is the library-level exception for the runner_config package.
/// CLI or web frontends can catch this and present the error appropriately.
+class ConfigException implements Exception { + final String message; + + ConfigException(this.message); + + @override + String toString() => message; +} diff --git a/packages/eval_config/lib/src/utils/yaml_utils.dart b/packages/eval_config/lib/src/utils/yaml_utils.dart new file mode 100644 index 0000000..8dd4112 --- /dev/null +++ b/packages/eval_config/lib/src/utils/yaml_utils.dart @@ -0,0 +1,39 @@ +import 'dart:io'; + +import '../runner_config_exception.dart'; +import 'package:yaml/yaml.dart'; + +/// Converts a YamlMap or YamlList to standard Dart Map/List. +dynamic convertYamlToObject(dynamic yaml) { + if (yaml is YamlMap) { + return Map.fromEntries( + yaml.entries.map( + (e) => MapEntry(e.key.toString(), convertYamlToObject(e.value)), + ), + ); + } + if (yaml is YamlList) { + return yaml.map(convertYamlToObject).toList(); + } + return yaml; +} + +/// Reads a YAML file and returns the parsed content. +/// Returns the raw YamlMap/YamlList for flexibility. +YamlNode readYamlFile(String filePath) { + final file = File(filePath); + if (!file.existsSync()) { + throw ConfigException('YAML file not found: $filePath'); + } + final content = file.readAsStringSync(); + return loadYamlNode(content); +} + +/// Reads a YAML file and converts it to a standard Dart Map. +Map readYamlFileAsMap(String filePath) { + final yaml = readYamlFile(filePath); + if (yaml is YamlMap) { + return convertYamlToObject(yaml) as Map; + } + return {}; +} diff --git a/packages/eval_config/lib/src/writers/eval_set_writer.dart b/packages/eval_config/lib/src/writers/eval_set_writer.dart new file mode 100644 index 0000000..92db5d5 --- /dev/null +++ b/packages/eval_config/lib/src/writers/eval_set_writer.dart @@ -0,0 +1,30 @@ +import 'dart:convert'; +import 'dart:io'; + +import '../models/models.dart'; +import 'package:path/path.dart' as p; + +/// Writes resolved [EvalSet] configs as a single JSON file. +/// +/// The output JSON maps ~1:1 to `eval_set()` kwargs. 
Datasets are inlined +/// in each task — no separate JSONL files needed. +class EvalSetWriter { + /// Write [EvalSet] JSON for the given resolved configs. + /// + /// Files are written to [outputDir]. Returns the path to the JSON file. + String write(List configs, String outputDir) { + Directory(outputDir).createSync(recursive: true); + + final jsonPath = p.join(outputDir, 'eval_set.json'); + + // Single config → single object; multiple → array + final jsonContent = configs.length == 1 + ? configs.first.toJson() + : configs.map((c) => c.toJson()).toList(); + + final jsonString = const JsonEncoder.withIndent(' ').convert(jsonContent); + File(jsonPath).writeAsStringSync(jsonString); + + return jsonPath; + } +} diff --git a/packages/eval_config/pubspec.yaml b/packages/eval_config/pubspec.yaml new file mode 100644 index 0000000..83d09ce --- /dev/null +++ b/packages/eval_config/pubspec.yaml @@ -0,0 +1,19 @@ +name: eval_config +description: Core library for resolving eval dataset YAML into run manifests. +version: 0.0.1 +publish_to: none +resolution: workspace + +environment: + sdk: ^3.10.0 + +dependencies: + freezed_annotation: ^3.1.0 + glob: ^2.1.0 + json_annotation: ^4.9.0 + path: ^1.9.0 + yaml: ^3.1.0 + +dev_dependencies: + lints: ^6.0.0 + test: any diff --git a/packages/eval_config/test/eval_set_resolver_test.dart b/packages/eval_config/test/eval_set_resolver_test.dart new file mode 100644 index 0000000..39ff829 --- /dev/null +++ b/packages/eval_config/test/eval_set_resolver_test.dart @@ -0,0 +1,370 @@ +import 'package:eval_config/eval_config.dart'; +import 'package:test/test.dart'; + +void main() { + late EvalSetResolver resolver; + + /// Helper to create a minimal [ParsedTask] for testing. + ParsedTask makeTask({ + String id = 'test_task', + String taskFunc = 'question_answer', + List? samples, + Variant? variant, + List? allowedVariants, + String? systemMessage, + String? model, + int? timeLimit, + int? 
messageLimit, + }) { + return ParsedTask( + id: id, + taskFunc: taskFunc, + samples: + samples ?? + [ + const Sample( + id: 's1', + input: 'What is Dart?', + target: 'A language', + metadata: {'difficulty': 'easy', 'tags': []}, + ), + ], + variant: variant ?? const Variant(), + allowedVariants: allowedVariants, + systemMessage: systemMessage, + model: model, + timeLimit: timeLimit, + messageLimit: messageLimit, + ); + } + + /// Helper to create a minimal [Job] for testing. + Job makeJob({ + String logDir = '/tmp/logs', + String sandboxType = 'local', + List? models, + Map>? variants, + Map? tasks, + bool saveExamples = false, + Map? taskDefaults, + }) { + return Job( + logDir: logDir, + sandboxType: sandboxType, + models: models, + variants: variants, + tasks: tasks, + saveExamples: saveExamples, + taskDefaults: taskDefaults, + ); + } + + setUp(() { + resolver = EvalSetResolver(); + }); + + group('resolve()', () { + test( + 'single task with baseline variant produces 1 EvalSet with 1 Task', + () { + final results = resolver.resolve( + [makeTask()], + makeJob(models: ['gemini-pro']), + '/tmp/dataset', + ); + + expect(results, hasLength(1)); + final evalSet = results.first; + expect(evalSet.tasks, hasLength(1)); + expect(evalSet.tasks.first.name, 'test_task:baseline'); + }, + ); + + test('task name follows "id:variant" format', () { + final results = resolver.resolve( + [makeTask(id: 'dart_qa')], + makeJob( + models: ['gemini-pro'], + variants: {'my_variant': {}}, + ), + '/tmp/dataset', + ); + + expect(results.first.tasks.first.name, 'dart_qa:my_variant'); + }); + + test('samples are set on the task dataset', () { + final samples = [ + const Sample( + id: 'sample_1', + input: 'input1', + target: 'target1', + metadata: {'difficulty': 'easy', 'tags': []}, + ), + const Sample( + id: 'sample_2', + input: 'input2', + target: 'target2', + metadata: {'difficulty': 'hard', 'tags': []}, + ), + ]; + + final results = resolver.resolve( + [makeTask(samples: samples)], + 
makeJob(models: ['gemini-pro']), + '/tmp/dataset', + ); + + final dataset = results.first.tasks.first.dataset!; + expect(dataset.samples, hasLength(2)); + expect(dataset.samples.first.id, 'sample_1'); + expect(dataset.samples.last.id, 'sample_2'); + }); + + test('multiple variants produce one Task per variant', () { + final results = resolver.resolve( + [makeTask()], + makeJob( + models: ['gemini-pro'], + variants: {'baseline': {}, 'full': {}}, + ), + '/tmp/dataset', + ); + + final taskNames = results + .expand((e) => e.tasks) + .map((t) => t.name) + .toSet(); + expect(taskNames, containsAll(['test_task:baseline', 'test_task:full'])); + }); + + test('model list from job is passed to EvalSet', () { + final results = resolver.resolve( + [makeTask()], + makeJob(models: ['model_a', 'model_b']), + '/tmp/dataset', + ); + + expect(results.first.model, ['model_a', 'model_b']); + }); + + test('uses default models when job has none', () { + final results = resolver.resolve( + [makeTask()], + makeJob(models: null), + '/tmp/dataset', + ); + + expect(results.first.model, kDefaultModels); + }); + + test('job with include_samples filters to only matching samples', () { + final samples = [ + const Sample( + id: 'keep', + input: 'i', + target: 't', + metadata: {'difficulty': 'easy', 'tags': []}, + ), + const Sample( + id: 'drop', + input: 'i', + target: 't', + metadata: {'difficulty': 'easy', 'tags': []}, + ), + ]; + + final results = resolver.resolve( + [makeTask(id: 'filtered', samples: samples)], + makeJob( + models: ['m'], + tasks: { + 'filtered': const JobTask( + id: 'filtered', + includeSamples: ['keep'], + ), + }, + ), + '/tmp/dataset', + ); + + final dataset = results.first.tasks.first.dataset!; + expect(dataset.samples, hasLength(1)); + expect(dataset.samples.first.id, 'keep'); + }); + + test('job with exclude_samples filters out excluded', () { + final samples = [ + const Sample( + id: 'keep', + input: 'i', + target: 't', + metadata: {'difficulty': 'easy', 'tags': []}, + 
), + const Sample( + id: 'drop', + input: 'i', + target: 't', + metadata: {'difficulty': 'easy', 'tags': []}, + ), + ]; + + final results = resolver.resolve( + [makeTask(id: 'filtered', samples: samples)], + makeJob( + models: ['m'], + tasks: { + 'filtered': const JobTask( + id: 'filtered', + excludeSamples: ['drop'], + ), + }, + ), + '/tmp/dataset', + ); + + final dataset = results.first.tasks.first.dataset!; + expect(dataset.samples, hasLength(1)); + expect(dataset.samples.first.id, 'keep'); + }); + + test('local sandbox resolves to null in output', () { + final results = resolver.resolve( + [makeTask()], + makeJob(models: ['m'], sandboxType: 'local'), + '/tmp/dataset', + ); + + expect(results.first.sandbox, isNull); + }); + + test('respects allowedVariants on tasks', () { + final results = resolver.resolve( + [ + makeTask(allowedVariants: ['baseline']), + ], + makeJob( + models: ['m'], + variants: {'baseline': {}, 'full': {}}, + ), + '/tmp/dataset', + ); + + final taskNames = results + .expand((e) => e.tasks) + .map((t) => t.name) + .toList(); + expect(taskNames, ['test_task:baseline']); + expect(taskNames, isNot(contains('test_task:full'))); + }); + + test('tasks not in job.tasks are excluded', () { + final results = resolver.resolve( + [makeTask(id: 'included'), makeTask(id: 'excluded')], + makeJob( + models: ['m'], + tasks: { + 'included': const JobTask(id: 'included'), + }, + ), + '/tmp/dataset', + ); + + final taskNames = results + .expand((e) => e.tasks) + .map((t) => t.name) + .toList(); + expect(taskNames, hasLength(1)); + expect(taskNames.first, contains('included')); + }); + + test('taskFunc is propagated to output Task', () { + final results = resolver.resolve( + [makeTask(taskFunc: 'flutter_code_gen')], + makeJob(models: ['m']), + '/tmp/dataset', + ); + + expect(results.first.tasks.first.taskFunc, 'flutter_code_gen'); + }); + + test('system_message appears in task metadata', () { + final results = resolver.resolve( + [makeTask(systemMessage: 'Be 
concise.')], + makeJob(models: ['m']), + '/tmp/dataset', + ); + + final metadata = results.first.tasks.first.metadata!; + expect(metadata['system_message'], 'Be concise.'); + }); + + test('task-level settings propagate to output', () { + final results = resolver.resolve( + [makeTask(model: 'gpt-4o', timeLimit: 120, messageLimit: 25)], + makeJob(models: ['m']), + '/tmp/dataset', + ); + + final task = results.first.tasks.first; + expect(task.model, 'gpt-4o'); + expect(task.timeLimit, 120); + expect(task.messageLimit, 25); + }); + + test('task_defaults from job are used as fallbacks', () { + final results = resolver.resolve( + [makeTask()], + makeJob( + models: ['m'], + taskDefaults: {'time_limit': 999, 'message_limit': 77}, + ), + '/tmp/dataset', + ); + + final task = results.first.tasks.first; + expect(task.timeLimit, 999); + expect(task.messageLimit, 77); + }); + + test('task-level settings override task_defaults', () { + final results = resolver.resolve( + [makeTask(timeLimit: 100)], + makeJob( + models: ['m'], + taskDefaults: {'time_limit': 999}, + ), + '/tmp/dataset', + ); + + expect(results.first.tasks.first.timeLimit, 100); + }); + + test('job-level eval_set fields propagate', () { + final results = resolver.resolve( + [makeTask()], + const Job( + logDir: '/tmp/logs', + models: ['m'], + retryAttempts: 42, + logLevel: 'debug', + ), + '/tmp/dataset', + ); + + expect(results.first.retryAttempts, 42); + expect(results.first.logLevel, 'debug'); + }); + + test('dataset name matches task name', () { + final results = resolver.resolve( + [makeTask(id: 'my_eval')], + makeJob(models: ['m']), + '/tmp/dataset', + ); + + final dataset = results.first.tasks.first.dataset!; + expect(dataset.name, 'my_eval:baseline'); + }); + }); +} diff --git a/packages/eval_config/test/eval_set_writer_test.dart b/packages/eval_config/test/eval_set_writer_test.dart new file mode 100644 index 0000000..e93142e --- /dev/null +++ b/packages/eval_config/test/eval_set_writer_test.dart @@ -0,0 
+1,96 @@ +import 'dart:convert'; +import 'dart:io'; + +import 'package:eval_config/eval_config.dart'; +import 'package:test/test.dart'; + +void main() { + late EvalSetWriter writer; + late Directory tmpDir; + + setUp(() { + writer = EvalSetWriter(); + tmpDir = Directory.systemTemp.createTempSync('eval_set_writer_test_'); + }); + + tearDown(() { + if (tmpDir.existsSync()) { + tmpDir.deleteSync(recursive: true); + } + }); + + EvalSet makeEvalSet({String logDir = '/tmp/logs', int taskCount = 1}) { + return EvalSet( + tasks: List.generate( + taskCount, + (i) => Task( + name: 'task_$i:baseline', + taskFunc: 'func_$i', + dataset: Dataset( + samples: [ + Sample(id: 's$i', input: 'input $i', target: 'target $i'), + ], + name: 'task_$i:baseline', + ), + ), + ), + logDir: logDir, + ); + } + + group('write()', () { + test('single config writes valid JSON object to eval_set.json', () { + final config = makeEvalSet(); + final path = writer.write([config], tmpDir.path); + + expect(path, endsWith('eval_set.json')); + expect(File(path).existsSync(), isTrue); + + final content = File(path).readAsStringSync(); + final json = jsonDecode(content); + expect(json, isA>()); + expect(json['tasks'], isA()); + expect(json['log_dir'], '/tmp/logs'); + }); + + test('multiple configs writes JSON array', () { + final configs = [ + makeEvalSet(logDir: '/logs/a'), + makeEvalSet(logDir: '/logs/b'), + ]; + final path = writer.write(configs, tmpDir.path); + + final content = File(path).readAsStringSync(); + final json = jsonDecode(content); + expect(json, isA()); + expect((json as List), hasLength(2)); + }); + + test('creates output directory if missing', () { + final nestedDir = '${tmpDir.path}/a/b/c'; + expect(Directory(nestedDir).existsSync(), isFalse); + + writer.write([makeEvalSet()], nestedDir); + + expect(Directory(nestedDir).existsSync(), isTrue); + }); + + test('output is pretty-printed', () { + final path = writer.write([makeEvalSet()], tmpDir.path); + final content = 
File(path).readAsStringSync(); + + // Pretty-printed JSON has newlines and indentation + expect(content, contains('\n')); + expect(content, contains(' ')); + }); + + test('overwrites existing file', () { + writer.write([makeEvalSet(logDir: '/first')], tmpDir.path); + final path = writer.write([makeEvalSet(logDir: '/second')], tmpDir.path); + + final content = File(path).readAsStringSync(); + expect(content, contains('/second')); + expect(content, isNot(contains('/first'))); + }); + }); +} diff --git a/packages/eval_config/test/json_parser_test.dart b/packages/eval_config/test/json_parser_test.dart new file mode 100644 index 0000000..b8fe7a4 --- /dev/null +++ b/packages/eval_config/test/json_parser_test.dart @@ -0,0 +1,291 @@ +import 'package:eval_config/eval_config.dart'; +import 'package:test/test.dart'; + +void main() { + late JsonParser parser; + + setUp(() { + parser = JsonParser(); + }); + + group('parseTasksFromMaps()', () { + test('parses a minimal task map', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'my_task', + 'func': 'question_answer', + 'samples': { + 'inline': [ + {'id': 's1', 'input': 'What is Dart?', 'target': 'A language'}, + ], + }, + }, + ]); + + expect(tasks, hasLength(1)); + expect(tasks.first.id, 'my_task'); + expect(tasks.first.taskFunc, 'question_answer'); + expect(tasks.first.samples, hasLength(1)); + expect(tasks.first.samples.first.id, 's1'); + expect(tasks.first.samples.first.input, 'What is Dart?'); + expect(tasks.first.samples.first.target, 'A language'); + }); + + test('defaults func to id when func is absent', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'dart_qa', + 'samples': {'inline': >[]}, + }, + ]); + + expect(tasks.first.taskFunc, 'dart_qa'); + }); + + test('throws FormatException when sample missing required field', () { + expect( + () => parser.parseTasksFromMaps([ + { + 'id': 'bad_task', + 'samples': { + 'inline': [ + {'id': 's1', 'input': 'hello'}, // missing 'target' + ], + }, + }, + ]), 
+ throwsA(isA()), + ); + }); + + test('normalises tags from comma-separated string', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'tagged_task', + 'samples': { + 'inline': [ + { + 'id': 's1', + 'input': 'q', + 'target': 'a', + 'tags': 'flutter, dart, widgets', + }, + ], + }, + }, + ]); + + final metadata = tasks.first.samples.first.metadata!; + expect(metadata['tags'], equals(['flutter', 'dart', 'widgets'])); + }); + + test('normalises tags from list', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'tagged_task', + 'samples': { + 'inline': [ + { + 'id': 's1', + 'input': 'q', + 'target': 'a', + 'tags': ['tag1', 'tag2'], + }, + ], + }, + }, + ]); + + final metadata = tasks.first.samples.first.metadata!; + expect(metadata['tags'], equals(['tag1', 'tag2'])); + }); + + test('defaults tags to empty list when absent', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'no_tags', + 'samples': { + 'inline': [ + {'id': 's1', 'input': 'q', 'target': 'a'}, + ], + }, + }, + ]); + + final metadata = tasks.first.samples.first.metadata!; + expect(metadata['tags'], isEmpty); + }); + + test('defaults difficulty to medium', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'task', + 'samples': { + 'inline': [ + {'id': 's1', 'input': 'q', 'target': 'a'}, + ], + }, + }, + ]); + + final metadata = tasks.first.samples.first.metadata!; + expect(metadata['difficulty'], 'medium'); + }); + + test('parses sample-level choices, setup, files', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'task', + 'samples': { + 'inline': [ + { + 'id': 's1', + 'input': 'q', + 'target': 'a', + 'choices': ['A', 'B', 'C'], + 'setup': 'echo hello', + 'files': {'main.dart': 'void main() {}'}, + }, + ], + }, + }, + ]); + + final sample = tasks.first.samples.first; + expect(sample.choices, ['A', 'B', 'C']); + expect(sample.setup, 'echo hello'); + expect(sample.files, {'main.dart': 'void main() {}'}); + }); + + test('parses all task-level settings', () 
{ + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'full_task', + 'func': 'my_func', + 'system_message': 'Be helpful', + 'allowed_variants': ['baseline', 'full'], + 'model': 'gemini-pro', + 'config': {'temperature': 0.5}, + 'model_roles': {'grader': 'gpt-4o'}, + 'message_limit': 50, + 'token_limit': 4096, + 'time_limit': 600, + 'working_limit': 300, + 'cost_limit': 1.5, + 'display_name': 'Full Task', + 'version': 2, + 'metadata': {'author': 'test'}, + 'samples': {'inline': >[]}, + }, + ]); + + final task = tasks.first; + expect(task.systemMessage, 'Be helpful'); + expect(task.allowedVariants, ['baseline', 'full']); + expect(task.model, 'gemini-pro'); + expect(task.config, {'temperature': 0.5}); + expect(task.modelRoles, {'grader': 'gpt-4o'}); + expect(task.messageLimit, 50); + expect(task.tokenLimit, 4096); + expect(task.timeLimit, 600); + expect(task.workingLimit, 300); + expect(task.costLimit, 1.5); + expect(task.displayName, 'Full Task'); + expect(task.version, 2); + expect(task.metadata, {'author': 'test'}); + }); + + test('skips empty sample maps', () { + final tasks = parser.parseTasksFromMaps([ + { + 'id': 'task', + 'samples': { + 'inline': [{}], + }, + }, + ]); + + expect(tasks.first.samples, isEmpty); + }); + }); + + group('parseJobFromMap()', () { + test('parses minimal job with defaults', () { + final job = parser.parseJobFromMap({}); + + expect(job.logDir, ''); + expect(job.sandboxType, 'local'); + expect(job.maxConnections, 10); + expect(job.models, isNull); + expect(job.saveExamples, false); + }); + + test('parses all core fields', () { + final job = parser.parseJobFromMap({ + 'log_dir': './logs/run1', + 'sandbox_type': 'podman', + 'max_connections': 5, + 'models': ['gemini-pro', 'gpt-4o'], + 'save_examples': true, + }); + + expect(job.logDir, './logs/run1'); + expect(job.sandboxType, 'podman'); + expect(job.maxConnections, 5); + expect(job.models, ['gemini-pro', 'gpt-4o']); + expect(job.saveExamples, true); + }); + + test('parses promoted 
eval_set fields', () { + final job = parser.parseJobFromMap({ + 'retry_attempts': 20, + 'max_retries': 3, + 'retry_wait': 5.0, + 'fail_on_error': 0.5, + 'continue_on_fail': true, + 'max_samples': 100, + 'max_tasks': 4, + 'log_level': 'debug', + 'tags': ['ci', 'nightly'], + 'metadata': {'run_by': 'bot'}, + }); + + expect(job.retryAttempts, 20); + expect(job.maxRetries, 3); + expect(job.retryWait, 5.0); + expect(job.failOnError, 0.5); + expect(job.continueOnFail, true); + expect(job.maxSamples, 100); + expect(job.maxTasks, 4); + expect(job.logLevel, 'debug'); + expect(job.tags, ['ci', 'nightly']); + expect(job.metadata, {'run_by': 'bot'}); + }); + + test('parses pass-through overrides', () { + final job = parser.parseJobFromMap({ + 'eval_set_overrides': {'custom_key': 'custom_value'}, + 'task_defaults': {'time_limit': 600}, + }); + + expect(job.evalSetOverrides, {'custom_key': 'custom_value'}); + expect(job.taskDefaults, {'time_limit': 600}); + }); + }); + + group('parseTasks()', () { + test('returns empty list (filesystem not used)', () { + final tasks = parser.parseTasks('/nonexistent'); + expect(tasks, isEmpty); + }); + }); + + group('parseJob()', () { + test('throws UnsupportedError', () { + expect( + () => parser.parseJob('/path', '/root'), + throwsA(isA()), + ); + }); + }); +} diff --git a/packages/eval_config/test/parsed_task_test.dart b/packages/eval_config/test/parsed_task_test.dart new file mode 100644 index 0000000..7b0ce47 --- /dev/null +++ b/packages/eval_config/test/parsed_task_test.dart @@ -0,0 +1,147 @@ +import 'package:eval_config/eval_config.dart'; +import 'package:test/test.dart'; + +void main() { + group('ParsedTask', () { + test('has correct defaults', () { + const task = ParsedTask( + id: 'test', + taskFunc: 'question_answer', + samples: [], + variant: Variant(), + ); + + expect(task.sandboxType, 'local'); + expect(task.saveExamples, false); + expect(task.systemMessage, isNull); + expect(task.allowedVariants, isNull); + expect(task.examplesDir, 
isNull); + expect(task.model, isNull); + expect(task.config, isNull); + expect(task.timeLimit, isNull); + expect(task.messageLimit, isNull); + expect(task.tokenLimit, isNull); + expect(task.costLimit, isNull); + }); + + test('stores all constructor fields', () { + const task = ParsedTask( + id: 'my_task', + taskFunc: 'flutter_code_gen', + samples: [Sample(id: 's1', input: 'q', target: 'a')], + variant: Variant(name: 'full'), + sandboxType: 'podman', + systemMessage: 'Be helpful', + allowedVariants: ['baseline', 'full'], + saveExamples: true, + examplesDir: '/tmp/examples', + model: 'gemini-pro', + config: {'temperature': 0.5}, + modelRoles: {'grader': 'gpt-4o'}, + timeLimit: 600, + messageLimit: 50, + tokenLimit: 4096, + workingLimit: 300, + costLimit: 1.5, + displayName: 'My Task', + version: 2, + metadata: {'author': 'test'}, + ); + + expect(task.id, 'my_task'); + expect(task.taskFunc, 'flutter_code_gen'); + expect(task.samples, hasLength(1)); + expect(task.variant.name, 'full'); + expect(task.sandboxType, 'podman'); + expect(task.systemMessage, 'Be helpful'); + expect(task.allowedVariants, ['baseline', 'full']); + expect(task.saveExamples, true); + expect(task.examplesDir, '/tmp/examples'); + expect(task.model, 'gemini-pro'); + expect(task.config, {'temperature': 0.5}); + expect(task.modelRoles, {'grader': 'gpt-4o'}); + expect(task.timeLimit, 600); + expect(task.messageLimit, 50); + expect(task.tokenLimit, 4096); + expect(task.workingLimit, 300); + expect(task.costLimit, 1.5); + expect(task.displayName, 'My Task'); + expect(task.version, 2); + expect(task.metadata, {'author': 'test'}); + }); + }); + + group('copyWith()', () { + test('overrides specified fields', () { + const original = ParsedTask( + id: 'original', + taskFunc: 'func_a', + samples: [], + variant: Variant(name: 'baseline'), + timeLimit: 100, + ); + + final copy = original.copyWith( + id: 'copied', + timeLimit: 999, + ); + + expect(copy.id, 'copied'); + expect(copy.timeLimit, 999); + }); + + 
test('preserves fields not overridden', () { + const original = ParsedTask( + id: 'task', + taskFunc: 'func', + samples: [], + variant: Variant(name: 'full'), + sandboxType: 'podman', + systemMessage: 'Be helpful', + model: 'gemini-pro', + ); + + final copy = original.copyWith(id: 'new_id'); + + expect(copy.taskFunc, 'func'); + expect(copy.variant.name, 'full'); + expect(copy.sandboxType, 'podman'); + expect(copy.systemMessage, 'Be helpful'); + expect(copy.model, 'gemini-pro'); + }); + + test('returns a new instance (not the same object)', () { + const original = ParsedTask( + id: 'a', + taskFunc: 'f', + samples: [], + variant: Variant(), + ); + + final copy = original.copyWith(id: 'b'); + + expect(identical(original, copy), isFalse); + expect(original.id, 'a'); + expect(copy.id, 'b'); + }); + + test('can override samples list', () { + const original = ParsedTask( + id: 'task', + taskFunc: 'func', + samples: [Sample(id: 's1', input: 'q', target: 'a')], + variant: Variant(), + ); + + final copy = original.copyWith( + samples: [ + const Sample(id: 's2', input: 'q2', target: 'a2'), + const Sample(id: 's3', input: 'q3', target: 'a3'), + ], + ); + + expect(copy.samples, hasLength(2)); + expect(copy.samples.first.id, 's2'); + }); + }); +} diff --git a/packages/eval_config/test/yaml_utils_test.dart b/packages/eval_config/test/yaml_utils_test.dart new file mode 100644 index 0000000..f3650c1 --- /dev/null +++ b/packages/eval_config/test/yaml_utils_test.dart @@ -0,0 +1,160 @@ +import 'dart:io'; + +import 'package:eval_config/eval_config.dart'; +import 'package:test/test.dart'; +import 'package:yaml/yaml.dart'; + +void main() { + group('convertYamlToObject()', () { + test('converts YamlMap to Map', () { + final yaml = loadYaml('key: value'); + final result = convertYamlToObject(yaml); + expect(result, isA>()); + expect(result['key'], equals('value')); + }); + + test('converts YamlList to List', () { + final yaml = loadYaml('- item1\n- item2\n- item3'); + final result = 
convertYamlToObject(yaml); + expect(result, isA()); + expect(result, equals(['item1', 'item2', 'item3'])); + }); + + test('converts nested YamlMap', () { + final yaml = loadYaml(''' +outer: + inner: + deep: value +'''); + final result = convertYamlToObject(yaml); + expect(result['outer']['inner']['deep'], equals('value')); + }); + + test('preserves String primitive', () { + final yaml = loadYaml('key: hello'); + final result = convertYamlToObject(yaml); + expect(result['key'], isA()); + expect(result['key'], equals('hello')); + }); + + test('preserves int primitive', () { + final yaml = loadYaml('key: 42'); + final result = convertYamlToObject(yaml); + expect(result['key'], equals(42)); + }); + + test('preserves bool primitive', () { + final yaml = loadYaml('key: true'); + final result = convertYamlToObject(yaml); + expect(result['key'], equals(true)); + }); + + test('handles null value', () { + final yaml = loadYaml('key: null'); + final result = convertYamlToObject(yaml); + expect(result['key'], isNull); + }); + + test('handles mixed nested structures', () { + final yaml = loadYaml(''' +map: + list: + - item1 + - nested: + key: value +'''); + final result = convertYamlToObject(yaml); + expect(result['map']['list'][0], equals('item1')); + expect(result['map']['list'][1]['nested']['key'], equals('value')); + }); + }); + + group('readYamlFile()', () { + late Directory tempDir; + + setUp(() { + tempDir = Directory.systemTemp.createTempSync('yaml_test_'); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + // test('reads valid YAML file', () { + // final file = File('${tempDir.path}/test.yaml'); + // file.writeAsStringSync('key: value'); + + // final result = readYamlFile(file.path); + // expect(result, isNotNull); + // expect(result['key'], equals('value')); + // }); + + test('throws RunnerConfigException for non-existent file', () { + expect( + () => readYamlFile('${tempDir.path}/nonexistent.yaml'), + 
throwsA(isA()), + ); + }); + + // test('reads complex YAML structure', () { + // final file = File('${tempDir.path}/complex.yaml'); + // file.writeAsStringSync(''' + // name: test + // items: + // - one + // - two + // config: + // enabled: true + // '''); + + // final result = readYamlFile(file.path); + // expect(result['name'], equals('test')); + // expect(result['items'], hasLength(2)); + // expect(result['config']['enabled'], isTrue); + // }); + }); + + group('readYamlFileAsMap()', () { + late Directory tempDir; + + setUp(() { + tempDir = Directory.systemTemp.createTempSync('yaml_map_test_'); + }); + + tearDown(() { + if (tempDir.existsSync()) { + tempDir.deleteSync(recursive: true); + } + }); + + test('returns standard Dart Map from YAML', () { + final file = File('${tempDir.path}/test.yaml'); + file.writeAsStringSync('key: value'); + + final result = readYamlFileAsMap(file.path); + expect(result, isA>()); + expect(result['key'], equals('value')); + }); + + test('returns empty map for YAML with list root', () { + final file = File('${tempDir.path}/list.yaml'); + file.writeAsStringSync('- item1\n- item2'); + + final result = readYamlFileAsMap(file.path); + expect(result, equals({})); + }); + + test('converts nested YamlMaps to Maps', () { + final file = File('${tempDir.path}/nested.yaml'); + file.writeAsStringSync(''' +outer: + inner: value +'''); + + final result = readYamlFileAsMap(file.path); + expect(result['outer'], isA>()); + }); + }); +} diff --git a/pubspec.lock b/pubspec.lock new file mode 100644 index 0000000..366752d --- /dev/null +++ b/pubspec.lock @@ -0,0 +1,622 @@ +# Generated by pub +# See https://dart.dev/tools/pub/glossary#lockfile +packages: + _discoveryapis_commons: + dependency: transitive + description: + name: _discoveryapis_commons + sha256: "113c4100b90a5b70a983541782431b82168b3cae166ab130649c36eb3559d498" + url: "https://pub.dev" + source: hosted + version: "1.0.7" + _fe_analyzer_shared: + dependency: transitive + description: + name: 
_fe_analyzer_shared + sha256: "3b19a47f6ea7c2632760777c78174f47f6aec1e05f0cd611380d4593b8af1dbc" + url: "https://pub.dev" + source: hosted + version: "96.0.0" + analyzer: + dependency: transitive + description: + name: analyzer + sha256: "0c516bc4ad36a1a75759e54d5047cb9d15cded4459df01aa35a0b5ec7db2c2a0" + url: "https://pub.dev" + source: hosted + version: "10.2.0" + args: + dependency: "direct main" + description: + name: args + sha256: d0481093c50b1da8910eb0bb301626d4d8eb7284aa739614d2b394ee09e3ea04 + url: "https://pub.dev" + source: hosted + version: "2.7.0" + async: + dependency: transitive + description: + name: async + sha256: "758e6d74e971c3e5aceb4110bfd6698efc7f501675bcfe0c775459a8140750eb" + url: "https://pub.dev" + source: hosted + version: "2.13.0" + boolean_selector: + dependency: transitive + description: + name: boolean_selector + sha256: "8aab1771e1243a5063b8b0ff68042d67334e3feab9e95b9490f9a6ebf73b42ea" + url: "https://pub.dev" + source: hosted + version: "2.1.2" + build: + dependency: transitive + description: + name: build + sha256: "275bf6bb2a00a9852c28d4e0b410da1d833a734d57d39d44f94bfc895a484ec3" + url: "https://pub.dev" + source: hosted + version: "4.0.4" + build_config: + dependency: transitive + description: + name: build_config + sha256: "4070d2a59f8eec34c97c86ceb44403834899075f66e8a9d59706f8e7834f6f71" + url: "https://pub.dev" + source: hosted + version: "1.3.0" + build_daemon: + dependency: transitive + description: + name: build_daemon + sha256: bf05f6e12cfea92d3c09308d7bcdab1906cd8a179b023269eed00c071004b957 + url: "https://pub.dev" + source: hosted + version: "4.1.1" + build_runner: + dependency: "direct dev" + description: + name: build_runner + sha256: "7981eb922842c77033026eb4341d5af651562008cdb116bdfa31fc46516b6462" + url: "https://pub.dev" + source: hosted + version: "2.12.2" + built_collection: + dependency: transitive + description: + name: built_collection + sha256: 
"376e3dd27b51ea877c28d525560790aee2e6fbb5f20e2f85d5081027d94e2100" + url: "https://pub.dev" + source: hosted + version: "5.1.1" + built_value: + dependency: transitive + description: + name: built_value + sha256: "6ae8a6435a8c6520c7077b107e77f1fb4ba7009633259a4d49a8afd8e7efc5e9" + url: "https://pub.dev" + source: hosted + version: "8.12.4" + checked_yaml: + dependency: transitive + description: + name: checked_yaml + sha256: "959525d3162f249993882720d52b7e0c833978df229be20702b33d48d91de70f" + url: "https://pub.dev" + source: hosted + version: "2.0.4" + cli_config: + dependency: transitive + description: + name: cli_config + sha256: ac20a183a07002b700f0c25e61b7ee46b23c309d76ab7b7640a028f18e4d99ec + url: "https://pub.dev" + source: hosted + version: "0.2.0" + code_builder: + dependency: transitive + description: + name: code_builder + sha256: "6a6cab2ba4680d6423f34a9b972a4c9a94ebe1b62ecec4e1a1f2cba91fd1319d" + url: "https://pub.dev" + source: hosted + version: "4.11.1" + collection: + dependency: transitive + description: + name: collection + sha256: "2f5709ae4d3d59dd8f7cd309b4e023046b57d8a6c82130785d2b0e5868084e76" + url: "https://pub.dev" + source: hosted + version: "1.19.1" + convert: + dependency: transitive + description: + name: convert + sha256: b30acd5944035672bc15c6b7a8b47d773e41e2f17de064350988c5d02adb1c68 + url: "https://pub.dev" + source: hosted + version: "3.1.2" + coverage: + dependency: transitive + description: + name: coverage + sha256: "5da775aa218eaf2151c721b16c01c7676fbfdd99cebba2bf64e8b807a28ff94d" + url: "https://pub.dev" + source: hosted + version: "1.15.0" + crypto: + dependency: transitive + description: + name: crypto + sha256: c8ea0233063ba03258fbcf2ca4d6dadfefe14f02fab57702265467a19f27fadf + url: "https://pub.dev" + source: hosted + version: "3.0.7" + dart_style: + dependency: transitive + description: + name: dart_style + sha256: "29f7ecc274a86d32920b1d9cfc7502fa87220da41ec60b55f329559d5732e2b2" + url: "https://pub.dev" + source: hosted + 
version: "3.1.7" + dotenv: + dependency: transitive + description: + name: dotenv + sha256: "379e64b6fc82d3df29461d349a1796ecd2c436c480d4653f3af6872eccbc90e1" + url: "https://pub.dev" + source: hosted + version: "4.2.0" + file: + dependency: transitive + description: + name: file + sha256: a3b4f84adafef897088c160faf7dfffb7696046cb13ae90b508c2cbc95d3b8d4 + url: "https://pub.dev" + source: hosted + version: "7.0.1" + fixnum: + dependency: transitive + description: + name: fixnum + sha256: b6dc7065e46c974bc7c5f143080a6764ec7a4be6da1285ececdc37be96de53be + url: "https://pub.dev" + source: hosted + version: "1.1.1" + freezed: + dependency: "direct dev" + description: + name: freezed + sha256: f23ea33b3863f119b58ed1b586e881a46bd28715ddcc4dbc33104524e3434131 + url: "https://pub.dev" + source: hosted + version: "3.2.5" + freezed_annotation: + dependency: "direct main" + description: + name: freezed_annotation + sha256: "7294967ff0a6d98638e7acb774aac3af2550777accd8149c90af5b014e6d44d8" + url: "https://pub.dev" + source: hosted + version: "3.1.0" + frontend_server_client: + dependency: transitive + description: + name: frontend_server_client + sha256: f64a0333a82f30b0cca061bc3d143813a486dc086b574bfb233b7c1372427694 + url: "https://pub.dev" + source: hosted + version: "4.0.0" + gcloud: + dependency: transitive + description: + name: gcloud + sha256: "19e8f105b1087371f37d567d1b6335260d84f970e70bf1c2e9c3616c0247f9f2" + url: "https://pub.dev" + source: hosted + version: "0.9.0" + glob: + dependency: "direct main" + description: + name: glob + sha256: c3f1ee72c96f8f78935e18aa8cecced9ab132419e8625dc187e1c2408efc20de + url: "https://pub.dev" + source: hosted + version: "2.1.3" + google_identity_services_web: + dependency: transitive + description: + name: google_identity_services_web + sha256: "5d187c46dc59e02646e10fe82665fc3884a9b71bc1c90c2b8b749316d33ee454" + url: "https://pub.dev" + source: hosted + version: "0.3.3+1" + googleapis: + dependency: transitive + description: + name: 
googleapis + sha256: "692fb9e90c321b61a7a2123de0353ec8a20691cd979db2553d8d732f710f6535" + url: "https://pub.dev" + source: hosted + version: "15.0.0" + googleapis_auth: + dependency: transitive + description: + name: googleapis_auth + sha256: befd71383a955535060acde8792e7efc11d2fccd03dd1d3ec434e85b68775938 + url: "https://pub.dev" + source: hosted + version: "1.6.0" + graphs: + dependency: transitive + description: + name: graphs + sha256: "741bbf84165310a68ff28fe9e727332eef1407342fca52759cb21ad8177bb8d0" + url: "https://pub.dev" + source: hosted + version: "2.3.2" + howdy: + dependency: transitive + description: + path: "packages/howdy-cli" + ref: HEAD + resolved-ref: f2a5a68ccc306baa4741d2bae9f88614e2d1be9d + url: "https://github.com/ericwindmill/howdy.git" + source: git + version: "0.0.5" + http: + dependency: transitive + description: + name: http + sha256: "87721a4a50b19c7f1d49001e51409bddc46303966ce89a65af4f4e6004896412" + url: "https://pub.dev" + source: hosted + version: "1.6.0" + http_multi_server: + dependency: transitive + description: + name: http_multi_server + sha256: aa6199f908078bb1c5efb8d8638d4ae191aac11b311132c3ef48ce352fb52ef8 + url: "https://pub.dev" + source: hosted + version: "3.2.2" + http_parser: + dependency: transitive + description: + name: http_parser + sha256: "178d74305e7866013777bab2c3d8726205dc5a4dd935297175b19a23a2e66571" + url: "https://pub.dev" + source: hosted + version: "4.1.2" + io: + dependency: transitive + description: + name: io + sha256: dfd5a80599cf0165756e3181807ed3e77daf6dd4137caaad72d0b7931597650b + url: "https://pub.dev" + source: hosted + version: "1.0.5" + json_annotation: + dependency: "direct main" + description: + name: json_annotation + sha256: cb09e7dac6210041fad964ed7fbee004f14258b4eca4040f72d1234062ace4c8 + url: "https://pub.dev" + source: hosted + version: "4.11.0" + json_serializable: + dependency: "direct dev" + description: + name: json_serializable + sha256: 
"44729f5c45748e6748f6b9a57ab8f7e4336edc8ae41fc295070e3814e616a6c0" + url: "https://pub.dev" + source: hosted + version: "6.13.0" + lints: + dependency: "direct dev" + description: + name: lints + sha256: "12f842a479589fea194fe5c5a3095abc7be0c1f2ddfa9a0e76aed1dbd26a87df" + url: "https://pub.dev" + source: hosted + version: "6.1.0" + logging: + dependency: transitive + description: + name: logging + sha256: c8245ada5f1717ed44271ed1c26b8ce85ca3228fd2ffdb75468ab01979309d61 + url: "https://pub.dev" + source: hosted + version: "1.3.0" + matcher: + dependency: transitive + description: + name: matcher + sha256: dc0b7dc7651697ea4ff3e69ef44b0407ea32c487a39fff6a4004fa585e901861 + url: "https://pub.dev" + source: hosted + version: "0.12.19" + meta: + dependency: transitive + description: + name: meta + sha256: "9f29b9bcc8ee287b1a31e0d01be0eae99a930dbffdaecf04b3f3d82a969f296f" + url: "https://pub.dev" + source: hosted + version: "1.18.1" + mime: + dependency: transitive + description: + name: mime + sha256: "41a20518f0cb1256669420fdba0cd90d21561e560ac240f26ef8322e45bb7ed6" + url: "https://pub.dev" + source: hosted + version: "2.0.0" + node_preamble: + dependency: transitive + description: + name: node_preamble + sha256: "6e7eac89047ab8a8d26cf16127b5ed26de65209847630400f9aefd7cd5c730db" + url: "https://pub.dev" + source: hosted + version: "2.0.2" + package_config: + dependency: transitive + description: + name: package_config + sha256: f096c55ebb7deb7e384101542bfba8c52696c1b56fca2eb62827989ef2353bbc + url: "https://pub.dev" + source: hosted + version: "2.2.0" + path: + dependency: "direct main" + description: + name: path + sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5" + url: "https://pub.dev" + source: hosted + version: "1.9.1" + pool: + dependency: transitive + description: + name: pool + sha256: "978783255c543aa3586a1b3c21f6e9d720eb315376a915872c61ef8b5c20177d" + url: "https://pub.dev" + source: hosted + version: "1.5.2" + pub_semver: + 
dependency: transitive + description: + name: pub_semver + sha256: "5bfcf68ca79ef689f8990d1160781b4bad40a3bd5e5218ad4076ddb7f4081585" + url: "https://pub.dev" + source: hosted + version: "2.2.0" + pubspec_parse: + dependency: transitive + description: + name: pubspec_parse + sha256: "0560ba233314abbed0a48a2956f7f022cce7c3e1e73df540277da7544cad4082" + url: "https://pub.dev" + source: hosted + version: "1.5.0" + retry: + dependency: transitive + description: + name: retry + sha256: "822e118d5b3aafed083109c72d5f484c6dc66707885e07c0fbcb8b986bba7efc" + url: "https://pub.dev" + source: hosted + version: "3.1.2" + shelf: + dependency: transitive + description: + name: shelf + sha256: e7dd780a7ffb623c57850b33f43309312fc863fb6aa3d276a754bb299839ef12 + url: "https://pub.dev" + source: hosted + version: "1.4.2" + shelf_packages_handler: + dependency: transitive + description: + name: shelf_packages_handler + sha256: "89f967eca29607c933ba9571d838be31d67f53f6e4ee15147d5dc2934fee1b1e" + url: "https://pub.dev" + source: hosted + version: "3.0.2" + shelf_static: + dependency: transitive + description: + name: shelf_static + sha256: c87c3875f91262785dade62d135760c2c69cb217ac759485334c5857ad89f6e3 + url: "https://pub.dev" + source: hosted + version: "1.1.3" + shelf_web_socket: + dependency: transitive + description: + name: shelf_web_socket + sha256: "3632775c8e90d6c9712f883e633716432a27758216dfb61bd86a8321c0580925" + url: "https://pub.dev" + source: hosted + version: "3.0.0" + source_gen: + dependency: transitive + description: + name: source_gen + sha256: "1d562a3c1f713904ebbed50d2760217fd8a51ca170ac4b05b0db490699dbac17" + url: "https://pub.dev" + source: hosted + version: "4.2.0" + source_helper: + dependency: transitive + description: + name: source_helper + sha256: "4a85e90b50694e652075cbe4575665539d253e6ec10e46e76b45368ab5e3caae" + url: "https://pub.dev" + source: hosted + version: "1.3.10" + source_map_stack_trace: + dependency: transitive + description: + name: 
source_map_stack_trace + sha256: c0713a43e323c3302c2abe2a1cc89aa057a387101ebd280371d6a6c9fa68516b + url: "https://pub.dev" + source: hosted + version: "2.1.2" + source_maps: + dependency: transitive + description: + name: source_maps + sha256: "190222579a448b03896e0ca6eca5998fa810fda630c1d65e2f78b3f638f54812" + url: "https://pub.dev" + source: hosted + version: "0.10.13" + source_span: + dependency: transitive + description: + name: source_span + sha256: "56a02f1f4cd1a2d96303c0144c93bd6d909eea6bee6bf5a0e0b685edbd4c47ab" + url: "https://pub.dev" + source: hosted + version: "1.10.2" + stack_trace: + dependency: transitive + description: + name: stack_trace + sha256: "8b27215b45d22309b5cddda1aa2b19bdfec9df0e765f2de506401c071d38d1b1" + url: "https://pub.dev" + source: hosted + version: "1.12.1" + stream_channel: + dependency: transitive + description: + name: stream_channel + sha256: "969e04c80b8bcdf826f8f16579c7b14d780458bd97f56d107d3950fdbeef059d" + url: "https://pub.dev" + source: hosted + version: "2.1.4" + stream_transform: + dependency: transitive + description: + name: stream_transform + sha256: ad47125e588cfd37a9a7f86c7d6356dde8dfe89d071d293f80ca9e9273a33871 + url: "https://pub.dev" + source: hosted + version: "2.1.1" + string_scanner: + dependency: transitive + description: + name: string_scanner + sha256: "921cd31725b72fe181906c6a94d987c78e3b98c2e205b397ea399d4054872b43" + url: "https://pub.dev" + source: hosted + version: "1.4.1" + term_glyph: + dependency: transitive + description: + name: term_glyph + sha256: "7f554798625ea768a7518313e58f83891c7f5024f88e46e7182a4558850a4b8e" + url: "https://pub.dev" + source: hosted + version: "1.2.2" + test: + dependency: "direct dev" + description: + name: test + sha256: "280d6d890011ca966ad08df7e8a4ddfab0fb3aa49f96ed6de56e3521347a9ae7" + url: "https://pub.dev" + source: hosted + version: "1.30.0" + test_api: + dependency: transitive + description: + name: test_api + sha256: 
"8161c84903fd860b26bfdefb7963b3f0b68fee7adea0f59ef805ecca346f0c7a" + url: "https://pub.dev" + source: hosted + version: "0.7.10" + test_core: + dependency: transitive + description: + name: test_core + sha256: "0381bd1585d1a924763c308100f2138205252fb90c9d4eeaf28489ee65ccde51" + url: "https://pub.dev" + source: hosted + version: "0.6.16" + typed_data: + dependency: transitive + description: + name: typed_data + sha256: f9049c039ebfeb4cf7a7104a675823cd72dba8297f264b6637062516699fa006 + url: "https://pub.dev" + source: hosted + version: "1.4.0" + vm_service: + dependency: transitive + description: + name: vm_service + sha256: "45caa6c5917fa127b5dbcfbd1fa60b14e583afdc08bfc96dda38886ca252eb60" + url: "https://pub.dev" + source: hosted + version: "15.0.2" + watcher: + dependency: transitive + description: + name: watcher + sha256: "1398c9f081a753f9226febe8900fce8f7d0a67163334e1c94a2438339d79d635" + url: "https://pub.dev" + source: hosted + version: "1.2.1" + web: + dependency: transitive + description: + name: web + sha256: "868d88a33d8a87b18ffc05f9f030ba328ffefba92d6c127917a2ba740f9cfe4a" + url: "https://pub.dev" + source: hosted + version: "1.1.1" + web_socket: + dependency: transitive + description: + name: web_socket + sha256: "34d64019aa8e36bf9842ac014bb5d2f5586ca73df5e4d9bf5c936975cae6982c" + url: "https://pub.dev" + source: hosted + version: "1.0.1" + web_socket_channel: + dependency: transitive + description: + name: web_socket_channel + sha256: d645757fb0f4773d602444000a8131ff5d48c9e47adfe9772652dd1a4f2d45c8 + url: "https://pub.dev" + source: hosted + version: "3.0.3" + webkit_inspection_protocol: + dependency: transitive + description: + name: webkit_inspection_protocol + sha256: "87d3f2333bb240704cd3f1c6b5b7acd8a10e7f0bc28c28dcf14e782014f4a572" + url: "https://pub.dev" + source: hosted + version: "1.2.1" + yaml: + dependency: "direct main" + description: + name: yaml + sha256: b9da305ac7c39faa3f030eccd175340f968459dae4af175130b3fc47e40d76ce + url: 
"https://pub.dev" + source: hosted + version: "3.1.3" + yaml_edit: + dependency: transitive + description: + name: yaml_edit + sha256: "07c9e63ba42519745182b88ca12264a7ba2484d8239958778dfe4d44fe760488" + url: "https://pub.dev" + source: hosted + version: "2.2.4" +sdks: + dart: ">=3.10.0 <4.0.0" diff --git a/pubspec.yaml b/pubspec.yaml new file mode 100644 index 0000000..6d336bb --- /dev/null +++ b/pubspec.yaml @@ -0,0 +1,26 @@ +name: evals +publish_to: none +description: Flutter evals framework +version: 0.0.1 + +environment: + sdk: ^3.10.0 + +workspace: + - packages/eval_cli + - packages/eval_config + +dependencies: + args: ^2.7.0 + freezed_annotation: ^3.1.0 + glob: ^2.1.0 + json_annotation: ^4.9.0 + path: ^1.9.1 + yaml: ^3.1.3 + +dev_dependencies: + build_runner: ^2.10.4 + freezed: ^3.2.4 + json_serializable: ^6.11.3 + lints: ^6.0.0 + test: ^1.28.0