diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..87adcc3
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,75 @@
+name: Documentation
+
+on:
+ pull_request:
+ paths:
+ - 'docs/**'
+ - 'packages/**'
+ - 'tool/**'
+ - '.github/workflows/docs.yml'
+ push:
+ branches:
+ - main
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v6
+
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: '3.13'
+
+ - name: Set up Flutter
+ uses: subosito/flutter-action@fd55f4c5af5b953cc57a2be44cb082c8f6635e8e
+ with:
+ channel: stable
+
+ - name: Install Python dependencies
+ run: |
+ pip install --upgrade pip
+ pip install -r docs/requirements.txt
+ pip install -e packages/dash_evals
+
+ - name: Install Dart dependencies
+ run: |
+ flutter pub get
+ cd tool/dartdoc_to_md && dart pub get
+
+ - name: Build documentation
+ working-directory: docs
+ run: make html
+
+ - name: Upload build artifact
+ uses: actions/upload-artifact@v6
+ with:
+ name: docs-html
+ path: docs/_build/html
+ retention-days: 1
+
+ deploy:
+ # Only deploy on push to main
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+ needs: build
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v6
+
+ - name: Download build artifact
+ uses: actions/download-artifact@v7
+ with:
+ name: docs-html
+ path: docs/_build/html
+
+ - name: Deploy to Firebase Hosting
+ uses: FirebaseExtended/action-hosting-deploy@v0
+ with:
+ repoToken: ${{ secrets.GITHUB_TOKEN }}
+ firebaseServiceAccount: ${{ secrets.FIREBASE_SERVICE_ACCOUNT }}
+ projectId: evals
+ target: evals-docs
+ channelId: live
diff --git a/README.md b/README.md
index e2a6dd5..20a2d6d 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,48 @@
-# Flutter evals
+# evals
-Evaluation framework for testing AI agents ability to write Dart and Flutter code.
\ No newline at end of file
+Evaluation framework for testing AI agents' ability to write Dart and Flutter code. Built on [Inspect AI](https://inspect.aisi.org.uk/).
+
+> [!TIP]
+> Full documentation at [evals-docs.web.app/](https://evals-docs.web.app/)
+
+## Overview
+
+evals provides:
+
+- **Evaluation Runner** — Python package for running LLM evaluations with configurable tasks, variants, and models
+- **Evaluation Configuration** — Dart and Python packages that resolve dataset YAML into EvalSet JSON for the runner
+- **devals CLI** — Dart CLI for creating and managing dataset samples, tasks, and jobs
+- **Evaluation Explorer** — Dart/Flutter app for browsing and analyzing results
+- **Dataset** — Curated samples for Dart/Flutter Q&A, code generation, and debugging tasks
+
+## Packages
+
+| Package | Description | Docs |
+|---------|-------------|------|
+| [dash_evals](packages/dash_evals/) | Python evaluation runner using Inspect AI | [dash_evals docs](docs/contributing/packages/dash_evals.md) |
+| [dataset_config_dart](packages/dataset_config_dart/) | Dart library for resolving dataset YAML into EvalSet JSON (includes shared data models) | [dataset_config_dart docs](docs/contributing/packages/dataset_config_dart.md) |
+| [dataset_config_python](packages/dataset_config_python/) | Python configuration models | — |
+| [devals_cli](packages/devals_cli/) | Dart CLI for managing evaluation tasks and jobs | [CLI docs](docs/reference/cli.md) |
+| [eval_explorer](packages/eval_explorer/) | Dart/Flutter results viewer (Serverpod) | [eval_explorer docs](docs/contributing/packages/eval_explorer.md) |
+
+> [!NOTE]
+> The **uploader** and **report_app** packages are deprecated and will be replaced by **eval_explorer**.
+
+## Documentation
+
+| Doc | Description |
+|-----|-------------|
+| [Quick Start](docs/guides/quick_start.md) | Get started authoring your own evals |
+| [Contributing Guide](docs/contributing/guide.md) | Development setup and guidelines |
+| [CLI Reference](docs/reference/cli.md) | Full devals CLI command reference |
+| [Configuration Reference](docs/reference/configuration_reference.md) | YAML configuration file reference |
+| [Repository Structure](docs/contributing/repository_structure.md) | Project layout |
+| [Glossary](docs/reference/glossary.md) | Terminology guide |
+
+## Contributing
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for details, or go directly to the [Contributing Guide](docs/contributing/guide.md).
+
+## License
+
+See [LICENSE](LICENSE) for details.
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..7776cf8
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,38 @@
+# Makefile for Sphinx + Dart API documentation
+
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Dart API generator
+DARTDOC_TOOL = ../tool/dartdoc_to_md
+REPO_ROOT = ..
+
+.PHONY: help clean html livehtml dartdoc html-python
+
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+clean:
+ rm -rf $(BUILDDIR)
+ rm -rf reference/dart_api
+
+# Generate Dart API markdown using the custom analyzer-based generator
+dartdoc:
+ @echo "Generating Dart API documentation..."
+ cd $(DARTDOC_TOOL) && dart run bin/generate.dart --root $(shell cd $(REPO_ROOT) && pwd) --output docs/reference/dart_api
+ @echo "Dart API markdown generated in docs/reference/dart_api/"
+
+# Build HTML docs (runs Dart generator first, then Sphinx)
+html: dartdoc
+ @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+ @echo "Build finished. Open $(BUILDDIR)/html/index.html"
+
+# Build HTML docs without Dart doc generation (faster for Python-only changes)
+html-python:
+ @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+ @echo "Build finished. Open $(BUILDDIR)/html/index.html"
+
+livehtml:
+ sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 0000000..9243a6b
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,420 @@
+/* Custom styling for dash_evals documentation */
+
+
+/* ============================================
+ BRAND COLORS (PyData CSS variables)
+ ============================================ */
+
+html[data-theme="light"] {
+ --pst-color-primary: #7C4DFF;
+ --pst-color-primary-highlight: #9C7CFF;
+}
+
+html[data-theme="dark"] {
+ --pst-color-primary: #B388FF;
+ --pst-color-primary-highlight: #D1B3FF;
+}
+
+
+/* ============================================
+ LINKS: Color and underline overrides
+ ============================================ */
+
+html[data-theme="light"] {
+ --pst-color-link: #7C4DFF;
+ --pst-color-link-hover: #5C35CC;
+}
+
+html[data-theme="dark"] {
+ --pst-color-link: #B388FF;
+ --pst-color-link-hover: #D1B3FF;
+}
+
+.bd-article-container a {
+ color: var(--pst-color-link);
+ text-decoration: none;
+ text-decoration-thickness: max(1px, .0625rem);
+ text-underline-offset: 0.15em;
+ overflow-wrap: break-word;
+}
+
+.bd-article-container a:hover {
+ color: var(--pst-color-link-hover);
+ text-decoration-thickness: max(3px, .1875rem, .12em);
+ text-decoration-skip-ink: none;
+}
+
+.bd-article-container a:visited {
+ color: var(--pst-color-link);
+}
+
+/* Links in headings should not be underlined */
+.bd-article-container h1 a,
+.bd-article-container h2 a,
+.bd-article-container h3 a,
+.bd-article-container h4 a {
+ text-decoration: none;
+}
+
+.bd-article-container h1 a:hover,
+.bd-article-container h2 a:hover,
+.bd-article-container h3 a:hover,
+.bd-article-container h4 a:hover {
+ text-decoration: underline;
+}
+
+.navbar-brand:hover, .navbar-brand:visited:hover {
+ text-decoration: none;
+ color: var(--pst-color-link-hover);
+}
+
+.bd-header ul.navbar-nav>li.nav-item.current>.nav-link:before {
+ border-bottom: none;
+}
+
+.bd-header ul.navbar-nav>li.nav-item>.nav-link:hover:before {
+ border-bottom: none;
+}
+
+.prev-next-area a.left-prev:hover,
+.prev-next-area a.right-next:hover {
+ text-decoration: none !important;
+ border-bottom: none;
+}
+
+/* The theme puts the underline on the p.prev-next-title inside the <a>, not on the <a> itself */
+.prev-next-area a p.prev-next-title, .prev-next-area a:hover p.prev-next-title, .prev-next-area a p.prev-next-title:hover {
+ text-decoration: none !important;
+}
+
+.prev-next-info {
+ width: 200px;
+ padding: .25rem;
+ border: .5px solid #D1B3FF;
+ border-radius: 4px;
+}
+
+.prev-next-info:hover {
+ background: #eae0f9;
+}
+
+/* ============================================
+ LISTS: Spacing and bullet styles
+ ============================================ */
+
+.bd-article-container ul {
+ list-style-type: disc;
+ padding-left: 1.5em;
+}
+
+.bd-article-container ol {
+ padding-left: 1.5em;
+}
+
+.bd-article-container li {
+ margin-bottom: 0.35em;
+ line-height: 1.65;
+}
+
+.bd-article-container ul ul {
+ list-style-type: circle;
+ margin-top: 0.35em;
+}
+
+.bd-article-container ul ul ul {
+ list-style-type: square;
+}
+
+/* Tighter spacing for nested lists */
+.bd-article-container li > ul,
+.bd-article-container li > ol {
+ margin-bottom: 0;
+}
+
+
+/* ============================================
+ BLOCKQUOTES
+ ============================================ */
+
+.bd-article-container blockquote {
+ border-left: 3px solid var(--pst-color-primary);
+ padding: 0.5rem 1rem;
+ margin: 1rem 0 1.2rem 0;
+ color: var(--pst-color-text-muted);
+ background-color: transparent;
+}
+
+.bd-article-container blockquote p {
+ margin-bottom: 0.5rem;
+}
+
+.bd-article-container blockquote p:last-child {
+ margin-bottom: 0;
+}
+
+
+/* ============================================
+ INLINE CODE (not in code blocks)
+ ============================================ */
+
+html[data-theme="light"] {
+ --pst-color-inline-code: #912583;
+}
+
+html[data-theme="dark"] {
+ --pst-color-inline-code: #f3c7ee;
+}
+
+.bd-article-container code:not(pre code) {
+ color: var(--pst-color-inline-code);
+ font-size: 0.875em;
+}
+
+
+/* ============================================
+ HORIZONTAL RULES
+ ============================================ */
+
+.bd-article-container hr {
+ border: none;
+ border-top: 1px solid var(--pst-color-border);
+ margin: 2rem 0;
+ opacity: 0.65;
+}
+
+
+/* ============================================
+ STRONG / EMPHASIS
+ ============================================ */
+
+.bd-article-container strong {
+ font-weight: 600;
+ color: var(--pst-color-text-base);
+}
+
+
+/* ============================================
+ DEFINITION LISTS (dl/dt/dd)
+ ============================================ */
+
+.bd-article-container dl {
+ margin-bottom: 1.2rem;
+}
+
+.bd-article-container dt {
+ font-weight: 600;
+ margin-top: 0.8rem;
+ color: var(--pst-color-text-base);
+}
+
+.bd-article-container dd {
+ margin-left: 1.5em;
+ margin-bottom: 0.5rem;
+}
+
+
+/* ============================================
+ MAIN CONTENT: Base font size (~10% smaller)
+ ============================================ */
+
+.bd-article-container .bd-content {
+ font-size: 0.9rem;
+ line-height: 1.7;
+}
+
+
+/* ============================================
+ MAIN CONTENT: Headings (smaller)
+ ============================================ */
+
+.bd-article-container h1 {
+ font-size: 1.6rem;
+ margin-top: 1.5rem;
+ margin-bottom: 1rem;
+}
+
+.bd-article-container h2 {
+ font-size: 1.25rem;
+ margin-top: 1.8rem;
+ margin-bottom: 0.8rem;
+}
+
+.bd-article-container h3 {
+ font-size: 1.05rem;
+ margin-top: 1.5rem;
+ margin-bottom: 0.6rem;
+}
+
+.bd-article-container h4 {
+ font-size: 0.95rem;
+ margin-top: 1.2rem;
+ margin-bottom: 0.5rem;
+}
+
+
+/* ============================================
+ MAIN CONTENT: More spacing between elements
+ ============================================ */
+
+.bd-article-container p {
+ margin-bottom: 1rem;
+}
+
+.bd-article-container ul,
+.bd-article-container ol {
+ margin-bottom: 1.2rem;
+}
+
+.bd-article-container section {
+ margin-bottom: 1.5rem;
+}
+
+/* Spacing after code blocks */
+.bd-article-container .highlight {
+ margin-bottom: 1.2rem;
+}
+
+/* Spacing after tables */
+.bd-article-container table {
+ margin-bottom: 1.5rem;
+}
+
+/* Spacing after admonitions */
+.bd-article-container .admonition {
+ margin-bottom: 1.5rem;
+}
+
+
+/* ============================================
+ CODE BLOCKS: Slightly darker background
+ ============================================ */
+
+pre {
+ border: 1px solid #e0e0e0;
+ border-radius: 6px;
+ background-color: #f5f5f5;
+}
+
+code.literal {
+ border: 1px solid #e0e0e0;
+ border-radius: 3px;
+ padding: 1px 4px;
+ background-color: #f2f2f2;
+}
+
+html[data-theme="light"] .highlight pre {
+ line-height: 170%;
+}
+
+html[data-theme="dark"] pre {
+ border-color: #444;
+ background-color: #1e1e1e;
+}
+
+html[data-theme="dark"] code.literal {
+ border-color: #444;
+ background-color: #2a2a2a;
+}
+
+
+/* ============================================
+ TABLES: Padding & header background
+ ============================================ */
+
+.bd-article-container table th {
+ padding: 6px 14px;
+ background-color: #f0f0f0;
+ font-weight: 600;
+ font-size: 0.85rem;
+}
+
+.bd-article-container table td {
+ padding: 5px 14px;
+ font-size: 0.85rem;
+}
+
+/* Subtle row striping for readability */
+.bd-article-container table tbody tr:nth-child(even) {
+ background-color: #fafafa;
+}
+
+html[data-theme="dark"] .bd-article-container table th {
+ background-color: #2a2a2a;
+}
+
+html[data-theme="dark"] .bd-article-container table tbody tr:nth-child(even) {
+ background-color: #1e1e1e;
+}
+
+
+/* ============================================
+ SIGNATURE COLORS (class/function definitions)
+ ============================================ */
+
+/* Module path: dash_evals.runner.models. */
+.sig-prename.descclassname {
+ color: #666666 !important;
+}
+
+/* Class/function name: TaskResult, flutter_bug_fix */
+.sig-name.descname {
+ color: #7C4DFF !important;
+ font-weight: 600;
+}
+
+/* Property/attribute names in signatures */
+dt.sig.sig-object .sig-name:not(.descname) {
+ color: #005577 !important;
+}
+
+
+/* ============================================
+ TYPE ANNOTATION COLORS
+ ============================================ */
+
+/* The "class" keyword */
+dt.sig.sig-object > .property {
+ color: #0077AA !important;
+ font-weight: 800;
+}
+
+/* Type names in annotations */
+.sig .sig-param span.pre,
+.sig > span.pre:not(:first-child) {
+ color: #A90D91;
+}
+
+/* Parentheses */
+.sig-paren {
+ color: #666666;
+}
+
+
+/* ============================================
+ DARK MODE: Signatures
+ ============================================ */
+
+html[data-theme="dark"] .sig-prename.descclassname {
+ color: #888888 !important;
+}
+
+html[data-theme="dark"] .sig-name.descname {
+ color: #B388FF !important;
+}
+
+html[data-theme="dark"] dt.sig.sig-object .sig-name:not(.descname) {
+ color: #61AFEF !important;
+}
+
+html[data-theme="dark"] dt.sig.sig-object > span.pre:first-child {
+ color: #56B6C2 !important;
+}
+
+html[data-theme="dark"] .sig .sig-param span.pre,
+html[data-theme="dark"] .sig > span.pre:not(:first-child) {
+ color: #CE93D8;
+}
+
+html[data-theme="dark"] .sig-paren {
+ color: #888888;
+}
diff --git a/docs/_static/images/eval-set.png b/docs/_static/images/eval-set.png
new file mode 100644
index 0000000..0b58b4d
Binary files /dev/null and b/docs/_static/images/eval-set.png differ
diff --git a/docs/_static/images/evals-dataset.png b/docs/_static/images/evals-dataset.png
new file mode 100644
index 0000000..ebaebfe
Binary files /dev/null and b/docs/_static/images/evals-dataset.png differ
diff --git a/docs/_static/images/job.png b/docs/_static/images/job.png
new file mode 100644
index 0000000..7963112
Binary files /dev/null and b/docs/_static/images/job.png differ
diff --git a/docs/_static/images/logo.png b/docs/_static/images/logo.png
new file mode 100644
index 0000000..779227b
Binary files /dev/null and b/docs/_static/images/logo.png differ
diff --git a/docs/_static/images/repo-separation.png b/docs/_static/images/repo-separation.png
new file mode 100644
index 0000000..c830b22
Binary files /dev/null and b/docs/_static/images/repo-separation.png differ
diff --git a/docs/_static/images/task.png b/docs/_static/images/task.png
new file mode 100644
index 0000000..a451400
Binary files /dev/null and b/docs/_static/images/task.png differ
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..db1b270
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,117 @@
+# Configuration file for the Sphinx documentation builder.
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+import os
+import sys
+
+# Add the source directory to the path so Sphinx can find the modules
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "packages", "dash_evals", "src")))
+
+# -- Project information -----------------------------------------------------
+
+project = "dash_evals"
+copyright = "2025, Flutter Authors"
+author = "Flutter Authors"
+
+# -- General configuration ---------------------------------------------------
+
+extensions = [
+ "sphinx.ext.autodoc", # Auto-generate docs from docstrings
+ "sphinx.ext.napoleon", # Support Google/NumPy docstring styles
+ "sphinx.ext.viewcode", # Add links to source code
+ "sphinx.ext.intersphinx", # Link to other projects' docs
+ "sphinx_autodoc_typehints", # Better type hint rendering
+ "myst_parser", # Support Markdown files
+ "sphinx_design", # Cards, grids, tabs
+]
+
+# Autodoc settings
+autodoc_default_options = {
+ "members": True,
+ "member-order": "bysource",
+ "special-members": "__init__",
+ "undoc-members": True,
+ "exclude-members": "__weakref__",
+}
+autodoc_typehints = "description"
+autodoc_class_signature = "separated"
+
+# Napoleon settings (for Google-style docstrings)
+napoleon_google_docstring = True
+napoleon_numpy_docstring = True
+napoleon_include_init_with_doc = True
+napoleon_include_private_with_doc = False
+
+# MyST parser settings (for Markdown support)
+myst_enable_extensions = [
+ "colon_fence",
+ "fieldlist",
+]
+
+# Intersphinx mapping
+intersphinx_mapping = {
+ "python": ("https://docs.python.org/3", None),
+}
+
+# -- Options for HTML output -------------------------------------------------
+
+html_theme = "pydata_sphinx_theme"
+html_title = "evals"
+root_doc = "index"
+
+html_theme_options = {
+ # # Logo
+ # "logo": {
+ # "image_light": "_static/images/logo.png",
+ # "image_dark": "_static/images/logo.png",
+ # },
+ # Show all top-nav tabs instead of collapsing to "More ▾"
+ "header_links_before_dropdown": 4,
+ # Top-right icons
+ "icon_links": [
+ {
+ "name": "GitHub",
+ "url": "https://github.com/flutter/evals",
+ "icon": "fa-brands fa-github",
+ },
+ ],
+ # --- Header / Navigation Bar ---
+ # Left: logo
+ "navbar_start": ["navbar-logo"],
+ # Center: top-level section tabs (Guides, Reference, Contributing)
+ # These are auto-generated from root-level toctree entries in index.md.
+ "navbar_center": ["navbar-nav"],
+ # Right: theme switcher + icon links
+ "navbar_end": ["theme-switcher", "navbar-icon-links"],
+ # Persistent right (stays visible even on small screens)
+ "navbar_persistent": ["search-button"],
+ # Align nav tabs to the left, closer to the logo
+ "navbar_align": "left",
+ # --- Primary sidebar (left) ---
+ # Show 2 levels of nav expanded by default
+ "show_nav_level": 1,
+ # --- Secondary sidebar (right) ---
+ # Shows the current page's table of contents
+ "secondary_sidebar_items": ["page-toc"],
+ # --- Syntax highlighting ---
+ "pygments_light_style": "xcode",
+ "pygments_dark_style": "monokai",
+}
+
+# --- Primary sidebar (left) ---
+# Shows section sub-navigation (e.g. Guides subpages) via sidebar-nav-bs.
+# This is the correct way to configure the left sidebar in PyData theme.
+# Use page-glob patterns to customise per-section, e.g. {"index": []} to hide.
+html_sidebars = {
+ "**": ["sidebar-nav-bs"],
+}
+
+# Static files
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
+
+# Source file suffixes
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".md": "markdown",
+}
diff --git a/docs/contributing/guide.md b/docs/contributing/guide.md
new file mode 100644
index 0000000..f001c14
--- /dev/null
+++ b/docs/contributing/guide.md
@@ -0,0 +1,305 @@
+# Contributing
+
+Welcome to the Dart/Flutter LLM evaluation project! This repository contains tools for running and analyzing AI model evaluations on Dart and Flutter tasks.
+
+---
+
+## Table of Contents
+
+- [dash_evals](#dash_evals)
+ - [Setup](#setup)
+ - [Write a New Eval](#write-a-new-eval)
+ - [Add Your Sample to the Dataset](#add-your-sample-to-the-dataset)
+ - [Edit the Config to Run Only Your New Sample](#edit-the-config-to-run-only-your-new-sample)
+ - [Verify the Sample Works](#verify-the-sample-works)
+ - [What to Commit (and Not Commit!)](#what-to-commit-and-not-commit)
+ - [Add Functionality to the Runner](#add-functionality-to-the-runner)
+ - [Understand Tasks, Solvers, and Scorers](#understand-tasks-solvers-and-scorers)
+ - [Add a New Task](#add-a-new-task)
+ - [Test and Verify](#test-and-verify)
+- [eval_explorer](#eval_explorer)
+
+## dash_evals
+
+### Setup
+
+1. **Prerequisites**
+ - Python 3.13+
+ - Podman or Docker (for sandbox execution)
+ - API keys for the models you want to test
+
+2. **Create and activate a virtual environment**
+
+ ```bash
+ cd packages/dash_evals
+ python -m venv .venv
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
+ ```
+
+3. **Install dependencies**
+
+ ```bash
+ pip install -e . # Core dependencies
+ pip install -e ".[dev]" # Development dependencies (pytest, ruff, etc.)
+ ```
+
+4. **Configure API keys**
+
+ You only need to configure the keys you plan on testing.
+
+ ```bash
+ export GEMINI_API_KEY=your_key_here
+ export ANTHROPIC_API_KEY=your_key_here
+ export OPENAI_API_KEY=your_key_here
+ ```
+
+5. **Verify installation**
+
+ ```bash
+ run-evals --help
+ ```
+
+---
+
+### Write a New Eval
+
+The most common contribution is adding new evaluation samples. Each sample tests a specific capability or scenario.
+
+#### Add Your Sample to the Dataset
+
+1. **Decide which task your sample belongs to**
+
+ Review the available tasks in `dataset/tasks/` or run `devals create task` to see available task functions:
+
+ | Task | Purpose |
+ |------|---------|
+ | `question_answer` | Q&A evaluation for Dart/Flutter knowledge |
+ | `bug_fix` | Agentic debugging of code in a sandbox |
+ | `flutter_bug_fix` | Flutter-specific bug fix (wraps `bug_fix`) |
+ | `code_gen` | Generate code from specifications |
+ | `flutter_code_gen` | Flutter-specific code gen (wraps `code_gen`) |
+ | `mcp_tool` | Test MCP tool usage |
+ | `analyze_codebase` | Evaluate codebase analysis |
+ | `skill_test` | Test skill file usage in sandboxes |
+
+2. **Create your sample file**
+
+ Use `devals create sample` for interactive sample creation, or add a sample inline in the task's `task.yaml` file under `dataset/tasks/<task_id>/task.yaml`:
+
+ ```yaml
+ id: dart_your_sample_id
+ input: |
+ Your prompt to the model goes here.
+ target: |
+ Criteria for grading the response. This is used by the scorer
+ to determine if the model's output is acceptable.
+ metadata:
+ added: 2025-02-04
+ tags: [dart, async] # Optional categorization
+ ```
+
+ For agentic tasks (bug fix, code gen), you'll also need a workspace:
+
+ ```yaml
+ id: flutter_fix_some_bug
+ input: |
+ The app crashes when the user taps the submit button.
+ Debug and fix the issue.
+ target: |
+ The fix should handle the null check in the onPressed callback.
+ workspace:
+ template: flutter_app # Use a reusable template
+ # OR
+ path: ./project # Custom project relative to sample directory
+ ```
+
+3. **Add your sample to the task's `task.yaml`**
+
+ Add your sample inline in the appropriate task's `samples` list:
+
+ ```yaml
+ # dataset/tasks/dart_question_answer/task.yaml
+ func: question_answer
+ samples:
+ - id: dart_your_sample_id
+ input: |
+ Your prompt to the model goes here.
+ target: |
+ Criteria for grading the response.
+ ```
+
+#### Edit the Config to Run Only Your New Sample
+
+Before committing, test your sample by creating a job file. Use `devals create job` interactively, or manually create one in `dataset/jobs/`:
+
+```yaml
+# jobs/test_my_sample.yaml
+name: test_my_sample
+
+# Run only the task containing your sample
+tasks:
+ dart_question_answer:
+ allowed_variants: [baseline] # Start with baseline variant
+ include-samples:
+ - dart_your_sample_id # Only run your specific sample
+
+# Use a fast model for testing
+models: [google/gemini-2.5-flash]
+```
+
+Then run with your job:
+
+```bash
+devals run test_my_sample
+```
+
+#### Verify the Sample Works
+
+1. **Dry run first** — validates configuration without making API calls:
+
+ ```bash
+ devals run test_my_sample --dry-run
+ ```
+
+2. **Run the evaluation**:
+
+ ```bash
+ devals run test_my_sample
+ ```
+
+3. **Check the output** in the `logs/` directory. Verify:
+ - The model received your prompt correctly
+ - The scorer evaluated the response appropriately
+ - No errors occurred during execution
+
+#### What to Commit (and Not Commit!)
+
+**Do commit:**
+- Your updated task file(s) in `dataset/tasks/`
+- Any new workspace templates or context files
+
+**Do NOT commit:**
+- Test job files in `dataset/jobs/` (if they were only for local testing)
+- Log files in `logs/`
+- API keys or `.env` files
+
+Before submitting your PR, clean up any test job files you created:
+
+```bash
+git status # Check for untracked/modified job files
+```
+
+---
+
+### Add Functionality to the Runner
+
+If you're adding new task types, scorers, or solvers, this section is for you.
+
+#### Understand Tasks, Solvers, and Scorers
+
+The dash_evals runner uses [Inspect AI](https://inspect.aisi.org.uk/) concepts:
+
+| Component | Purpose | Location |
+|-----------|---------|----------|
+| **Task** | Defines what to evaluate — combines dataset, solver chain, and scorers | `runner/tasks/` |
+| **Solver** | Processes inputs (e.g., injects context, runs agent loops) | `runner/solvers/` |
+| **Scorer** | Evaluates outputs (e.g., model grading, dart analyze, flutter test) | `runner/scorers/` |
+
+A typical task structure:
+
+```python
+from inspect_ai import Task, task
+from inspect_ai.dataset import MemoryDataset
+
+@task
+def your_new_task(dataset: MemoryDataset, task_def: dict) -> Task:
+ return Task(
+ name=task_def.get("name", "your_new_task"),
+ dataset=dataset,
+ solver=[
+ add_system_message("Your system prompt"),
+ context_injector(task_def),
+ # ... more solvers
+ ],
+ scorer=[
+ model_graded_scorer(),
+ dart_analyze_scorer(),
+ ],
+ )
+```
+
+#### Add a New Task
+
+1. **Create your task file** at `src/dash_evals/runner/tasks/your_task.py`
+
+2. **Export it** from `src/dash_evals/runner/tasks/__init__.py`:
+
+ ```python
+ from .your_task import your_new_task
+
+ __all__ = [
+ # ... existing tasks ...
+ "your_new_task",
+ ]
+ ```
+
+ Task functions are discovered dynamically via `importlib`. If the function name matches a module in `runner/tasks/`, it will be found automatically when referenced from a `task.yaml` file. No registry is needed.
+
+3. **Create a task directory** in `dataset/tasks/`:
+
+ ```
+ dataset/tasks/your_task_id/
+ └── task.yaml
+ ```
+
+ ```yaml
+ # dataset/tasks/your_task_id/task.yaml
+ func: your_new_task # Must match the function name
+ samples:
+ - id: sample_001
+ input: |
+ Your prompt here.
+ target: |
+ Expected output or grading criteria.
+ ```
+
+#### Test and Verify
+
+1. **Run the test suite**:
+
+ ```bash
+ cd packages/dash_evals
+ pytest
+ ```
+
+2. **Run linting**:
+
+ ```bash
+ ruff check src/dash_evals
+ ruff format src/dash_evals
+ ```
+
+3. **Test your task end-to-end**:
+
+ ```bash
+ devals run test_my_sample --dry-run # Validate config
+ devals run test_my_sample # Run actual evaluation
+ ```
+
+---
+
+## eval_explorer
+
+A Dart/Flutter application for exploring evaluation results, built with [Serverpod](https://serverpod.dev/).
+
+> [!NOTE]
+> The eval_explorer is under active development. Contribution guidelines coming soon!
+
+The package is located in `packages/eval_explorer/` and consists of:
+
+| Package | Description |
+|---------|-------------|
+| `eval_explorer_client` | Dart client package |
+| `eval_explorer_flutter` | Flutter web/mobile app |
+| `eval_explorer_server` | Serverpod backend |
+| `eval_explorer_shared` | Shared models |
diff --git a/docs/contributing/index.md b/docs/contributing/index.md
new file mode 100644
index 0000000..9b2ea78
--- /dev/null
+++ b/docs/contributing/index.md
@@ -0,0 +1,20 @@
+# Contributor Guides
+
+Documentation about how the evals project works internally, aimed at contributors.
+
+```{toctree}
+:maxdepth: 2
+
+guide
+repository_structure
+```
+
+```{toctree}
+:maxdepth: 2
+:caption: Packages
+
+packages/dash_evals
+packages/dataset_config_dart
+packages/devals_cli
+packages/eval_explorer
+```
diff --git a/docs/contributing/packages/dash_evals.md b/docs/contributing/packages/dash_evals.md
new file mode 100644
index 0000000..3fd714a
--- /dev/null
+++ b/docs/contributing/packages/dash_evals.md
@@ -0,0 +1,91 @@
+# dash_evals
+
+Python package for running LLM evaluations on Dart and Flutter tasks using [Inspect AI](https://inspect.aisi.org.uk/). Located in `packages/dash_evals/`.
+
+For setup instructions, see the [Quick Start](/guides/quick_start.md) or [Contributing Guide](../guide.md).
+
+---
+
+## Available Tasks
+
+| Task | Description |
+|------|-------------|
+| `question_answer` | Q&A evaluation for Dart/Flutter knowledge |
+| `bug_fix` | Agentic debugging of code in a sandbox |
+| `flutter_bug_fix` | Flutter-specific bug fix (wraps `bug_fix`) |
+| `code_gen` | Generate code from specifications |
+| `flutter_code_gen` | Flutter-specific code gen (wraps `code_gen`) |
+| `mcp_tool` | Evaluate MCP tool usage (pub.dev search, project creation, etc.) |
+| `analyze_codebase` | Evaluate codebase analysis and comprehension |
+| `skill_test` | Evaluate use of skill files in a sandbox |
+
+---
+
+## Architecture
+
+```
+src/dash_evals/
+├── main.py # CLI entry point (dual-mode)
+├── runner/
+│ ├── json_runner.py # Mode 1: run from EvalSet JSON manifest
+│ ├── args_runner.py # Mode 2: run from direct CLI arguments
+│ ├── tasks/ # @task functions (question_answer, bug_fix, code_gen, etc.)
+│ ├── scorers/ # Scoring logic (model_graded, dart_analyze, flutter_test, etc.)
+│ ├── solvers/ # Solver chains (context injection, system messages)
+│ └── sandboxes/ # Sandbox environments (podman)
+├── models/ # Data models
+└── utils/ # Logging and helpers
+```
+
+### Data Flow
+
+1. **Configure**: The Dart `dataset_config_dart` package parses dataset YAML and resolves it into an `EvalSet` JSON manifest
+2. **Load**: The Python runner reads the JSON manifest via `json_runner.py`, resolving task functions dynamically with `importlib`
+3. **Execute**: Each task function receives its dataset and task definition, producing an `inspect_ai.Task`
+4. **Score**: Scorers evaluate model outputs against targets
+5. **Log**: Results written to the configured `log_dir`
+
+Alternatively, the runner can be invoked directly with `--task` and `--model` arguments (via `args_runner.py`), bypassing the Dart config pipeline.
+
+---
+
+## Usage
+
+```bash
+# Mode 1: Run from JSON manifest (produced by Dart CLI)
+run-evals --json ./eval_set.json
+
+# Mode 2: Run a single task directly
+run-evals --task flutter_code_gen --model google/gemini-2.5-flash --dataset samples.jsonl
+
+# Additional options (both modes)
+run-evals --task bug_fix --model openai/gpt-4o \
+ --log-dir ./logs \
+ --sandbox podman compose.yaml \
+ --max-connections 10
+```
+
+---
+
+## Testing
+
+```bash
+# Run all tests
+pytest
+
+# Run with coverage
+pytest --cov=dash_evals
+
+# Run specific test
+pytest tests/test_parsers.py -v
+```
+
+---
+
+## Linting
+
+```bash
+# Run ruff
+ruff check src/dash_evals
+ruff format src/dash_evals
+```
diff --git a/docs/contributing/packages/dataset_config_dart.md b/docs/contributing/packages/dataset_config_dart.md
new file mode 100644
index 0000000..fe78883
--- /dev/null
+++ b/docs/contributing/packages/dataset_config_dart.md
@@ -0,0 +1,129 @@
+# dataset_config_dart
+
+Dart library for resolving eval dataset YAML into EvalSet JSON for the Python runner. Also contains the shared data models (e.g., `EvalSet`, `Task`, `Sample`, `Variant`, `Job`) used across the eval pipeline. Python equivalents of these models live in `dash_evals_config`. Located in `packages/dataset_config_dart/`.
+
+---
+
+## Architecture
+
+The package follows a layered pipeline design:
+
+```
+YAML / JSON files
+ │
+ ▼
+┌──────────┐
+│ Parser │ YamlParser · JsonParser
+└────┬─────┘
+ │ => List&lt;ParsedTask&gt;, Job
+ ▼
+┌──────────┐
+│ Resolver │ EvalSetResolver
+└────┬─────┘
+ │ => List&lt;EvalSet&gt;
+ ▼
+┌──────────┐
+│ Writer │ EvalSetWriter
+└────┬─────┘
+ │ => JSON file(s) on disk
+ ▼
+ Python dash_evals
+```
+
+The JSON files written to disk conform to the InspectAI API for `eval_set`, which is an
+entry point from which to start running evals.
+
+
+| Layer | Class | Responsibility |
+|-------|-------|----------------|
+| **Parsers** | `YamlParser`, `JsonParser` | Read task YAML and job files into `ParsedTask` and `Job` objects |
+| **Resolvers** | `EvalSetResolver` | Combine parsed tasks with a job to produce fully resolved `EvalSet` objects (expanding models, variants, sandbox config, etc.) |
+| **Writers** | `EvalSetWriter` | Serialize `EvalSet` objects to JSON files that the Python runner can consume |
+| **Facade** | `ConfigResolver` | Single-call convenience that composes Parser → Resolver |
+
+---
+
+## Quick Start
+
+```dart
+import 'package:dataset_config_dart/dataset_config_dart.dart';
+
+// Single-call convenience
+final resolver = ConfigResolver();
+final configs = resolver.resolve(datasetPath, ['my_job']);
+
+// Or use the layers individually
+final parser = YamlParser();
+final tasks = parser.parseTasks(datasetPath);
+final job = parser.parseJob(jobPath, datasetPath);
+
+final evalSetResolver = EvalSetResolver();
+final evalSets = evalSetResolver.resolve(tasks, job, datasetPath);
+
+final writer = EvalSetWriter();
+writer.write(evalSets, outputDir);
+```
+
+---
+
+## Data Models
+
+This package also contains the shared Dart data models used across the eval pipeline. All models are built with [Freezed](https://pub.dev/packages/freezed) for immutability, pattern matching, and JSON serialization via [json_serializable](https://pub.dev/packages/json_serializable).
+
+> [!NOTE]
+> Python equivalents of these models live in the `dash_evals_config` package.
+
+### Config Models
+
+| Model | Description |
+|-------|-------------|
+| `Job` | A job configuration — runtime settings, model/variant/task selection, and `eval_set()` overrides |
+| `JobTask` | Per-task overrides within a job (sample filtering, custom system messages) |
+| `Variant` | A named configuration variant (e.g. `baseline`, `with_docs`) applied to task runs |
+| `ContextFile` | A file to inject into the sandbox as additional context for the model |
+
+### Inspect AI Models
+
+Mirror the Python [Inspect AI](https://inspect.aisi.org.uk/) types so that Dart can produce JSON the Python runner understands directly.
+
+| Model | Description |
+|-------|-------------|
+| `EvalSet` | Maps to `inspect_ai.eval_set()` parameters — the top-level run definition |
+| `Task` | A single evaluation task with its solver, scorer, dataset, and sandbox config |
+| `TaskInfo` | Lightweight task metadata (name and function reference) |
+| `Sample` | An individual evaluation sample (input, target, metadata) |
+| `Dataset` | A dataset definition (samples file path and field mappings) |
+| `FieldSpec` | Maps dataset columns to sample fields |
+| `EvalLog` | Comprehensive log structure for evaluation results |
+
+---
+
+## Source Layout
+
+```
+lib/
+├── dataset_config_dart.dart # Library barrel file
+└── src/
+ ├── config_resolver.dart # Convenience facade
+ ├── parsed_task.dart # Intermediate parsed-task model
+ ├── parsers/
+ │ ├── parser.dart # Abstract parser interface
+ │ ├── yaml_parser.dart # YAML file parser
+ │ └── json_parser.dart # JSON map parser
+ ├── resolvers/
+ │ └── eval_set_resolver.dart
+ ├── writers/
+ │ └── eval_set_writer.dart
+ ├── runner_config_exception.dart
+ └── utils/
+ └── yaml_utils.dart
+```
+
+---
+
+## Testing
+
+```bash
+cd packages/dataset_config_dart
+dart test
+```
diff --git a/docs/contributing/packages/devals_cli.md b/docs/contributing/packages/devals_cli.md
new file mode 100644
index 0000000..96c8e8f
--- /dev/null
+++ b/docs/contributing/packages/devals_cli.md
@@ -0,0 +1,95 @@
+# devals_cli (devals)
+
+Dart CLI for managing evals — initialize datasets, create samples, run evaluations, and view results. Located in `packages/devals_cli/`.
+
+For setup instructions, see the [Quick Start](../../guides/quick_start.md) or [Contributing Guide](../guide.md).
+
+---
+
+## Commands
+
+| Command | Description |
+|---------|-------------|
+| `devals init` | Initialize a new dataset in the current directory (creates `devals.yaml`, a starter task, and a starter job) |
+| `devals doctor` | Check that prerequisites are installed (Dart, Python, dash_evals, Podman, Flutter, Serverpod, API keys) |
+| `devals create sample` | Interactively add a new sample to an existing task |
+| `devals create task` | Interactively create a new task file in `tasks/<task_name>/task.yaml` |
+| `devals create job` | Interactively create a new job file |
+| `devals create pipeline` | Guided flow to create a task and job together |
+| `devals run <job>` | Resolve config and run evaluations via the Python dash_evals runner |
+| `devals publish <log_dir>` | Upload Inspect AI log files to Google Cloud Storage |
+| `devals view [log_path]` | Launch the Inspect AI viewer to browse evaluation results |
+
+---
+
+## Usage
+
+```bash
+# Scaffold a new dataset
+devals init
+
+# Check your environment
+devals doctor
+
+# Create a new eval (task + job in one step)
+devals create pipeline
+
+# Run evaluations
+devals run local_dev
+
+# Preview without executing
+devals run local_dev --dry-run
+
+# Upload logs to GCS
+devals publish logs/2026-01-07_17-11-47/
+
+# View results
+devals view
+```
+
+---
+
+## How `devals run` Works
+
+1. The CLI resolves the job YAML into `EvalSet` objects using the [dataset_config_dart](./dataset_config_dart.md) package (entirely in Dart)
+2. `EvalSetWriter` writes the resolved config to a JSON file
+3. The CLI invokes `run-evals --json <manifest_path>` to hand off to the Python [dash_evals](./dash_evals.md)
+
+With `--dry-run`, the CLI resolves and validates the config without calling the Python runner.
+
+---
+
+## Source Layout
+
+```
+bin/
+└── devals.dart # Entry point
+lib/
+├── devals.dart # Library barrel file
+└── src/
+ ├── runner.dart # DevalRunner (CommandRunner)
+ ├── cli_exception.dart # CLI-specific exceptions
+ ├── commands/ # Command implementations
+ │ ├── init_command.dart
+ │ ├── doctor_command.dart
+ │ ├── create_command.dart
+ │ ├── create_sample_command.dart
+ │ ├── create_task_command.dart
+ │ ├── create_job_command.dart
+ │ ├── create_pipeline_command.dart
+ │ ├── run_command.dart
+ │ ├── publish_command.dart
+ │ └── view_command.dart
+ ├── config/ # Environment and .env helpers
+ ├── dataset/ # Dataset reading, writing, templates
+ └── gcs/ # Google Cloud Storage client
+```
+
+---
+
+## Testing
+
+```bash
+cd packages/devals_cli
+dart test
+```
diff --git a/docs/contributing/packages/eval_explorer.md b/docs/contributing/packages/eval_explorer.md
new file mode 100644
index 0000000..41ea0bd
--- /dev/null
+++ b/docs/contributing/packages/eval_explorer.md
@@ -0,0 +1,70 @@
+# eval_explorer
+
+Dart/Flutter application for browsing and analyzing evaluation results. Built with [Serverpod](https://serverpod.dev/). Located in `packages/eval_explorer/`.
+
+> [!NOTE]
+> The eval_explorer is under active development and will eventually replace the legacy `report_app` + `uploader` pipeline.
+
+## Sub-packages
+
+| Package | Description |
+|---------|-------------|
+| `eval_explorer_client` | Dart client package (mostly generated by Serverpod) |
+| `eval_explorer_flutter` | Flutter web/mobile app |
+| `eval_explorer_server` | Serverpod backend |
+| `eval_explorer_shared` | Shared models |
+
+---
+
+## Prerequisites
+
+- [Podman](https://podman.io/) (Docker substitute for Googlers)
+- Podman Compose (`brew install podman-compose`)
+
+---
+
+## Running the Server
+
+Start Postgres and Redis:
+
+```bash
+cd packages/eval_explorer/eval_explorer_server
+podman-compose up --detach
+```
+
+Start the Serverpod server:
+
+```bash
+dart bin/main.dart
+```
+
+When finished, stop the server with `Ctrl-C`, then shut down Postgres and Redis:
+
+```bash
+podman-compose down
+```
+
+---
+
+## Running the Flutter App
+
+Make sure the server is running first, then:
+
+```bash
+cd packages/eval_explorer/eval_explorer_flutter
+flutter run
+```
+
+---
+
+## Installing Fixtures
+
+Eval datasets and individual questions are kept in the `datasets` folder at the root of this repository. To load them into the database:
+
+> [!NOTE]
+> Make sure Postgres is running via `podman-compose up --detach` before running this command.
+
+```bash
+cd packages/eval_explorer/eval_explorer_server
+serverpod run fixtures
+```
diff --git a/docs/contributing/repository_structure.md b/docs/contributing/repository_structure.md
new file mode 100644
index 0000000..4ed859f
--- /dev/null
+++ b/docs/contributing/repository_structure.md
@@ -0,0 +1,108 @@
+# Repository Structure
+
+Overview of the evals repository layout.
+
+```
+evals/
+├── dataset/ # Evaluation data and configuration
+├── docs/ # Documentation
+├── packages/
+│ ├── devals_cli/ # Dart CLI for managing the dataset (devals)
+│ ├── dataset_config_dart/ # Dart library: YAML → EvalSet JSON
+│ ├── dash_evals/ # Python evaluation runner
+│ ├── dataset_config_python/ # Python configuration models
+│ └── eval_explorer/ # Dart/Flutter results viewer (Serverpod)
+├── tool/ # Utility scripts
+├── pubspec.yaml # Dart workspace configuration
+└── firebase.json # Firebase configuration
+```
+
+---
+
+## dataset/
+
+Contains all evaluation data, configurations, and resources. See the [Configuration Overview](./config/about.md) for detailed file formats.
+
+| Path | Description |
+|------|-------------|
+| `tasks/` | Task subdirectories with `task.yaml` files and inline samples |
+| `jobs/` | Job files for different run configurations |
+| `context_files/` | Context markdown files for prompt injection |
+| `sandboxes/` | Container configuration (Containerfile, compose.yaml) |
+| `workspaces/` | Reusable project templates (flutter_app, dart_package) |
+
+---
+
+## packages/
+
+### dataset_config_dart/
+
+Dart package that converts dataset YAML into EvalSet JSON for the Python runner. Provides a layered API:
+
+```
+dataset_config_dart/
+├── lib/
+│ ├── dataset_config_dart.dart # Library barrel file
+│ └── src/
+│ ├── config_resolver.dart # Facade: single-call convenience API
+│ ├── parsed_task.dart # Intermediate parsing type
+│ ├── parsers/ # YamlParser, JsonParser
+│ ├── resolvers/ # EvalSetResolver
+│ ├── writers/ # EvalSetWriter
+│ └── utils/ # YAML helpers
+├── bin/ # CLI entry points
+└── test/ # Dart test suite
+```
+
+---
+
+### dash_evals/
+
+Python package for running LLM evaluations using [Inspect AI](https://inspect.aisi.org.uk/).
+
+```
+dash_evals/
+├── src/dash_evals/
+│ ├── main.py # CLI entry point (--json or --task mode)
+│ ├── runner/
+│ │ ├── json_runner.py # Run from EvalSet JSON manifest
+│ │ ├── args_runner.py # Run from direct CLI arguments
+│ │ ├── tasks/ # Task implementations (@task functions)
+│ │ ├── scorers/ # Scoring logic
+│ │ ├── solvers/ # Solver chains
+│ │ └── sandboxes/ # Sandbox environments
+│ ├── models/ # Data models
+│ └── utils/ # Logging and helpers
+├── tests/ # Pytest test suite
+└── pyproject.toml # Package configuration
+```
+
+---
+
+### devals_cli/ (devals)
+
+Dart CLI for creating and managing evaluation tasks and jobs. See the [CLI documentation](./cli.md) for full command reference.
+
+```
+devals_cli/
+├── bin/devals.dart # CLI entry point
+├── lib/src/
+│ ├── commands/ # Command implementations
+│ ├── console/ # Console UI and prompts
+│ ├── dataset/ # Dataset file utilities and discovery
+│ └── yaml/ # YAML generation and parsing
+└── test/ # Dart test suite
+```
+
+
+### eval_explorer/
+
+Dart/Flutter application for exploring evaluation results. Built with [Serverpod](https://serverpod.dev/).
+
+```
+eval_explorer/
+├── eval_explorer_client/ # Dart client package
+├── eval_explorer_flutter/ # Flutter web/mobile app
+├── eval_explorer_server/ # Serverpod backend
+└── eval_explorer_shared/ # Shared models
+```
diff --git a/docs/guides/config.md b/docs/guides/config.md
new file mode 100644
index 0000000..aef6aba
--- /dev/null
+++ b/docs/guides/config.md
@@ -0,0 +1,43 @@
+# Config guide
+
+Evals uses a layered YAML configuration system. You define **what** to evaluate (tasks and samples), **how** to run it (jobs), and **where** code executes (sandboxes). The CLI resolves these files into a single manifest and hands it to the Python runner — so most of the time you're just editing YAML.
+
+This page walks through the main concepts and how they connect.
+
+## **Dataset**
+
+The Dataset is the collection of Tasks and Samples that are run through the Python tool. A
+Sample is, at a minimum, an input and a target. These are essentially test cases.
+
+In evals, the definition of a dataset is expanded to include all fixtures needed for running evals, and all of these definitions live in the `dataset/` directory of the GitHub repository.
+
+> [!NOTE]
+> The following diagrams provide a mental model. (They also provide a literal representation of how it works.) A lot of this is hidden from you, the user or sample author, so don’t let it overwhelm!
+
+
+
+* **Samples** - individual eval case
+* **Models** we run against
+* **Variants** - Different configurations for the agent being evaluated, e.g. with Dart MCP, with or without skills, with and without rules files, and every combination of those things.
+* **Tasks** - A task is a Python function entrypoint for one “type” of evals. For example, “question_answer”, “code_gen”, “mcp_create_project” are a few of the tasks we support. Each task generally takes a list of specific samples that are configured to run for that task.
+* **Workspaces** (The codebase that the agent is tinkering with in an eval)
+* **Sandbox definitions** (host machine, podman, docker)
+* **Default runtime configurations**
+
+### **Tasks are the basic unit of defining eval runs.**
+
+
+
+### **Job files are run configuration**
+
+
+
+### **Then evals run based on that job file:**
+
+
+
+This means you care about job files and task files. Job files might look like this:
+
+- job/main.yaml (runs the whole thing)
+- job/ci.yaml (a job that runs as part of ci)
+- job/local_dev.yaml (a job that is .gitignored, used for quick iteration)
diff --git a/docs/guides/index.md b/docs/guides/index.md
new file mode 100644
index 0000000..73e04a8
--- /dev/null
+++ b/docs/guides/index.md
@@ -0,0 +1,11 @@
+# Guides
+
+Get started with evals — learn how to author and run your own evaluations.
+
+```{toctree}
+:maxdepth: 1
+
+quick_start
+tutorial
+config
+```
diff --git a/docs/guides/quick_start.md b/docs/guides/quick_start.md
new file mode 100644
index 0000000..dd70a26
--- /dev/null
+++ b/docs/guides/quick_start.md
@@ -0,0 +1,140 @@
+# Get started
+
+A guide to using evals as a framework for the local development of your own evals.
+
+## Prerequisites
+
+| Tool | Version | Purpose |
+|------|---------|---------|
+| [Dart SDK](https://dart.dev/get-dart) | 3.10+ | Runs the `devals` CLI |
+| [Python](https://www.python.org/) | 3.13+ | Runs the `dash_evals` runner |
+
+You'll also need an API key for at least one model provider (`GOOGLE_API_KEY`, `ANTHROPIC_API_KEY`, or `OPENAI_API_KEY`).
+
+## 1. Install the packages
+
+```bash
+git clone https://github.com/flutter/evals.git
+cd evals
+pip install -e packages/dash_evals
+dart pub global activate devals --source path packages/devals_cli
+
+
+## TODO: Integrate in the new repo. This is wrong for this repo
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e "packages/dash_evals[dev]"
+pip install -e "packages/dataset_config_python[dev]"
+```
+
+This installs two things:
+
+- **`devals`** (Dart) — the CLI you'll use for every command. It resolves YAML configuration into a JSON manifest and delegates execution.
+- **`dash_evals`** (Python) — the runtime that receives the manifest and drives [Inspect AI](https://inspect.aisi.org.uk/)'s `eval_set()` to actually run evaluations.
+
+## 2. Check your environment
+
+```bash
+devals doctor
+```
+
+This runs a series of prerequisite checks — Dart SDK, Python version, whether `dash_evals` is installed, API keys, and optional tools like Podman and Flutter. Fix any errors it reports before continuing; warnings are safe to ignore for now.
+
+## 3. Set up Podman (optional)
+
+If your evals use containerized execution (`sandbox_type: podman` in a job YAML), you need Podman installed and a container image built. You can skip this step for basic evals that run locally.
+
+**Install Podman** (macOS):
+
+```bash
+brew install podman
+podman machine init
+podman machine start
+```
+
+**Build the Flutter sandbox image:**
+
+```bash
+cd examples/evals-dataset/evals/sandboxes/podman
+podman build -t flutter-sandbox:latest .
+```
+
+This builds `localhost/flutter-sandbox:latest`, which includes Ubuntu 24.04 and the Flutter SDK. The build takes a few minutes.
+
+> **Tip:** To target a different Flutter channel, pass `--build-arg FLUTTER_CHANNEL=beta` (or `main`).
+
+## 4. Configure API keys
+
+Make sure you have at least one model provider API key set as an environment variable. You can set them in your shell profile or in a `.env` file in your project root.
+
+```bash
+export GOOGLE_API_KEY=your_key_here
+```
+
+## 5. Initialize your dataset
+
+Run `devals init` from the root of the project you want to evaluate. This is typically a Dart or Flutter project — the scaffolded starter task will point back at your project as its workspace.
+
+```bash
+cd ~/my-flutter-app
+devals init
+```
+
+This creates two things:
+
+- **`devals.yaml`** in your project root — a marker file that tells the CLI where your eval dataset lives (defaults to `./evals`).
+- **`evals/`** directory with the following structure:
+
+```
+my-flutter-app/
+├── devals.yaml # ← marker file
+└── evals/
+ ├── tasks/
+ │ └── get_started/
+ │ └── task.yaml # starter task + sample
+ └── jobs/
+ └── local_dev.yaml # job ready to run
+```
+
+The starter task uses the `analyze_codebase` task function, which asks the model to
+explore your project and suggest an improvement. It's a good smoke-test that
+doesn't require a sandbox or any extra setup.
+
+
+## 6. Run your first eval
+
+```bash
+devals run local_dev
+```
+
+Behind the scenes, this:
+
+1. Resolves your YAML config (job + tasks + samples) into an EvalSet JSON manifest
+2. Passes the manifest to the Python `dash_evals` runner
+3. `dash_evals` calls Inspect AI's `eval_set()`, which sends prompts, collects responses, and scores results
+4. Logs are written to a `logs/` directory (a sibling of `evals/`)
+
+To preview the resolved configuration without actually making API calls:
+
+```bash
+devals run local_dev --dry-run
+```
+
+This prints every task × model × variant combination that would execute, so you can verify your setup before spending API credits.
+
+## 7. View results
+
+```bash
+devals view
+```
+
+This launches the [Inspect AI log viewer](https://inspect.aisi.org.uk/log-viewer.html) — a local web UI where you can browse runs, inspect individual samples, view scores, and read full conversation transcripts. It automatically finds your `logs/` directory based on `devals.yaml`.
+
+---
+
+## Next steps
+
+- **Add more samples** — `devals create sample`
+- **Add tasks** — `devals create task`
+- **Create targeted jobs** — `devals create job`
+- **Interactive walkthrough** — `devals create pipeline` guides you through creating a sample, task, and job in one go
+- **[Follow the tutorial](tutorial.md)** — a hands-on walkthrough of authoring a code-generation task from scratch
diff --git a/docs/guides/tutorial.md b/docs/guides/tutorial.md
new file mode 100644
index 0000000..5776963
--- /dev/null
+++ b/docs/guides/tutorial.md
@@ -0,0 +1,287 @@
+# Author evals
+
+This tutorial picks up where [Get Started](quick_start.md) left off.
+By the end, you'll have:
+
+1. Authored a task file with two **code-generation** samples
+2. Created a job file that targets your new task
+3. Run the job and watched Inspect AI execute it
+4. Opened the Inspect log viewer to review results
+
+> [!NOTE]
+> This guide assumes you've already completed the [Get Started](quick_start.md) guide and
+> have a working `devals` installation with at least one model API key configured.
+
+---
+
+## 1. Create the task
+
+A **task** tells the framework *what* to evaluate. Each task lives in its own subdirectory
+under `evals/tasks/` and contains a `task.yaml` file.
+
+### 1.1 Set up a workspace
+
+Code-generation tasks need a **workspace** — a starter project the model writes code into
+and where tests run. Create a minimal Dart package to use as a template:
+
+```
+evals/
+└── workspaces/
+ └── dart_package/
+ ├── pubspec.yaml
+ └── lib/
+ └── main.dart
+```
+
+```{code-block} yaml
+---
+caption: evals/workspaces/dart_package/pubspec.yaml
+---
+name: dart_package_template
+description: Minimal Dart package template
+version: 1.0.0
+publish_to: none
+
+environment:
+ sdk: '>=3.0.0 <4.0.0'
+
+dev_dependencies:
+ test: ^1.24.0
+```
+
+```{code-block} dart
+---
+caption: evals/workspaces/dart_package/lib/main.dart
+---
+// Starter file — the model will overwrite this.
+```
+
+> [!TIP]
+> You can also point `workspace` at your existing project root, a Flutter app,
+> or any directory that already has a `pubspec.yaml`.
+
+### 1.2 Write a test file
+
+Each sample can have its own test file that the scorer runs automatically. Create a
+test for the first sample:
+
+```
+evals/
+└── tasks/
+ └── dart_code_gen/
+ ├── task.yaml ← (you'll create this next)
+ └── tests/
+ └── fizzbuzz_test.dart
+```
+
+```{code-block} dart
+---
+caption: evals/tasks/dart_code_gen/tests/fizzbuzz_test.dart
+---
+import 'package:test/test.dart';
+import 'package:dart_package_template/main.dart';
+
+void main() {
+ test('fizzBuzz returns correct values', () {
+ expect(fizzBuzz(3), 'Fizz');
+ expect(fizzBuzz(5), 'Buzz');
+ expect(fizzBuzz(15), 'FizzBuzz');
+ expect(fizzBuzz(7), '7');
+ });
+
+ test('fizzBuzz handles 1', () {
+ expect(fizzBuzz(1), '1');
+ });
+}
+```
+
+### 1.3 Write the task.yaml
+
+Now create the task definition with two inline samples:
+
+```{code-block} yaml
+---
+caption: evals/tasks/dart_code_gen/task.yaml
+---
+# ============================================================
+# Task: Dart Code Generation
+# ============================================================
+# Uses the built-in `code_gen` task function which:
+# 1. Sends the prompt to the model
+# 2. Parses the structured code response
+# 3. Writes the code into the sandbox workspace
+# 4. Runs tests and scores the result
+
+func: code_gen
+workspace: ../../workspaces/dart_package
+
+samples:
+ inline:
+ # ── Sample 1: FizzBuzz ──────────────────────────────────
+ - id: fizzbuzz
+ difficulty: easy
+ tags: [dart, functions]
+ input: |
+ Write a top-level function called `fizzBuzz` that takes an
+ integer `n` and returns a String:
+ - "Fizz" if n is divisible by 3
+ - "Buzz" if n is divisible by 5
+ - "FizzBuzz" if divisible by both
+ - The number as a string otherwise
+
+ Write the complete lib/main.dart file.
+ target: |
+ The code must define a top-level `String fizzBuzz(int n)` function
+ that returns the correct value for all cases.
+ It must pass the tests in test/.
+ tests:
+ path: ./tests/fizzbuzz_test.dart
+
+ # ── Sample 2: Stack implementation ──────────────────────
+ - id: stack_class
+ difficulty: medium
+ tags: [dart, data-structures, classes]
+ input: |
+ Implement a generic Stack class in Dart with the
+ following methods:
+ - push(T item) — adds an item to the top
+ - T pop() — removes and returns the top item,
+ throws StateError if empty
+ - T peek() — returns the top item without removing it,
+ throws StateError if empty
+ - bool get isEmpty
+ - int get length
+
+ Write the complete lib/main.dart file.
+ target: |
+ The code must define a generic Stack class with push,
+ pop, peek, isEmpty, and length. pop and peek must throw
+ StateError when the stack is empty.
+```
+
+**Key fields explained:**
+
+| Field | What it does |
+|-------|-------------|
+| `func` | The Python `@task` function that runs the evaluation. `code_gen` is a built-in generic code-generation task. |
+| `workspace` | Path to the starter project (relative to the task directory). |
+| `samples.inline` | A list of test cases, each with an `input` prompt and a `target` grading criteria. |
+| `tests.path` | Path to test files the scorer runs against the generated code. |
+
+> [!NOTE]
+> See [Tasks](config/tasks.md) and [Samples](config/samples.md) for the
+> complete field reference.
+
+---
+
+## 2. Create the job
+
+A **job** controls *how* to run your tasks — which models to use, how many
+connections, and which tasks/variants to include.
+
+Create `evals/jobs/tutorial.yaml`:
+
+```{code-block} yaml
+---
+caption: evals/jobs/tutorial.yaml
+---
+# ============================================================
+# Job: tutorial
+# ============================================================
+# A focused job for the tutorial walkthrough.
+
+# Which model(s) to evaluate
+models:
+ - google/gemini-2.5-flash
+
+# Only run the code-gen task we just created
+tasks:
+ inline:
+ dart_code_gen: {}
+```
+
+That's the minimal job — it will:
+
+- Evaluate `google/gemini-2.5-flash`
+- Run every sample in the `dart_code_gen` task
+- Use the default `baseline` variant (no extra tools or context)
+
+> [!TIP]
+> You can add **variants** to test the model with additional context or tools.
+> For example:
+> ```yaml
+> variants:
+> baseline: {}
+> with_context:
+> context_files: [./context_files/dart_docs.md]
+> ```
+> See [Configuration Overview](config/about.md#variants) for details.
+
+---
+
+## 3. Run the job
+
+Make sure you're in your project directory (the one containing `devals.yaml`), then run:
+
+```bash
+devals run tutorial
+```
+
+What happens behind the scenes:
+
+1. The Dart `dataset_config_dart` package resolves your YAML into an EvalSet JSON manifest
+2. The Python `dash_evals` reads the manifest and calls Inspect AI's `eval_set()`
+3. Inspect AI creates a sandbox, sets up the workspace, sends prompts, runs tests, and scores results
+4. Logs are written to the `logs/` directory
+
+### Dry run first
+
+To preview the resolved configuration without making any API calls:
+
+```bash
+devals run tutorial --dry-run
+```
+
+This prints a summary of every task × model × variant combination that would
+execute, so you can verify everything looks right before spending API credits.
+
+### What to expect
+
+When the eval runs, you'll see Inspect AI's interactive terminal display showing
+progress for each sample. A typical run with two samples against one model takes
+1–3 minutes, depending on the model's response time.
+
+---
+
+## 4. View the results
+
+After the run completes, launch the Inspect AI log viewer:
+
+```bash
+devals view
+```
+
+This opens a local web UI (powered by Inspect AI) where you can:
+
+- **Browse runs** — see each task × model × variant combination
+- **Inspect samples** — view the model's generated code, scores, and any test output
+- **Compare variants** — if you defined multiple variants, compare how they performed side-by-side
+
+The viewer automatically points at your `logs/` directory. To view logs from a
+specific directory:
+
+```bash
+devals view path/to/logs
+```
+
+---
+
+## Next steps
+
+Now that you've run your first custom evaluation, here are some things to try:
+
+- **Add more samples** to your task: `devals create sample`
+- **Try different task types** — `question_answer`, `bug_fix`, or `flutter_code_gen`. See [all available task functions](../packages/dash_evals.md).
+- **Add variants** to test how context files or MCP tools affect performance. See [Variants](config/about.md#variants).
+- **Run multiple models** by adding more entries to the `models` list in your job file
+- **Read the config reference** for [Jobs](config/jobs.md), [Tasks](config/tasks.md), and [Samples](config/samples.md)
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..82d6d06
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,40 @@
+# evals
+
+A framework for authoring and running LLM evaluations on Dart and Flutter tasks.
+
+::::{grid} 1 1 3 3
+:gutter: 3
+
+:::{grid-item-card} 📖 Guides
+:link: guides/index
+:link-type: doc
+
+Learn how to author and run your own evaluations, from quick start to advanced configuration.
+:::
+
+:::{grid-item-card} 📚 Reference
+:link: reference/index
+:link-type: doc
+
+API documentation, CLI usage, configuration reference, and glossary.
+:::
+
+:::{grid-item-card} 🤝 Contributor Guides
+:link: contributing/index
+:link-type: doc
+
+Repository structure, package details, and how to contribute to the project.
+:::
+
+::::
+
+```{toctree}
+:hidden:
+
+guides/index
+reference/index
+contributing/index
+```
+
+*Example of AI doing a subpar job, maybe we should eval image gen:*
+
diff --git a/docs/reference/api/dash_evals/index.md b/docs/reference/api/dash_evals/index.md
new file mode 100644
index 0000000..2da5ec7
--- /dev/null
+++ b/docs/reference/api/dash_evals/index.md
@@ -0,0 +1,10 @@
+# dash_evals
+
+Main package entry points and overview.
+
+```{toctree}
+:maxdepth: 1
+
+overview
+main
+```
diff --git a/docs/reference/api/dash_evals/main.md b/docs/reference/api/dash_evals/main.md
new file mode 100644
index 0000000..5895b0d
--- /dev/null
+++ b/docs/reference/api/dash_evals/main.md
@@ -0,0 +1,7 @@
+# Main Entry Point
+
+CLI entry point for running evaluations.
+
+```{eval-rst}
+.. autofunction:: dash_evals.main.main
+```
diff --git a/docs/reference/api/dash_evals/overview.md b/docs/reference/api/dash_evals/overview.md
new file mode 100644
index 0000000..4100f41
--- /dev/null
+++ b/docs/reference/api/dash_evals/overview.md
@@ -0,0 +1,10 @@
+# Overview
+
+Package overview and exports.
+
+```{eval-rst}
+.. automodule:: dash_evals
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/runner/index.md b/docs/reference/api/runner/index.md
new file mode 100644
index 0000000..2058d5d
--- /dev/null
+++ b/docs/reference/api/runner/index.md
@@ -0,0 +1,17 @@
+# Runner Module
+
+The runner module executes evaluations using Inspect AI.
+
+It supports two modes:
+- **JSON mode** (`--json`): reads an `eval_set.json` manifest emitted by the Dart CLI
+- **Direct args mode** (`--task`, `--model`, etc.): runs a single task directly
+
+```{toctree}
+:maxdepth: 1
+
+runners
+tasks
+solvers
+scorers
+sandboxes
+```
diff --git a/docs/reference/api/runner/runners.md b/docs/reference/api/runner/runners.md
new file mode 100644
index 0000000..0297d8f
--- /dev/null
+++ b/docs/reference/api/runner/runners.md
@@ -0,0 +1,29 @@
+# Runners
+
+Core evaluation execution logic. The runner module provides two entry points:
+
+---
+
+## JSON Runner
+
+Reads an `eval_set.json` manifest (emitted by the Dart CLI) and calls `eval_set()`.
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.json_runner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Args Runner
+
+Runs a single task directly from CLI arguments (`--task`, `--model`, `--dataset`).
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.args_runner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/runner/sandboxes.md b/docs/reference/api/runner/sandboxes.md
new file mode 100644
index 0000000..1fb06a7
--- /dev/null
+++ b/docs/reference/api/runner/sandboxes.md
@@ -0,0 +1,25 @@
+# Sandboxes
+
+Sandbox environments for isolated task execution.
+
+---
+
+## Podman Sandbox
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.sandboxes.podman.podman
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Sandbox Provider
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.sandboxes.provider
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/runner/scorers.md b/docs/reference/api/runner/scorers.md
new file mode 100644
index 0000000..427c490
--- /dev/null
+++ b/docs/reference/api/runner/scorers.md
@@ -0,0 +1,102 @@
+# Scorers
+
+Scorer implementations for evaluating task outputs.
+
+---
+
+## Code Quality Scorer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.code_quality
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Dart Analyze Scorer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.dart_analyze
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Flutter Code Scorer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.flutter_code
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Flutter Test Scorer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.flutter_test
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Flutter Output Parser
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.flutter_output_parser
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Flutter Scoring Utilities
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.flutter_scoring
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## MCP Tool Usage Scorer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.mcp_tool_usage
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Export Workspace
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.export_workspace
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Skill Usage Scorer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.scorers.skill_usage
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/runner/solvers.md b/docs/reference/api/runner/solvers.md
new file mode 100644
index 0000000..269a761
--- /dev/null
+++ b/docs/reference/api/runner/solvers.md
@@ -0,0 +1,69 @@
+# Solvers
+
+Solver implementations for evaluation tasks.
+
+---
+
+## Add System Message
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.solvers.add_system_message
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Context Injector
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.solvers.context_injector
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Extract Code
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.solvers.extract_code
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Inject Test Files
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.solvers.inject_test_files
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Setup Workspace
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.solvers.setup_workspace
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Write to Sandbox
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.solvers.write_to_sandbox
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/runner/tasks.md b/docs/reference/api/runner/tasks.md
new file mode 100644
index 0000000..aa6131c
--- /dev/null
+++ b/docs/reference/api/runner/tasks.md
@@ -0,0 +1,82 @@
+# Tasks
+
+Task implementations for different evaluation types.
+
+---
+
+## Code Generation
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.code_gen
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Bug Fix
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.bug_fix
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Question Answer
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.question_answer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## MCP Tool
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.mcp_tool
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Analyze Codebase
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.analyze_codebase
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Skill Test
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.skill_test
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
+
+---
+
+## Task Helpers
+
+Shared utilities used across task implementations.
+
+```{eval-rst}
+.. automodule:: dash_evals.runner.tasks.task_helpers
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/utils/index.md b/docs/reference/api/utils/index.md
new file mode 100644
index 0000000..7044ed4
--- /dev/null
+++ b/docs/reference/api/utils/index.md
@@ -0,0 +1,10 @@
+# Utils Module
+
+Utility functions for dash_evals.
+
+```{toctree}
+:maxdepth: 1
+
+logging
+markdown
+```
diff --git a/docs/reference/api/utils/logging.md b/docs/reference/api/utils/logging.md
new file mode 100644
index 0000000..0509933
--- /dev/null
+++ b/docs/reference/api/utils/logging.md
@@ -0,0 +1,10 @@
+# Logging Utilities
+
+Logging configuration and utilities.
+
+```{eval-rst}
+.. automodule:: dash_evals.utils.logging
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/api/utils/markdown.md b/docs/reference/api/utils/markdown.md
new file mode 100644
index 0000000..fb72714
--- /dev/null
+++ b/docs/reference/api/utils/markdown.md
@@ -0,0 +1,10 @@
+# Markdown Utilities
+
+Markdown processing and formatting utilities.
+
+```{eval-rst}
+.. automodule:: dash_evals.utils.markdown
+ :members:
+ :undoc-members:
+ :show-inheritance:
+```
diff --git a/docs/reference/cli.md b/docs/reference/cli.md
new file mode 100644
index 0000000..deddead
--- /dev/null
+++ b/docs/reference/cli.md
@@ -0,0 +1,121 @@
+# CLI usage
+
+The `devals` CLI is a Dart command-line tool for managing the evals dataset. It provides interactive commands for creating samples, tasks, and jobs, as well as running evaluations and viewing results.
+
+```bash
+cd packages/devals_cli
+dart pub get
+dart run bin/devals.dart --help
+```
+
+> [!TIP]
+> Run `devals create pipeline` for an interactive walkthrough that creates your first sample, task, and job.
+
+Key commands:
+
+| Command | Description |
+|---------|-------------|
+| `devals init` | Initialize a new dataset configuration in the current directory |
+| `devals doctor` | Check that all prerequisites are installed and configured |
+| `devals create pipeline` | Interactive guide to create a sample, task, and job in one go |
+| `devals create sample` | Create a new sample interactively |
+| `devals create task` | Create a new task directory with a starter `task.yaml` |
+| `devals create job` | Create a new job file |
+| `devals run <job_name>` | Run evaluations (wraps `run-evals`) |
+| `devals run --dry-run` | Preview what would be run without executing |
+| `devals view [log_dir_path]` | Launch the Inspect AI log viewer |
+
+---
+
+## Usage
+
+```
+CLI for managing evals - create samples, run evaluations, and view results.
+
+Usage: devals <command> [arguments]
+
+Global options:
+-h, --help Print this usage information.
+
+Available commands:
+ create Create samples, jobs, and tasks for the dataset.
+ job Create a new job file interactively.
+ pipeline Interactive guide to create a sample, task, and job in one go.
+ sample Create a new sample and set it up to run.
+ task Create a new task directory with a starter task.yaml.
+ doctor Check that all prerequisites are installed and configured.
+ init Initialize a new dataset configuration in the current directory.
+ run Run evaluations using the dash_evals package.
+ view Launch the Inspect AI viewer to view evaluation results.
+
+Run "devals help <command>" for more information about a command.
+```
+
+## Commands
+
+### `devals init`
+
+Initializes a new dataset configuration in the current directory. Creates:
+
+- `evals/tasks/get_started/task.yaml` — a starter task with an example sample
+- `evals/jobs/local_dev.yaml` — a default job for local development
+
+Use this when starting a new project that needs its own evaluation dataset.
+
+### `devals doctor`
+
+Checks that all prerequisites for the CLI, `dash_evals`, and `eval_explorer` are installed and correctly configured. Similar to `flutter doctor`, it verifies:
+
+- **Dart SDK** — required for the CLI itself
+- **Python 3.13+** — required for `dash_evals`
+- **dash_evals** (`run-evals`) — the Python evaluation package
+- **Podman** — container runtime for sandboxed execution
+- **Flutter SDK** — needed for Flutter-based eval tasks
+- **Serverpod** — needed for the `eval_explorer` app
+- **API Keys** — checks for `GOOGLE_API_KEY`, `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`
+
+### `devals create pipeline`
+
+An interactive walkthrough that guides you through creating your first sample, task, and job — ideal for first-time contributors.
+
+### `devals create sample`
+
+Interactively creates a new sample directory with a `sample.yaml` file. Prompts for:
+
+- Sample ID (snake_case identifier)
+- Difficulty level
+- Whether a workspace is needed
+- Workspace type (template, path, git, or create command)
+
+### `devals create task`
+
+Creates a new task directory under `tasks/` with a starter `task.yaml`. Prompts for:
+
+- Task ID
+- Task function (selected from the Python registry)
+- Optional system message
+
+### `devals create job`
+
+Creates a new job YAML file in `jobs/`. Prompts for:
+
+- Job name
+- Which models, variants, and tasks to include
+
+### `devals run <job_name>`
+
+Runs evaluations using the `dash_evals` package. Wraps the Python `run-evals` entry point.
+
+```bash
+devals run local_dev # Run a specific job
+devals run local_dev --dry-run # Preview without executing
+```
+
+### `devals view [log_path]`
+
+Launches the Inspect AI log viewer to browse evaluation results. If no path is given, defaults to the `logs/` directory in the dataset.
+
+```bash
+devals view # Auto-detect log directory
+devals view /path/to/logs # View specific log directory
+```
diff --git a/docs/reference/configuration_reference.md b/docs/reference/configuration_reference.md
new file mode 100644
index 0000000..deb2193
--- /dev/null
+++ b/docs/reference/configuration_reference.md
@@ -0,0 +1,558 @@
+# Configuration Reference
+
+This document describes the *standard* `eval/` directory structure and YAML configuration files used by the evaluation framework.
+
+## Overview
+
+The evaluation framework uses the `eval/` directory as its entry point. It contains:
+
+- Task definitions autodiscovered from `tasks/*/task.yaml`
+- Job files in `jobs/` that control what to run
+- Shared resources (context files, sandboxes, workspaces)
+
+Configuration is parsed and resolved by the Dart `dataset_config_dart` package, which produces an EvalSet JSON manifest consumed by the Python `dash_evals` package.
+
+## Directory Structure
+
+```
+eval/
+├── jobs/ # Job files for different run configurations
+│ ├── local_dev.yaml
+│ └── ci.yaml
+├── tasks/ # Task definitions (autodiscovered)
+│ ├── flutter_bug_fix/
+│ │ ├── task.yaml # Task config with inline samples
+│ │ └── project/ # Workspace files (if applicable)
+│ ├── dart_question_answer/
+│ │ └── task.yaml
+│ └── generate_flutter_app/
+│ ├── task.yaml
+│ └── todo_tests/ # Test files for a sample
+├── context_files/ # Context files injected into prompts
+│ └── flutter.md
+├── sandboxes/ # Container configurations
+│ └── podman/
+│ ├── Containerfile
+│ └── compose.yaml
+└── workspaces/ # Reusable project templates
+ ├── dart_package/
+ ├── flutter_app/
+ └── jaspr_app/
+```
+
+---
+
+## Task files
+
+Each subdirectory in `tasks/` that contains a `task.yaml` is automatically discovered as a task. The **directory name** is the task ID.
+
+```yaml
+# tasks/flutter_bug_fix/task.yaml
+func: flutter_bug_fix
+system_message: |
+ You are an expert Flutter developer. Fix the bug and explain your changes.
+
+# Task-level workspace (inherited by all samples)
+workspace:
+ path: ./project
+
+# Task-level tests (inherited by all samples)
+tests:
+ path: ./tests
+
+# Restrict which job-level variants apply to this task (optional)
+allowed_variants: [baseline, mcp_only]
+
+samples:
+ inline:
+ - id: flutter_bloc_cart_mutation_001
+ difficulty: medium
+ tags: [bloc, state]
+ input: |
+ Fix the bug where adding items to cart doesn't update the total.
+ target: |
+ The fix should modify the BLoC to emit a new state instead of mutating.
+
+ - id: navigation_crash
+ difficulty: hard
+ tags: [navigation]
+ workspace:
+ path: ./nav_project # Override task-level workspace
+ input: |
+ Fix the crash when navigating back from the detail screen.
+ target: |
+ The fix should handle the disposed controller properly.
+```
+
+### Task-Level Fields
+
+#### Core Fields
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `func` | string | Yes | Name of the `@task` function (resolved dynamically via `importlib`) |
+| `description` | string | No | Human-readable description |
+| `samples` | object | Yes | Samples config with `inline` and/or `paths` keys |
+| `allowed_variants` | list | No | Whitelist of variant names this task accepts (omit to accept all) |
+| `system_message` | string | No | Custom system prompt for this task |
+| `workspace` | object | No | Default workspace for all samples |
+| `tests` | object | No | Default test files for all samples |
+
+#### Inspect AI Task Parameters
+
+These map directly to [Inspect AI's `Task` constructor](https://inspect.aisi.org.uk/reference/inspect_ai.html#task). All are optional and override any `task_defaults` set in the job file.
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `model` | string | Default model for this task (overrides the eval model) |
+| `config` | object | Model generation config (e.g., `{temperature: 0.2, max_tokens: 4096}`) |
+| `model_roles` | object | Named roles for use in `get_model()` |
+| `sandbox` | string/object | Sandbox environment type or `[type, config_path]` |
+| `approval` | string/object | Tool use approval policies |
+| `epochs` | int/object | Number of times to repeat each sample (optionally with score reducer) |
+| `fail_on_error` | number/bool | `true` = fail on first error, `0.0–1.0` = fail if proportion exceeds threshold |
+| `continue_on_fail` | bool | Continue running if `fail_on_error` condition is met |
+| `message_limit` | int | Max total messages per sample |
+| `token_limit` | int | Max total tokens per sample |
+| `time_limit` | int | Max clock time (seconds) per sample |
+| `working_limit` | int | Max working time (seconds) per sample (excludes wait time) |
+| `cost_limit` | float | Max cost (dollars) per sample |
+| `early_stopping` | string/object | Early stopping callbacks |
+| `display_name` | string | Task display name (e.g., for plotting) |
+| `version` | int | Version of task spec (to distinguish evolutions) |
+| `metadata` | object | Additional metadata to associate with the task |
+
+### Samples Object
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `inline` | list | Inline sample definitions |
+| `paths` | list | Glob patterns for external sample YAML files (relative to task dir) |
+
+### Sample Fields (inline in task.yaml)
+
+#### Core Fields
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `id` | string | Yes | Unique sample identifier |
+| `input` | string | Yes | The prompt given to the model |
+| `target` | string | Yes | Expected output or grading criteria |
+| `difficulty` | string | No | `easy`, `medium`, or `hard` |
+| `tags` | list | No | Categories for filtering |
+| `system_message` | string | No | Override system prompt for this sample |
+| `metadata` | object | No | Arbitrary metadata |
+| `workspace` | object | No | Override task-level workspace |
+| `tests` | object | No | Override task-level tests |
+
+#### Inspect AI Sample Parameters
+
+These map directly to [Inspect AI's `Sample`](https://inspect.aisi.org.uk/reference/inspect_ai.dataset.html#sample).
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `choices` | list | Answer choices for multiple-choice evaluations |
+| `sandbox` | string/object | Override sandbox environment for this sample |
+| `files` | object | Files to copy into the sandbox (`{destination: source}`) |
+| `setup` | string | Setup script to run in the sandbox before evaluation |
+
+### Workspace/Tests References
+
+```yaml
+# Reference a reusable template
+workspace:
+ template: flutter_app
+
+# Reference a path relative to task directory
+workspace:
+ path: ./project
+
+# Clone from git
+workspace:
+ git: https://github.com/example/repo.git
+
+# Shorthand (equivalent to path:)
+workspace: ./project
+```
+
+> [!NOTE]
+> Paths in `workspace` and `tests` are resolved **relative to the task directory** (e.g., `tasks/flutter_bug_fix/`).
+
+---
+
+## Sample files
+
+A sample is a single test case containing an input prompt, expected output (grading target), and optional configuration. Samples are defined inline in `task.yaml` or in external YAML files referenced via `paths`.
+
+```yaml
+# Inline in task.yaml
+samples:
+ inline:
+ - id: dart_async_await_001
+ difficulty: medium
+ tags: [async, dart]
+ input: |
+ Explain the difference between Future.then() and async/await in Dart.
+ target: |
+ The answer should cover both approaches, explain that they are
+ functionally equivalent, and note when each is preferred.
+ metadata:
+ added: 2025-02-04
+ category: language_fundamentals
+```
+
+---
+
+### Core Fields
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `id` | string | Yes | Unique sample identifier |
+| `input` | string | Yes | The prompt given to the model |
+| `target` | string | Yes | Expected output or grading criteria |
+| `difficulty` | string | No | `easy`, `medium`, or `hard` |
+| `tags` | list | No | Categories for filtering |
+| `system_message` | string | No | Override system prompt for this sample |
+| `metadata` | object | No | Arbitrary metadata |
+| `workspace` | object | No | Override task-level workspace |
+| `tests` | object | No | Override task-level tests |
+
+---
+
+### Inspect AI Sample Parameters
+
+These map directly to [Inspect AI's `Sample`](https://inspect.aisi.org.uk/reference/inspect_ai.dataset.html#sample).
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `choices` | list | Answer choices for multiple-choice evaluations |
+| `sandbox` | string/object | Override sandbox environment for this sample |
+| `files` | object | Files to copy into the sandbox (`{destination: source}`) |
+| `setup` | string | Setup script to run in the sandbox before evaluation |
+
+### Multiple Choice Example
+
+```yaml
+- id: dart_null_safety_quiz
+ input: "Which of the following is NOT a valid way to handle null in Dart 3?"
+ target: C
+ choices:
+ - "Use the null-aware operator ?."
+ - "Use a null check with if (x != null)"
+ - "Use the ! operator on every nullable variable"
+ - "Use late initialization"
+```
+
+### Sandbox Files Example
+
+```yaml
+- id: flutter_fix_counter
+ input: "Fix the bug in the counter app."
+ target: "The fix should update the state correctly."
+ sandbox: docker
+ files:
+ /workspace/lib/main.dart: ./fixtures/broken_counter.dart
+ /workspace/test/widget_test.dart: ./fixtures/counter_test.dart
+ setup: "cd /workspace && flutter pub get"
+```
+
+---
+
+### Workspace & Tests References
+
+Workspaces and test paths can be specified at task level (inherited by all samples) or per-sample (overrides task level).
+
+```yaml
+# Reference a reusable template
+workspace:
+ template: flutter_app
+
+# Reference a path relative to task directory
+workspace:
+ path: ./project
+
+# Clone from git
+workspace:
+ git: https://github.com/example/repo.git
+
+# Shorthand (equivalent to path:)
+workspace: ./project
+```
+
+> [!NOTE]
+> Paths in `workspace` and `tests` are resolved **relative to the task directory** (e.g., `tasks/flutter_bug_fix/`).
+
+
+---
+
+## Job files
+
+Job files define **what to run** and can **override built-in runtime defaults**. They're selected via `devals run `. Multiple jobs can be run sequentially.
+
+```yaml
+# jobs/local_dev.yaml
+name: local_dev
+
+# Override runtime defaults
+sandbox_type: podman
+max_connections: 15
+max_retries: 10
+
+# Save the agent's final workspace output to logs/<run_id>/examples/
+# save_examples: true
+
+# Filter what to run (optional - omit to run all)
+models:
+ - google/gemini-2.5-flash
+
+# Variants are defined as a named map.
+# Each key is a variant name; the value is the variant configuration.
+variants:
+ baseline: {}
+ context_only: { context_files: [./context_files/flutter.md] }
+ mcp_only: { mcp_servers: [dart] }
+ full: { context_files: [./context_files/flutter.md], mcp_servers: [dart] }
+
+# Inspect AI eval_set() parameters (all optional)
+retry_attempts: 20
+fail_on_error: 0.05
+log_level: info
+tags: [nightly]
+
+# Default Task-level overrides applied to every task
+task_defaults:
+ time_limit: 600
+ message_limit: 50
+
+# Additional eval_set() parameters not covered above
+# eval_set_overrides:
+# bundle_dir: ./bundle
+# log_images: true
+```
+
+
+### Core Job Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `logs_dir` | string | Override logs directory (default: `../logs`) |
+| `sandbox_type` | string | Sandbox type: `local`, `docker`, or `podman` (default: `local`) |
+| `max_connections` | int | Max concurrent API connections (default: `10`) |
+| `max_retries` | int | Max retry attempts for failed samples (default: `3`) |
+| `save_examples` | bool | If `true`, copies the agent's final workspace to `<logs_dir>/<run_id>/examples/` after each sample. (default: `false`) |
+| `models` | list | Filter to specific models — omit to run all |
+| `variants` | map | Named variant definitions (see Variants section) — omit to run all defined in task files |
+| `tasks` | object | Task discovery and overrides (see below) |
+
+### Inspect AI eval_set() Parameters
+
+All [Inspect AI `eval_set()` parameters](https://inspect.aisi.org.uk/reference/inspect_ai.html#eval_set) are available as top-level keys in the job file. These control retry behavior, concurrency, logging, and more.
+
+#### Retry & Error Handling
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `retry_attempts` | int | `10` | Max retry attempts before giving up |
+| `retry_wait` | float | `60` | Seconds between retries (exponential backoff) |
+| `retry_connections` | float | `0.5` | Reduce max_connections at this rate per retry |
+| `retry_cleanup` | bool | `true` | Cleanup failed log files after retries |
+| `retry_on_error` | int | — | Retry samples on error (per-sample) |
+| `fail_on_error` | float | `0.05` | Fail if error proportion exceeds threshold |
+| `continue_on_fail` | bool | — | Continue running even if fail_on_error is met |
+| `debug_errors` | bool | `false` | Raise task errors for debugging |
+
+#### Concurrency
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `max_samples` | int | `max_connections` | Max concurrent samples per task |
+| `max_tasks` | int | `max(4, models)` | Max tasks to run in parallel |
+| `max_subprocesses` | int | `cpu_count` | Max subprocesses in parallel |
+| `max_sandboxes` | int | — | Max sandboxes per-provider in parallel |
+
+#### Logging
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `log_level` | string | `info` | Console log level (`debug`, `info`, `warning`, `error`) |
+| `log_level_transcript` | string | `info` | Log file level |
+| `log_format` | string | `json` | Log format (`eval` or `json`) |
+| `log_samples` | bool | `true` | Log detailed samples and scores |
+| `log_realtime` | bool | `true` | Log events in realtime |
+| `log_images` | bool | `false` | Log base64-encoded images |
+| `log_buffer` | int | — | Samples to buffer before log write |
+| `log_shared` | int | — | Sync sample events for realtime viewing |
+| `log_dir_allow_dirty` | bool | `false` | Allow log dir with unrelated logs |
+
+#### Model Configuration
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `model_base_url` | string | Base URL for the model API |
+| `model_args` | object | Model creation arguments |
+| `model_roles` | object | Named roles for `get_model()` |
+| `task_args` | object | Task creation arguments |
+| `model_cost_config` | object | Model prices for cost tracking |
+
+#### Sample Control
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `limit` | int/list | Limit samples (count or `[start, end]` range) |
+| `sample_id` | string/list | Evaluate specific sample(s) |
+| `sample_shuffle` | bool/int | Shuffle samples (pass seed for deterministic order) |
+| `epochs` | int/object | Repeat samples and optional score reducer |
+
+#### Limits (Applied to All Samples)
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `message_limit` | int | Max messages per sample |
+| `token_limit` | int | Max tokens per sample |
+| `time_limit` | int | Max clock time (seconds) per sample |
+| `working_limit` | int | Max working time (seconds) per sample |
+| `cost_limit` | float | Max cost (dollars) per sample |
+
+#### Miscellaneous
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `tags` | list | Tags for this evaluation run |
+| `metadata` | object | Metadata for this evaluation run |
+| `trace` | bool | Trace model interactions to terminal |
+| `display` | string | Task display type (default: `full`) |
+| `score` | bool | Score output (default: `true`) |
+| `approval` | string/object | Tool use approval policies |
+| `solver` | string/object | Alternative solver(s) |
+| `sandbox_cleanup` | bool | Cleanup sandbox after task (default: `true`) |
+| `bundle_dir` | string | Directory for bundled logs + viewer |
+| `bundle_overwrite` | bool | Overwrite files in bundle_dir |
+| `eval_set_id` | string | Custom ID for the eval set |
+
+### Pass-Through Sections
+
+#### `task_defaults`
+
+Default [Task parameters](#inspect-ai-task-parameters) applied to **every task** in this job. Per-task overrides from `task.yaml` take precedence.
+
+```yaml
+task_defaults:
+ time_limit: 600
+ message_limit: 50
+ cost_limit: 2.0
+ epochs: 3
+```
+
+#### `eval_set_overrides`
+
+Arbitrary `eval_set()` kwargs for parameters not covered by the named fields above. Top-level fields take precedence over overrides.
+
+```yaml
+eval_set_overrides:
+ bundle_dir: ./bundle
+ log_images: true
+```
+
+### Tasks Object
+
+```yaml
+tasks:
+ # Discover tasks via glob patterns (relative to dataset root)
+ paths: [tasks/*]
+ # Per-task overrides (keys must match directory names in tasks/)
+ inline:
+ flutter_bug_fix:
+ allowed_variants: [baseline] # Override variants for this task
+ include-samples: [sample_001] # Only run these samples
+ exclude-samples: [slow_test] # Exclude these samples
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `paths` | list | Glob patterns for discovering task directories |
+| `inline` | object | Per-task configuration overrides |
+
+---
+
+## Variants
+
+Variants modify how tasks execute, controlling context injection, tool availability, and skill access. Variants are defined as **named maps** in job files.
+
+```yaml
+variants:
+ baseline: {}
+ context_only: { context_files: [./context_files/flutter.md] }
+ mcp_only: { mcp_servers: [dart] }
+ full: { context_files: [./context_files/flutter.md], mcp_servers: [dart] }
+```
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `context_files` | list | `[]` | Paths or glob patterns to context files (relative to task dir) |
+| `skills` | list | `[]` | Paths or glob patterns to skill directories (relative to task dir) |
+| `mcp_servers` | list | `[]` | MCP server identifiers |
+
+Tasks can optionally restrict which variants apply to them via `allowed_variants` in their `task.yaml`:
+
+```yaml
+# task.yaml — only run baseline and mcp_only variants for this task
+allowed_variants: [baseline, mcp_only]
+```
+
+Glob patterns (containing `*`, `?`, or `[`) are expanded automatically. For skills, only directories containing `SKILL.md` are included.
+
+> [!IMPORTANT]
+> The `skills` feature requires a sandbox (docker/podman). Skill directories are copied into the sandbox filesystem by Inspect AI's built-in `skill()` tool. Each skill directory must contain a `SKILL.md` file.
+
+---
+
+## Context Files
+
+Markdown files with YAML frontmatter providing additional context to the model.
+
+```markdown
+---
+title: "AI Rules for Flutter"
+version: "1.0.0"
+description: "Recommended patterns and best practices"
+dart_version: "3.10.0"
+flutter_version: "3.24.0"
+updated: "2025-12-24"
+---
+
+## Flutter Best Practices
+
+Content here is injected into the model's context when the variant
+has context_files pointing to this file.
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `title` | string | Yes | Context file title |
+| `version` | string | Yes | Version identifier |
+| `description` | string | Yes | Brief description |
+| `dart_version` | string | No | Target Dart version |
+| `flutter_version` | string | No | Target Flutter version |
+| `updated` | string | No | Last update date |
+
+---
+
+## CLI Usage
+
+```bash
+# Run a specific job
+devals run local_dev
+devals run ci
+
+# Dry run — validate config without executing
+devals run local_dev --dry-run
+
+# Create a new task
+devals create task
+
+# Add a sample to an existing task
+devals create sample
+
+# Initialize a new dataset
+devals init
+```
diff --git a/docs/reference/dart_api/dataset_config_dart/dataset_config_dart.md b/docs/reference/dart_api/dataset_config_dart/dataset_config_dart.md
new file mode 100644
index 0000000..460a6a4
--- /dev/null
+++ b/docs/reference/dart_api/dataset_config_dart/dataset_config_dart.md
@@ -0,0 +1,1750 @@
+# dataset_config_dart
+
+Core library for resolving eval dataset YAML into EvalSet JSON.
+
+This package contains the business logic for:
+- Parsing task and job YAML files (or pre-parsed JSON maps)
+- Resolving configs (models, sandboxes, variants)
+- Writing EvalSet JSON for the Python runner
+
+It is frontend-agnostic — both the CLI and a future web interface
+can use this library.
+
+## Quick start
+
+Use [ConfigResolver] for a single-call convenience facade:
+
+```dart
+final resolver = ConfigResolver();
+final configs = resolver.resolve(datasetPath, ['my_job']);
+```
+
+## Layered API
+
+For finer-grained control, use the individual layers:
+
+1. **Parsers** — [YamlParser], [JsonParser]
+2. **Resolvers** — [EvalSetResolver]
+3. **Writers** — [EvalSetWriter]
+
+---
+
+## abstract class `ChatCompletionChoice`
+
+**Mixins:** `_$ChatCompletionChoice`
+
+Choice generated for completion.
+
+### Constructors
+
+#### `ChatCompletionChoice`
+
+```dart
+ChatCompletionChoice({required ChatMessageAssistant message, String stopReason, Logprobs? logprobs})
+```
+
+Creates a chat completion choice.
+
+#### `ChatCompletionChoice.fromJson`
+
+```dart
+ChatCompletionChoice.fromJson(Map json)
+```
+
+---
+
+## abstract class `ChatMessage`
+
+**Mixins:** `_$ChatMessage`
+
+Chat message.
+
+### Constructors
+
+#### `ChatMessage.system`
+
+```dart
+ChatMessage.system({String? id, required Object content, String? source, Map? metadata, String role})
+```
+
+System chat message.
+
+#### `ChatMessage.user`
+
+```dart
+ChatMessage.user({String? id, required Object content, String? source, Map? metadata, String role, Object? toolCallId})
+```
+
+User chat message.
+
+#### `ChatMessage.assistant`
+
+```dart
+ChatMessage.assistant({String? id, required Object content, String? source, Map? metadata, String role, List? toolCalls, String? model})
+```
+
+Assistant chat message.
+
+#### `ChatMessage.tool`
+
+```dart
+ChatMessage.tool({String? id, required Object content, String? source, Map? metadata, String role, String? toolCallId, String? function, ToolCallError? error})
+```
+
+Tool chat message.
+
+#### `ChatMessage.fromJson`
+
+```dart
+ChatMessage.fromJson(Map json)
+```
+
+---
+
+## class `ConfigException`
+
+**Implements:** `Exception`
+
+Exception thrown when runner config resolution fails.
+
+This is the library-level exception for the dataset_config_dart package.
+CLI or web frontends can catch this and present the error appropriately.
+
+### Constructors
+
+#### `ConfigException`
+
+```dart
+ConfigException(String message)
+```
+
+### Properties
+
+- **`message`** → `String` *(final)*
+
+---
+
+## class `ConfigResolver`
+
+Convenience facade that composes Parser → Resolver into a single call.
+
+For finer-grained control, use [YamlParser], [JsonParser],
+and [EvalSetResolver] directly.
+
+### Constructors
+
+#### `ConfigResolver`
+
+```dart
+ConfigResolver()
+```
+
+### Methods
+
+#### `resolve`
+
+```dart
+List resolve(String datasetPath, List jobNames)
+```
+
+Resolve dataset + job(s) into [EvalSet] objects.
+
+[datasetPath] is the root directory containing `tasks/` and `jobs/`.
+[jobNames] are the job names (looked up in `jobs/`) or paths.
+
+**Parameters:**
+
+- `datasetPath` (`String`) *(required)*
+- `jobNames` (`List`) *(required)*
+
+---
+
+## abstract class `Content`
+
+**Mixins:** `_$Content`
+
+Content sent to or received from a model.
+
+### Constructors
+
+#### `Content.text`
+
+```dart
+Content.text({required String text, bool refusal, List