flutter · ericwindmill · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -0,0 +1,93 @@
+# Builds the docs by running `make html` in docs/ and, on pushes to main,
+# deploys the resulting HTML to Firebase Hosting.
+name: Documentation
+
+on:
+  pull_request:
+    # NOTE(review): the build job also installs packages/dash_evals; confirm
+    # whether changes under that path should trigger this workflow too.
+    paths:
+      - 'docs/**'
+      - 'packages/dataset_config_dart/**'
+      - 'tool/**'
+      - '.github/workflows/docs.yml'
+  push:
+    branches:
+      - main
+
+# Read-only GITHUB_TOKEN by default; the deploy job widens it explicitly.
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.13'
+
+      - name: Set up Flutter
+        uses: subosito/flutter-action@fd55f4c5af5b953cc57a2be44cb082c8f6635e8e
+        with:
+          channel: stable
+
+      - name: Install Python dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r docs/requirements.txt
+          pip install -e packages/dash_evals
+
+      - name: Install Dart dependencies
+        run: |
+          flutter pub get
+          cd tool/dartdoc_to_md && dart pub get
+
+      - name: Build documentation
+        working-directory: docs
+        run: make html
+
+      # Hands the built site to the deploy job. Fail here, rather than later
+      # in deploy, if the build produced no HTML.
+      - name: Upload build artifact
+        uses: actions/upload-artifact@v6
+        with:
+          name: docs-html
+          path: docs/_build/html
+          retention-days: 1
+          if-no-files-found: error
+
+  deploy:
+    # Only deploy on push to main
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    needs: build
+    runs-on: ubuntu-latest
+    # The deploy step passes GITHUB_TOKEN as repoToken; these are the scopes
+    # the action's README lists for it - TODO confirm against the version used.
+    permissions:
+      checks: write
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      # Restore the HTML to the same path the build job uploaded it from.
+      - name: Download build artifact
+        uses: actions/download-artifact@v7
+        with:
+          name: docs-html
+          path: docs/_build/html
+
+      - name: Deploy to Firebase Hosting
+        uses: FirebaseExtended/action-hosting-deploy@v0
+        with:
+          repoToken: ${{ secrets.GITHUB_TOKEN }}
+          firebaseServiceAccount: ${{ secrets.FIREBASE_SERVICE_ACCOUNT }}
+          projectId: evals
+          target: evals-docs
+          channelId: live
diff --git a/README.md b/README.md
@@ -1,3 +1,48 @@
-# Flutter evals
+# evals
 
-Evaluation framework for testing AI agents ability to write Dart and Flutter code.
+Evaluation framework for testing AI agents' ability to write Dart and Flutter code. Built on [Inspect AI](https://inspect.aisi.org.uk/).
+
+> [!TIP]
+> Full documentation is available at [evals-docs.web.app](https://evals-docs.web.app/).
+
+## Overview
+
+evals provides:
+
+- **Evaluation Runner** — Python package for running LLM evaluations with configurable tasks, variants, and models
+- **Evaluation Configuration** — Dart and Python packages that resolve dataset YAML into EvalSet JSON for the runner
+- **devals CLI** — Dart CLI for creating and managing dataset samples, tasks, and jobs
+- **Evaluation Explorer** — Dart/Flutter app for browsing and analyzing results
+- **Dataset** — Curated samples for Dart/Flutter Q&A, code generation, and debugging tasks
+
+## Packages
+
+| Package | Description | Docs |
+|---------|-------------|------|
+| [dash_evals](packages/dash_evals/) | Python evaluation runner using Inspect AI | [dash_evals docs](docs/contributing/packages/dash_evals.md) |
+| [dataset_config_dart](packages/dataset_config_dart/) | Dart library for resolving dataset YAML into EvalSet JSON (includes shared data models) | [dataset_config_dart docs](docs/contributing/packages/dataset_config_dart.md) |
+| [dataset_config_python](packages/dataset_config_python/) | Python configuration models | — |
+| [devals_cli](packages/devals_cli/) | Dart CLI for managing evaluation tasks and jobs | [CLI docs](docs/reference/cli.md) |
+| [eval_explorer](packages/eval_explorer/) | Dart/Flutter results viewer (Serverpod) | [eval_explorer docs](docs/contributing/packages/eval_explorer.md) |
+
+> [!NOTE]
+> The **uploader** and **report_app** packages are deprecated and will be replaced by **eval_explorer**.
+
+## Documentation
+
+| Doc | Description |
+|-----|-------------|
+| [Quick Start](docs/guides/quick_start.md) | Get started authoring your own evals |
+| [Contributing Guide](docs/contributing/guide.md) | Development setup and guidelines |
+| [CLI Reference](docs/reference/cli.md) | Full devals CLI command reference |
+| [Configuration Reference](docs/reference/configuration_reference.md) | YAML configuration file reference |
+| [Repository Structure](docs/contributing/repository_structure.md) | Project layout |
+| [Glossary](docs/reference/glossary.md) | Terminology guide |
+
+## Contributing
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for details, or go directly to the [Contributing Guide](docs/contributing/guide.md).
+
+## License
+
+See [LICENSE](LICENSE) for details.
diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,55 @@
+# Makefile for Sphinx + Dart API documentation
+#
+# `help` is the first target, so a bare `make` prints sphinx-build's help.
+# $(O) is not set here; anything passed as `make O=...` is appended to the
+# sphinx-build / sphinx-autobuild command line.
+
+# Defaults; SPHINXOPTS and SPHINXBUILD also honor values from the environment.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Dart API generator
+DARTDOC_TOOL  = ../tool/dartdoc_to_md
+REPO_ROOT     = ..
+# Generated Dart API markdown, relative to this directory; removed by `clean`.
+DART_API_DIR  = reference/dart_api
+
+.PHONY: help clean html livehtml dartdoc html-python
+
+# Shared Sphinx HTML build steps, so `html` and `html-python` cannot drift apart.
+define sphinx-html
+@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+@echo "Build finished. Open $(BUILDDIR)/html/index.html"
+endef
+
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Remove the Sphinx build tree and the generated Dart API markdown
+clean:
+	rm -rf $(BUILDDIR)
+	rm -rf $(DART_API_DIR)
+
+# Generate Dart API markdown using the custom analyzer-based generator.
+# --root is passed as an absolute, quoted path so a checkout path containing
+# spaces works, and $(abspath) avoids forking a shell just to run `pwd`.
+# NOTE(review): --output is relative; presumably the generator resolves it
+# against --root (clean removes $(DART_API_DIR) under docs/) - confirm.
+dartdoc:
+	@echo "Generating Dart API documentation..."
+	cd "$(DARTDOC_TOOL)" && dart run bin/generate.dart --root "$(abspath $(REPO_ROOT))" --output docs/$(DART_API_DIR)
+	@echo "Dart API markdown generated in $(DART_API_DIR)/"
+
+# Build HTML docs (runs Dart generator first, then Sphinx)
+html: dartdoc
+	$(sphinx-html)
+
+# Build HTML docs without Dart doc generation (faster for Python-only changes)
+html-python:
+	$(sphinx-html)
+
+# Run sphinx-autobuild over the sources, writing HTML to $(BUILDDIR)/html
+livehtml:
+	sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)