diff --git a/README.md b/README.md
index d2b82507c..547065994 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,7 @@ ART is an open-source RL framework that improves agent reliability by allowing L
 | Agent Task          | Example Notebook                                                                                                                       | Description                                         | Comparative Performance                                                                                                                                                                                                     |
 | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | **ART•E [Serverless]**   | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb)                       | Qwen3 14B learns to search emails using RULER     | <img src="https://github.com/openpipe/art/raw/main/assets/benchmarks/email_agent/accuracy-training-progress.svg" height="72"> [benchmarks](/dev/art-e/art_e/evaluate/display_benchmarks.ipynb)                              |
+| **ART•E Local** | [Example](/examples/art_e) | Lightweight local email-search agent task with deterministic inbox fixtures | Local smoke-test example |
 | **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb)                   | Qwen3 14B learns to play 2048                     | <img src="https://github.com/openpipe/art/raw/main/assets/benchmarks/2048/accuracy-training-progress.svg" height="72"> [benchmarks](/examples/2048/display_benchmarks.ipynb)                                                |
 | **ART•E LangGraph** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb)   | Qwen 2.5 7B learns to search emails using LangGraph | [Link coming soon]                                                                                                                                                                                                          |
 | **MCP•RL**          | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb)               | Qwen 2.5 3B masters the NWS MCP server              | [Link coming soon]                                                                                                                                                                                                          |
diff --git a/examples/art_e/README.md b/examples/art_e/README.md
new file mode 100644
index 000000000..94cf44f79
--- /dev/null
+++ b/examples/art_e/README.md
@@ -0,0 +1,46 @@
+# ART-E Email Search Example
+
+This example is a lightweight, local version of the ART-E email search task. It
+shows how to train an agent to search a small inbox, read relevant messages, and
+return a grounded answer with supporting message IDs.
+
+The example is intentionally small:
+
+- No external email service is required.
+- The inboxes are deterministic Python fixtures.
+- The rollout uses a simple text protocol instead of provider-specific tool
+  calling so it works across most chat models.
+- The reward combines exact answer matching and citation correctness.
+
+For the full ART-E research context, see the
+[ART-E blog post](https://openpipe.ai/blog/art-e-mail-agent).
+
+## Files
+
+- `scenarios.py` defines inbox fixtures, search/read helpers, and answer
+  scoring.
+- `rollout.py` runs one multi-turn email-search trajectory.
+- `train.py` trains a small model with ART using the local scenarios.
+
+## Run One Rollout
+
+Set an inference API key for the provider used by your `art.Model`, then run:
+
+```bash
+python examples/art_e/rollout.py
+```
+
+The script uses an OpenRouter model by default for a cheap smoke test. You can
+change the model configuration at the bottom of `rollout.py`.
+
+## Train
+
+Training requires the normal ART local backend setup:
+
+```bash
+python examples/art_e/train.py
+```
+
+The default training configuration is deliberately modest so the example is easy
+to inspect. Increase `SIMULTANEOUS_ROLLOUTS`, `TRAIN_STEPS`, or the base model
+when running on a larger GPU.
diff --git a/examples/art_e/__init__.py b/examples/art_e/__init__.py
new file mode 100644
index 000000000..ef66a8b65
--- /dev/null
+++ b/examples/art_e/__init__.py
@@ -0,0 +1 @@
+"""Lightweight ART-E email search example."""
diff --git a/examples/art_e/rollout.py b/examples/art_e/rollout.py
new file mode 100644
index 000000000..9e49bbcdc
--- /dev/null
+++ b/examples/art_e/rollout.py
@@ -0,0 +1,191 @@
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import random
+
+from dotenv import load_dotenv
+import openai
+import requests
+from scenarios import (
+    SCENARIOS,
+    EmailScenario,
+    parse_json_command,
+    read_email,
+    score_answer,
+    search_emails,
+)
+
+import art
+
+load_dotenv()
+
+MAX_TURNS = 6
+
+SYSTEM_PROMPT = """You are ART-E, an email research agent.
+
+You need to answer the user's question by searching and reading their inbox.
+
+Use exactly one command per assistant message:
+
+<search>{"keywords":["keyword"],"sent_before":"YYYY-MM-DD"}</search>
+<read>{"message_id":"message-id"}</read>
+<answer>{"answer":"final answer","reference_message_ids":["message-id"]}</answer>
+
+Rules:
+- Search before answering unless the answer is already present in the context.
+- Read a message before citing it.
+- Cite only message IDs that support your answer.
+- Keep final answers concise and factual.
+"""
+
+
+def tool_message(payload: object) -> dict[str, str]:
+    return {
+        "role": "user",
+        "content": "Tool result:\n" + json.dumps(payload, indent=2, sort_keys=True),
+    }
+
+
+@art.retry(exceptions=(openai.LengthFinishReasonError, requests.ReadTimeout))
+async def rollout(
+    model: art.Model,
+    scenario: EmailScenario,
+    step: int = 0,
+    is_validation: bool = False,
+    verbose: bool = False,
+) -> art.Trajectory:
+    trajectory = art.Trajectory(
+        messages_and_choices=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {
+                "role": "user",
+                "content": (
+                    f"Inbox: {scenario.inbox_address}\n"
+                    f"Today: {scenario.query_date}\n"
+                    f"Question: {scenario.question}"
+                ),
+            },
+        ],
+        metadata={
+            "scenario_id": scenario.id,
+            "step": step,
+            "validation": is_validation,
+        },
+        reward=0,
+    )
+
+    client = model.openai_client()
+    searched = False
+    read_message_ids: set[str] = set()
+
+    for _ in range(MAX_TURNS):
+        completion = await client.chat.completions.create(
+            max_completion_tokens=256,
+            messages=trajectory.messages(),
+            model=model.get_inference_name(),
+        )
+        choice = completion.choices[0]
+        content = choice.message.content or ""
+        trajectory.messages_and_choices.append(choice)
+
+        if verbose:
+            print(content)
+
+        if search_payload := parse_json_command(content, "search"):
+            keywords = search_payload.get("keywords", [])
+            if not isinstance(keywords, list):
+                trajectory.reward = -0.25
+                trajectory.metrics["invalid_command"] = 1
+                break
+            sent_before = search_payload.get("sent_before", scenario.query_date)
+            results = search_emails(
+                scenario,
+                [str(keyword) for keyword in keywords],
+                sent_before=str(sent_before) if sent_before else None,
+            )
+            searched = True
+            trajectory.messages_and_choices.append(
+                tool_message({"search_results": results})
+            )
+            continue
+
+        if read_payload := parse_json_command(content, "read"):
+            message_id = read_payload.get("message_id")
+            if not isinstance(message_id, str):
+                trajectory.reward = -0.25
+                trajectory.metrics["invalid_command"] = 1
+                break
+            email = read_email(scenario, message_id)
+            if email is None:
+                trajectory.messages_and_choices.append(
+                    tool_message({"error": f"Message not found: {message_id}"})
+                )
+            else:
+                read_message_ids.add(message_id)
+                trajectory.messages_and_choices.append(tool_message({"email": email}))
+            continue
+
+        if answer_payload := parse_json_command(content, "answer"):
+            answer = answer_payload.get("answer", "")
+            reference_message_ids = answer_payload.get("reference_message_ids", [])
+            if not isinstance(answer, str) or not isinstance(
+                reference_message_ids, list
+            ):
+                trajectory.reward = -0.25
+                trajectory.metrics["invalid_command"] = 1
+                break
+
+            references = [str(message_id) for message_id in reference_message_ids]
+            reward, metrics = score_answer(scenario, answer, references)
+            unread_citation = any(
+                message_id not in read_message_ids for message_id in references
+            )
+            if unread_citation:
+                reward *= 0.5
+            trajectory.reward = reward
+            trajectory.metrics.update(metrics)
+            trajectory.metrics["searched"] = float(searched)
+            trajectory.metrics["unread_citation"] = float(unread_citation)
+            break
+
+        trajectory.messages_and_choices.append(
+            tool_message(
+                {
+                    "error": (
+                        "Invalid command. Use one of <search>, <read>, or "
+                        "<answer> with a JSON payload."
+                    )
+                }
+            )
+        )
+    else:
+        trajectory.reward = -0.1
+        trajectory.metrics["ran_out_of_turns"] = 1
+
+    return trajectory
+
+
+if __name__ == "__main__":
+    random.seed(42)
+
+    smoke_test_model = art.Model(
+        name="gpt-4o-mini",
+        project="art-e",
+        inference_model_name="openai/gpt-4o-mini",
+        inference_base_url="https://openrouter.ai/api/v1",
+        inference_api_key=os.getenv("OPENROUTER_API_KEY"),
+    )
+
+    async def main() -> None:
+        trajectory = await rollout(
+            smoke_test_model,
+            random.choice(SCENARIOS),
+            is_validation=True,
+            verbose=True,
+        )
+        print("reward:", trajectory.reward)
+        print("metrics:", trajectory.metrics)
+
+    asyncio.run(main())
diff --git a/examples/art_e/scenarios.py b/examples/art_e/scenarios.py
new file mode 100644
index 000000000..775404040
--- /dev/null
+++ b/examples/art_e/scenarios.py
@@ -0,0 +1,236 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import date
+import json
+import re
+from typing import Any, TypedDict
+
+
+class EmailMessage(TypedDict):
+    id: str
+    sender: str
+    recipients: list[str]
+    sent_at: str
+    subject: str
+    body: str
+
+
+class SearchResult(TypedDict):
+    id: str
+    sender: str
+    sent_at: str
+    subject: str
+    snippet: str
+
+
+@dataclass(frozen=True)
+class EmailScenario:
+    id: str
+    inbox_address: str
+    query_date: str
+    question: str
+    answer: str
+    reference_message_ids: tuple[str, ...]
+    inbox: tuple[EmailMessage, ...]
+
+
+SCENARIOS: tuple[EmailScenario, ...] = (
+    EmailScenario(
+        id="quarterly-budget-owner",
+        inbox_address="alex@acme.test",
+        query_date="2026-03-15",
+        question=(
+            "Who owns the quarterly budget deck, and when did they say the "
+            "draft would be ready?"
+        ),
+        answer="Maya owns the quarterly budget deck and said the draft would be ready by Friday.",
+        reference_message_ids=("msg-budget-2",),
+        inbox=(
+            {
+                "id": "msg-budget-1",
+                "sender": "nora@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-03-01",
+                "subject": "Budget planning kickoff",
+                "body": "Let's collect assumptions for the quarterly budget review.",
+            },
+            {
+                "id": "msg-budget-2",
+                "sender": "maya@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-03-08",
+                "subject": "Quarterly budget deck owner",
+                "body": (
+                    "I will own the quarterly budget deck. The draft will be "
+                    "ready by Friday so finance can review it before Monday."
+                ),
+            },
+            {
+                "id": "msg-budget-3",
+                "sender": "finance@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-03-12",
+                "subject": "Reminder: travel budget",
+                "body": "Please submit travel budget updates before the end of the month.",
+            },
+        ),
+    ),
+    EmailScenario(
+        id="customer-escalation-time",
+        inbox_address="alex@acme.test",
+        query_date="2026-04-05",
+        question=(
+            "What time is the Northwind escalation call, and which customer "
+            "issue should be discussed first?"
+        ),
+        answer="The Northwind escalation call is at 3 PM UTC, and the login outage should be discussed first.",
+        reference_message_ids=("msg-northwind-2",),
+        inbox=(
+            {
+                "id": "msg-northwind-1",
+                "sender": "support@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-04-01",
+                "subject": "Northwind weekly notes",
+                "body": "Northwind asked for the usual weekly usage report.",
+            },
+            {
+                "id": "msg-northwind-2",
+                "sender": "sam@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-04-03",
+                "subject": "Northwind escalation call",
+                "body": (
+                    "The Northwind escalation call is at 3 PM UTC. Please "
+                    "discuss the login outage first, then the reporting delay."
+                ),
+            },
+            {
+                "id": "msg-northwind-3",
+                "sender": "calendar@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-04-04",
+                "subject": "Daily standup moved",
+                "body": "Daily standup is moving to 10 AM local time next week.",
+            },
+        ),
+    ),
+    EmailScenario(
+        id="contract-renewal-discount",
+        inbox_address="alex@acme.test",
+        query_date="2026-04-20",
+        question="What renewal discount did Finch Labs approve?",
+        answer="Finch Labs approved a 12% renewal discount.",
+        reference_message_ids=("msg-finch-3",),
+        inbox=(
+            {
+                "id": "msg-finch-1",
+                "sender": "sales@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-04-09",
+                "subject": "Finch Labs renewal",
+                "body": "Finch Labs is reviewing renewal pricing this week.",
+            },
+            {
+                "id": "msg-finch-2",
+                "sender": "legal@acme.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-04-11",
+                "subject": "Finch Labs contract language",
+                "body": "The renewal contract language is approved from legal.",
+            },
+            {
+                "id": "msg-finch-3",
+                "sender": "riley@finch.test",
+                "recipients": ["alex@acme.test"],
+                "sent_at": "2026-04-18",
+                "subject": "Re: Finch Labs renewal",
+                "body": "We approved the 12% renewal discount. Please send the final order form.",
+            },
+        ),
+    ),
+)
+
+
+def search_emails(
+    scenario: EmailScenario,
+    keywords: list[str],
+    sent_before: str | None = None,
+    limit: int = 5,
+) -> list[SearchResult]:
+    normalized_keywords = [keyword.lower() for keyword in keywords if keyword.strip()]
+    try:
+        before = date.fromisoformat(sent_before) if sent_before else None
+    except ValueError:
+        before = None
+    results: list[SearchResult] = []
+
+    for email in scenario.inbox:
+        sent_at = date.fromisoformat(email["sent_at"])
+        if before and sent_at >= before:
+            continue
+
+        haystack = " ".join(
+            [email["sender"], email["subject"], email["body"]]
+        ).lower()
+        if normalized_keywords and not all(
+            keyword in haystack for keyword in normalized_keywords
+        ):
+            continue
+
+        results.append(
+            {
+                "id": email["id"],
+                "sender": email["sender"],
+                "sent_at": email["sent_at"],
+                "subject": email["subject"],
+                "snippet": email["body"][:160],
+            }
+        )
+
+    return results[:limit]
+
+
+def read_email(scenario: EmailScenario, message_id: str) -> EmailMessage | None:
+    for email in scenario.inbox:
+        if email["id"] == message_id:
+            return email
+    return None
+
+
+def parse_json_command(content: str, tag: str) -> dict[str, Any] | None:
+    match = re.search(rf"<{tag}>\s*(\{{.*?\}})\s*</{tag}>", content, re.DOTALL)
+    if not match:
+        return None
+    try:
+        payload = json.loads(match.group(1))
+    except json.JSONDecodeError:
+        return None
+    return payload if isinstance(payload, dict) else None
+
+
+def normalize_text(value: str) -> str:
+    value = value.lower()
+    value = re.sub(r"[^a-z0-9% ]+", " ", value)
+    return re.sub(r"\s+", " ", value).strip()
+
+
+def score_answer(
+    scenario: EmailScenario,
+    answer: str,
+    reference_message_ids: list[str],
+) -> tuple[float, dict[str, float]]:
+    normalized_answer = normalize_text(answer)
+    expected_answer = normalize_text(scenario.answer)
+
+    answer_score = 1.0 if expected_answer in normalized_answer else 0.0
+    expected_refs = set(scenario.reference_message_ids)
+    provided_refs = set(reference_message_ids)
+    citation_score = 1.0 if expected_refs.issubset(provided_refs) else 0.0
+    reward = (0.75 * answer_score) + (0.25 * citation_score)
+
+    return reward, {
+        "answer_correct": answer_score,
+        "citations_correct": citation_score,
+    }
diff --git a/examples/art_e/train.py b/examples/art_e/train.py
new file mode 100644
index 000000000..e4a2f7040
--- /dev/null
+++ b/examples/art_e/train.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+import asyncio
+import random
+
+from dotenv import load_dotenv
+from rollout import rollout
+from scenarios import SCENARIOS
+
+import art
+from art.local import LocalBackend
+
+load_dotenv()
+
+random.seed(42)
+
+TRAIN_STEPS = 30
+SIMULTANEOUS_ROLLOUTS = 12
+VALIDATION_ROLLOUTS = 3
+
+
+async def train() -> None:
+    backend = LocalBackend()
+
+    model = art.TrainableModel(
+        name="art-e-email-search-001",
+        project="art-e",
+        base_model="Qwen/Qwen2.5-3B-Instruct",
+    )
+
+    await model.register(backend)
+
+    scenarios = list(SCENARIOS)
+    for step in range(await model.get_step(), TRAIN_STEPS):
+        random.shuffle(scenarios)
+
+        train_groups = await art.gather_trajectory_groups(
+            (
+                art.TrajectoryGroup(
+                    rollout(model, scenario, step=step, is_validation=False)
+                    for _ in range(SIMULTANEOUS_ROLLOUTS)
+                )
+                for scenario in scenarios
+            ),
+            pbar_desc="train",
+            max_exceptions=10,
+        )
+
+        val_groups = await art.gather_trajectory_groups(
+            (
+                art.TrajectoryGroup(
+                    rollout(model, scenario, step=step, is_validation=True)
+                    for _ in range(VALIDATION_ROLLOUTS)
+                )
+                for scenario in SCENARIOS
+            ),
+            pbar_desc="val",
+            max_exceptions=10,
+        )
+
+        await model.log(val_groups)
+        await model.delete_checkpoints()
+        result = await backend.train(model, train_groups, learning_rate=1e-5)
+        await model.log(
+            train_groups,
+            metrics=result.metrics,
+            step=result.step,
+            split="train",
+        )
+
+
+if __name__ == "__main__":
+    asyncio.run(train())
diff --git a/tests/test_art_e_example.py b/tests/test_art_e_example.py
new file mode 100644
index 000000000..f61c39ded
--- /dev/null
+++ b/tests/test_art_e_example.py
@@ -0,0 +1,41 @@
+from examples.art_e.scenarios import (
+    SCENARIOS,
+    parse_json_command,
+    read_email,
+    score_answer,
+    search_emails,
+)
+
+
+def test_search_emails_finds_matching_message() -> None:
+    scenario = SCENARIOS[0]
+
+    results = search_emails(scenario, ["quarterly", "deck"])
+
+    assert [result["id"] for result in results] == ["msg-budget-2"]
+
+
+def test_read_email_returns_message_by_id() -> None:
+    scenario = SCENARIOS[1]
+
+    email = read_email(scenario, "msg-northwind-2")
+
+    assert email is not None
+    assert "3 PM UTC" in email["body"]
+
+
+def test_score_answer_rewards_answer_and_citation() -> None:
+    scenario = SCENARIOS[2]
+
+    reward, metrics = score_answer(
+        scenario,
+        "Finch Labs approved a 12% renewal discount.",
+        ["msg-finch-3"],
+    )
+
+    assert reward == 1.0
+    assert metrics == {"answer_correct": 1.0, "citations_correct": 1.0}
+
+
+def test_parse_json_command_ignores_invalid_json() -> None:
+    assert parse_json_command("<search>{invalid}</search>", "search") is None