diff --git a/pages/_meta.ts b/pages/_meta.ts
index ef9a5cab..b700a1ca 100644
--- a/pages/_meta.ts
+++ b/pages/_meta.ts
@@ -18,6 +18,10 @@ const meta: Meta = {
title: "Workbench",
type: "page",
},
+ gateway: {
+ title: "Gateway",
+ type: "page",
+ },
infrastructure: {
title: "Runtime",
type: "page",
diff --git a/pages/ai/_meta.ts b/pages/ai/_meta.ts
index 7165fac5..88e4c860 100644
--- a/pages/ai/_meta.ts
+++ b/pages/ai/_meta.ts
@@ -2,6 +2,26 @@ import type { Meta } from "nextra";
const meta: Meta = {
index: "AI Introduction",
+ "-- gateway": {
+ type: "separator",
+ title: "Inference Gateway",
+ },
+ "gateway-intro": {
+ title: "Introduction",
+ href: "/gateway",
+ },
+ "gateway-start": {
+ title: "Getting Started",
+ href: "/gateway/getting-started",
+ },
+ "gateway-models": {
+ title: "Models & Providers",
+ href: "/gateway/models",
+ },
+ "gateway-zdr": {
+ title: "Zero Data Retention",
+ href: "/gateway/zdr",
+ },
"-- workbench": {
type: "separator",
title: "Agentic Workbench",
diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx
index fdfbcaed..629fda09 100644
--- a/pages/ai/index.mdx
+++ b/pages/ai/index.mdx
@@ -44,8 +44,20 @@ Core capabilities:
Each run produces task and agent evaluations. That data feeds back into the workbench to improve prompts, policies, and workflows over time.
+## Inference Gateway
+
+The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement.
+
+Key capabilities:
+
+- **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers.
+- **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency.
+- **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements.
+- **On-chain payments.** [SpendAuth](/gateway/spend-auth) — pay operators directly without a credit card.
+
## Learn More
+- [Gateway — Getting Started](/gateway/getting-started)
- [Workbench details](/vibe/introduction)
- [Runtime and sandboxing](/infrastructure/introduction)
- [Operator onboarding](/operators/introduction)
diff --git a/pages/developers/blueprints/use-cases.mdx b/pages/developers/blueprints/use-cases.mdx
index c329e097..39953f8e 100644
--- a/pages/developers/blueprints/use-cases.mdx
+++ b/pages/developers/blueprints/use-cases.mdx
@@ -21,7 +21,21 @@ Tangle Network enables developers to rapidly build and deploy secure multi-party
displayStyle="row"
/>
-## AI
+## AI & Inference
+
+
+
+
`) |
+
+## Response
+
+```json
+{
+ "data": {
+ "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV",
+ "total_cost": 0.00123,
+ "usage": 0.00123,
+ "created_at": "2026-04-10T12:00:00.000Z",
+ "model": "anthropic/claude-sonnet-4-6",
+ "is_byok": false,
+ "provider_name": "anthropic",
+ "streamed": true,
+ "latency": 200,
+ "generation_time": 1500,
+ "tokens_prompt": 100,
+ "tokens_completion": 50,
+ "native_tokens_cached": 80,
+ "native_tokens_reasoning": 0,
+ "status": "success",
+ "routing_trace": {...},
+ "cache_hit": false
+ }
+}
+```
+
+## Errors
+
+| Status | Code | Description |
+| ------ | ----------- | ----------------------------------------------- |
+| 400 | — | Missing or invalid generation ID |
+| 401 | — | Authentication required |
+| 404 | `not_found` | Generation not found or belongs to another user |
diff --git a/pages/gateway/authentication.mdx b/pages/gateway/authentication.mdx
new file mode 100644
index 00000000..5ce04f4e
--- /dev/null
+++ b/pages/gateway/authentication.mdx
@@ -0,0 +1,57 @@
+---
+title: Authentication
+description: Authentication methods for Tangle Gateway.
+---
+
+# Authentication
+
+Four authentication methods — plus anonymous access for free-tier models — each with different rate limits and capabilities.
+
+## API Key
+
+Create keys at the dashboard. Keys start with `sk-tan-` and are SHA256-hashed before storage.
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ https://router.tangle.tools/v1/chat/completions
+```
+
+- **Rate limit:** 60 req/min
+- **Credit check:** Yes (402 if balance is zero on non-free-tier models)
+- **Key features:** Expiration dates, soft revocation, last-used tracking
+
+## Session (Cookie)
+
+Browser-based authentication via Better Auth. Supports email/password and OAuth (Google, GitHub).
+
+- **Rate limit:** 30 req/min
+- **Credit check:** Yes
+
+## SIWE (Sign-In with Ethereum)
+
+Wallet-based authentication via EIP-191 signatures. Authenticate with your Ethereum wallet.
+
+```
+POST /api/siwe/verify
+{ "address": "0x...", "signature": "0x...", "message": "..." }
+```
+
+## SpendAuth (On-Chain Payment)
+
+EIP-712 signed payment authorization. No account needed — pay operators directly on-chain.
+
+```bash
+curl -H "X-Payment-Signature: {\"commitment\":\"0x...\",\"amount\":\"1000000\",...}" \
+ https://router.tangle.tools/v1/chat/completions
+```
+
+- **Rate limit:** 120 req/min per commitment
+- **Credit check:** No (payment is on-chain)
+- See [SpendAuth](/gateway/spend-auth) for details.
+
+## Anonymous
+
+No authentication required for [free tier models](/gateway/free-tier).
+
+- **Rate limit:** 10 req/min, 5 req/day
+- **Model access:** Free tier only (gpt-4o-mini, llama-3.1-8b, etc.)
diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx
new file mode 100644
index 00000000..120af118
--- /dev/null
+++ b/pages/gateway/byok.mdx
@@ -0,0 +1,86 @@
+---
+title: Bring Your Own Key (BYOK)
+description: Use your own provider API keys with Tangle Gateway for zero-markup access.
+---
+
+# Bring Your Own Key (BYOK)
+
+Use your existing provider API keys with Tangle Gateway. BYOK requests have **zero platform markup** — you pay the provider's list price directly.
+
+## Per-request BYOK
+
+Pass credentials in `providerOptions.gateway.byok`:
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [{"apiKey": "sk-ant-your-key"}]
+ }
+ }
+ }
+}
+```
+
+### Multiple credentials
+
+Specify multiple credentials per provider. The gateway tries them in order:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [
+ {"apiKey": "sk-ant-primary"},
+ {"apiKey": "sk-ant-backup"}
+ ]
+ }
+ }
+ }
+}
+```
+
+### Multiple providers
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [{"apiKey": "sk-ant-..."}],
+ "openai": [{"apiKey": "sk-..."}]
+ }
+ }
+ }
+}
+```
+
+## Automatic fallback
+
+If your BYOK credentials fail (401, 403, rate limit), the gateway automatically falls back to platform credentials. This fallback preserves all compliance filters — if you requested [ZDR](/gateway/zdr), the fallback will only use ZDR-compliant system credentials.
+
+The `X-Tangle-BYOK` response header indicates whether the request used your credentials:
+
+```
+X-Tangle-BYOK: true # Your key was used
+```
+
+If the header is absent, platform credentials were used (possibly via fallback).
+
+## Pricing
+
+| Credential type | Markup |
+| -------------------- | ---------------------------- |
+| BYOK | **0%** — provider list price |
+| Platform credentials | 20% markup (configurable) |
+
+## Security
+
+- BYOK credentials are never logged, stored, or persisted.
+- Credentials exist only in memory for the duration of the request.
+- The `providerOptions` field is stripped from the request body before forwarding to providers.
+- Credentials are validated by structure (`apiKey` must be a string) and sanitized against prototype pollution.
diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx
new file mode 100644
index 00000000..6e2417d5
--- /dev/null
+++ b/pages/gateway/caching.mdx
@@ -0,0 +1,72 @@
+---
+title: Automatic Caching
+description: Enable prompt caching across providers with a single flag.
+---
+
+# Automatic Caching
+
+Some providers require explicit cache markers to enable prompt caching, while others cache automatically. Use `caching: 'auto'` to let the gateway handle it.
+
+## Usage
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [
+ {"role": "system", "content": "You are a helpful assistant with a large knowledge base..."},
+ {"role": "user", "content": "What is Tangle?"}
+ ],
+ "providerOptions": {
+ "gateway": {
+ "caching": "auto"
+ }
+ }
+}
+```
+
+## How it works
+
+| Provider | Caching Type | What `auto` does |
+| ------------------------------ | ------------ | ----------------------------------------------------------------------- |
+| OpenAI | Implicit | No change needed. Caching happens automatically. |
+| Google | Implicit | No change needed. |
+| DeepSeek | Implicit | No change needed. |
+| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. |
+| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. |
+
+For Anthropic, the gateway converts:
+
+```json
+{"role": "system", "content": "You are helpful..."}
+```
+
+Into:
+
+```json
+{"role": "system", "content": [{"type": "text", "text": "You are helpful...", "cache_control": {"type": "ephemeral"}}]}
+```
+
+This caches the system prompt so subsequent messages in the same conversation reuse it, reducing costs by up to 90%.
+
+## Response caching
+
+Separately from prompt caching, the gateway caches complete responses for **deterministic requests** (temperature ≤ 0.01, non-streaming). Cached responses are free.
+
+```
+X-Tangle-Cache: HIT # Served from cache
+X-Tangle-Cache: MISS # Fetched from provider
+```
+
+Disable per-request:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "cache": false
+ }
+ }
+}
+```
+
+The response cache key includes: model, messages, temperature, max_tokens, tools, response_format, and top_p. Different parameters always produce different cache entries.
diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx
new file mode 100644
index 00000000..c1d157ae
--- /dev/null
+++ b/pages/gateway/enterprise-zdr.mdx
@@ -0,0 +1,94 @@
+---
+title: Enterprise ZDR Setup
+description: Configure Zero Data Retention for your organization.
+---
+
+# Enterprise ZDR Setup
+
+This guide walks through configuring ZDR for an organization that needs to guarantee no prompts or responses are retained by AI providers.
+
+## Step 1: Understand the trust model
+
+Read the [ZDR trust model](/gateway/zdr#trust-model) first. Key points:
+
+- ZDR is enforced at the **direct provider** level only.
+- **Operators are skipped** when ZDR is enabled (their backing provider is unverifiable).
+- **LiteLLM is skipped** (its internal routing is uncontrollable).
+- BYOK fallback to platform credentials preserves ZDR filtering.
+
+## Step 2: Choose your approach
+
+### Option A: Team-wide ZDR (recommended)
+
+Enable ZDR for all requests from your team. No code changes needed — every request is automatically filtered.
+
+Contact your admin to set `zdrEnabled: true` on your team record via the admin API:
+
+```bash
+# Admin sets team-wide ZDR
+curl -X PUT https://router.tangle.tools/api/admin/compliance \
+ -H "Cookie: session_token=ADMIN_SESSION" \
+ -d '{"providerId": "...", "zdr": true}'
+```
+
+### Option B: Per-request ZDR
+
+Add `zeroDataRetention: true` to individual requests. Useful for mixed workloads where only some requests handle sensitive data.
+
+```python
+response = client.chat.completions.create(
+ model="anthropic/claude-sonnet-4-6",
+ messages=[...],
+ extra_body={
+ "providerOptions": {
+ "gateway": {"zeroDataRetention": True}
+ }
+ }
+)
+```
+
+## Step 3: Verify provider coverage
+
+Check which providers are ZDR-verified for the models you need:
+
+```bash
+curl https://router.tangle.tools/api/gateway/compliance | jq '.providers[] | select(.zdr == true)'
+```
+
+If your required model is only available from a non-ZDR provider, the request will return 400 with a clear error listing which providers were considered.
+
+## Step 4: Set up BYOK (optional)
+
+For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you:
+
+- Zero platform markup
+- Direct contractual relationship with the provider
+- ZDR enforcement still applies on the fallback path
+
+## Step 5: Monitor compliance
+
+Use the [generation lookup API](/gateway/generation-lookup) to audit requests:
+
+```bash
+# Check if a specific request used a ZDR provider
+curl -H "Authorization: Bearer sk-tan-..." \
+ "https://router.tangle.tools/v1/generation?id=gen_..." \
+ | jq '.data.provider_name'
+```
+
+The `routing_trace` field shows exactly which providers were considered and filtered.
+
+## Combining ZDR + no-train
+
+Both flags work as an AND: when both are enabled, requests are routed only to providers that satisfy both criteria. This is the strictest compliance level.
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "zeroDataRetention": true,
+ "disallowPromptTraining": true
+ }
+ }
+}
+```
diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx
new file mode 100644
index 00000000..bd01e96f
--- /dev/null
+++ b/pages/gateway/fallbacks.mdx
@@ -0,0 +1,71 @@
+---
+title: Model Fallbacks
+description: Configure backup models that are tried when the primary model fails.
+---
+
+# Model Fallbacks
+
+Specify backup models that are tried in order if the primary model fails or is unavailable.
+
+## Usage
+
+Pass a `models` array in `providerOptions.gateway`:
+
+```json
+{
+ "model": "openai/gpt-4o",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "providerOptions": {
+ "gateway": {
+ "models": ["anthropic/claude-sonnet-4-6", "groq/llama-3.1-70b-versatile"]
+ }
+ }
+}
+```
+
+The gateway tries:
+
+1. `openai/gpt-4o` (primary model)
+2. `anthropic/claude-sonnet-4-6` (first fallback)
+3. `groq/llama-3.1-70b-versatile` (second fallback)
+
+The response comes from the first model that succeeds.
+
+## How fallback works
+
+For each model in the list, the gateway runs the full routing chain:
+
+1. **Operators** — try operators serving this model (if available)
+2. **LiteLLM** — try the proxy with built-in retries
+3. **Direct provider** — call the provider API directly
+
+If all tiers fail for a model, the gateway moves to the next model in the list.
+
+## Combining with provider ordering
+
+Use `models` with `order` to control both model fallback and provider preference:
+
+```json
+{
+ "model": "openai/gpt-4o",
+ "providerOptions": {
+ "gateway": {
+ "models": ["anthropic/claude-sonnet-4-6"],
+ "order": ["bedrock", "anthropic"]
+ }
+ }
+}
+```
+
+This tries:
+
+1. `openai/gpt-4o` via available providers
+2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct
+
+## Observability
+
+When fallbacks occur, the [routing trace](/gateway/routing-trace) shows every model and provider attempted:
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(err:5001ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx
new file mode 100644
index 00000000..128480e7
--- /dev/null
+++ b/pages/gateway/feature-flags.mdx
@@ -0,0 +1,35 @@
+---
+title: Feature Flags
+description: Disable gateway features without a code deploy.
+---
+
+# Feature Flags
+
+All gateway features are on by default. Set any flag to `false` to disable it without deploying new code.
+
+## Available flags
+
+| Environment Variable | Default | Controls |
+| -------------------------- | ------- | ------------------------------------------------------------ |
+| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning |
+| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests |
+| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) |
+| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic |
+| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header |
+
+## Usage
+
+Set in your environment:
+
+```bash
+ENABLE_GUARDRAILS=false # Disable all guardrail scanning
+ENABLE_RESPONSE_CACHE=false # Disable response cache reads/writes
+```
+
+## Notes
+
+- `ENABLE_COMPLIANCE_FILTER` only disables the early validation check that returns a 400 before routing. The actual ZDR/no-train enforcement in the routing tiers (skip operators, skip LiteLLM) stays active regardless. This flag is for suppressing the early error, not for bypassing compliance.
+
+- When `ENABLE_GUARDRAILS=false`, no PII or injection scanning occurs. The `X-Tangle-Guardrails` header is never set. GuardrailEvent records are not created.
+
+- When `ENABLE_RESPONSE_CACHE=false`, every request hits the provider. Cached entries are not read or written. Existing cache entries are not purged (they expire naturally via TTL).
diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx
new file mode 100644
index 00000000..ff008510
--- /dev/null
+++ b/pages/gateway/free-tier.mdx
@@ -0,0 +1,62 @@
+---
+title: Free Tier
+description: Free access to small models with daily limits.
+---
+
+# Free Tier
+
+Try the gateway without credits. Free tier restricts to cheap, fast models with daily request limits.
+
+## Limits
+
+| Tier | Daily limit | Rate limit |
+| ---------------------------- | ----------- | ---------- |
+| Anonymous (no auth) | 5 req/day | 10 req/min |
+| Authenticated (zero credits) | 20 req/day | 30 req/min |
+| Paid (any credits) | Unlimited | 60 req/min |
+
+## Allowed models
+
+Free tier requests can use:
+
+| Model | Provider | Why it's free |
+| --------------------------- | --------- | ------------------- |
+| `gpt-4o-mini` | OpenAI | Small, cheap |
+| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap |
+| `llama-3.1-8b-instant` | Groq | Free tier inference |
+| `llama-3.2-1b-preview` | Groq | Tiny model |
+| `llama-3.2-3b-preview` | Groq | Small model |
+| `gemini-2.0-flash-lite` | Google | Free tier |
+| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap |
+| `deepseek-chat` | DeepSeek | Very cheap |
+
+## Blocked models
+
+These models require credits:
+
+- **OpenAI reasoning:** o1, o3, o4 (all variants)
+- **OpenAI flagship:** gpt-4o, gpt-4, gpt-5 (gpt-4o-mini is allowed)
+- **Anthropic flagship:** claude-opus, claude-sonnet (haiku is allowed)
+- **Google flagship:** gemini-2.5-pro, gemini-2.5-ultra
+- **xAI flagship:** grok-2, grok-3
+
+Requesting a blocked model without credits returns 402:
+
+```json
+{
+ "error": {
+ "message": "Model \"gpt-4o\" requires credits. Free tier models: gpt-4o-mini, llama-3.1-8b-instant, gemini-2.0-flash-lite, deepseek-chat. Add credits or use a free tier model.",
+ "type": "insufficient_funds",
+ "code": "free_tier_limit"
+ }
+}
+```
+
+## Response headers
+
+Free tier responses include remaining quota:
+
+```
+X-Free-Tier-Remaining: 3
+X-Free-Tier-Limit: 5
+```
diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx
new file mode 100644
index 00000000..ccc988f8
--- /dev/null
+++ b/pages/gateway/generation-lookup.mdx
@@ -0,0 +1,71 @@
+---
+title: Generation Lookup
+description: Retrieve detailed information about any request by its generation ID.
+---
+
+# Generation Lookup
+
+Every request returns a unique generation ID in the `X-Generation-Id` header. Use it to look up full request details.
+
+## Endpoint
+
+```
+GET /v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV
+```
+
+Requires authentication. Returns details only for requests made by the authenticated user.
+
+## Example
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ "https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV"
+```
+
+## Response
+
+```json
+{
+ "data": {
+ "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV",
+ "total_cost": 0.00123,
+ "usage": 0.00123,
+ "created_at": "2026-04-10T12:00:00.000Z",
+ "model": "anthropic/claude-sonnet-4-6",
+ "is_byok": false,
+ "provider_name": "anthropic",
+ "streamed": true,
+ "latency": 200,
+ "generation_time": 1500,
+ "tokens_prompt": 100,
+ "tokens_completion": 50,
+ "native_tokens_cached": 80,
+ "native_tokens_reasoning": 0,
+ "status": "success",
+ "routing_trace": {
+ "planningReasoning": "ZDR requested: filtering to 13 ZDR providers",
+ "modelAttempts": [...],
+ "totalLatencyMs": 1500
+ },
+ "cache_hit": false
+ }
+}
+```
+
+## Fields
+
+| Field | Description |
+| ------------------------------------- | ---------------------------------------------- |
+| `id` | Generation ID (`gen_`) |
+| `total_cost` | Total cost in USD |
+| `model` | Model that served the request |
+| `is_byok` | Whether BYOK credentials were used |
+| `provider_name` | Provider that served the request |
+| `streamed` | Whether the request used streaming |
+| `latency` | Time to first token (ms) |
+| `generation_time` | Total generation time (ms) |
+| `tokens_prompt` / `tokens_completion` | Token counts |
+| `native_tokens_cached` | Tokens served from provider cache |
+| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) |
+| `routing_trace` | Full routing attempt history |
+| `cache_hit` | Whether response was served from gateway cache |
diff --git a/pages/gateway/getting-started.mdx b/pages/gateway/getting-started.mdx
new file mode 100644
index 00000000..30d1edd2
--- /dev/null
+++ b/pages/gateway/getting-started.mdx
@@ -0,0 +1,94 @@
+---
+title: Getting Started
+description: Make your first inference request through Tangle Gateway in 2 minutes.
+---
+
+# Getting Started
+
+## 1. Get an API key
+
+Sign up at [router.tangle.tools](https://router.tangle.tools) and create an API key from the dashboard. Keys start with `sk-tan-`.
+
+## 2. Make a request
+
+### curl
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openai/gpt-4o-mini",
+ "messages": [{"role": "user", "content": "What is Tangle?"}],
+ "stream": false
+ }'
+```
+
+### Python (OpenAI SDK)
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="sk-tan-YOUR_KEY",
+ base_url="https://router.tangle.tools/v1"
+)
+
+response = client.chat.completions.create(
+ model="anthropic/claude-sonnet-4-6",
+ messages=[{"role": "user", "content": "What is Tangle?"}]
+)
+print(response.choices[0].message.content)
+```
+
+### TypeScript (AI SDK)
+
+```typescript
+import { generateText } from 'ai'
+import { createOpenAI } from '@ai-sdk/openai'
+
+const tangle = createOpenAI({
+ apiKey: 'sk-tan-YOUR_KEY',
+ baseURL: 'https://router.tangle.tools/v1',
+})
+
+const { text } = await generateText({
+ model: tangle('anthropic/claude-sonnet-4-6'),
+ prompt: 'What is Tangle?',
+})
+```
+
+## 3. Check the response headers
+
+Every response includes metadata headers:
+
+```
+X-Generation-Id: gen_01J5K7... # Unique request ID
+X-Tangle-Price-Input: 0.000003 # USD per input token
+X-Tangle-Price-Output: 0.000015 # USD per output token
+X-Tangle-Cache: MISS # Response cache status
+X-RateLimit-Remaining: 59 # Requests left in window
+```
+
+Use the generation ID to look up request details later via [`GET /v1/generation`](/gateway/generation-lookup).
+
+## 4. Try different models
+
+The model ID format is `provider/model-name`:
+
+```
+openai/gpt-4o-mini
+anthropic/claude-sonnet-4-6
+google/gemini-2.0-flash-lite
+groq/llama-3.1-8b-instant
+deepseek/deepseek-chat
+mistral/mistral-large-latest
+```
+
+You can also use bare model names (`gpt-4o-mini`, `claude-sonnet-4-6`) — the gateway resolves the provider automatically.
+
+## What's next
+
+- [Bring Your Own Key](/gateway/byok) — use your existing provider API keys for zero markup
+- [Model Fallbacks](/gateway/fallbacks) — configure backup models
+- [Zero Data Retention](/gateway/zdr) — compliance for sensitive workloads
diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx
new file mode 100644
index 00000000..e343a214
--- /dev/null
+++ b/pages/gateway/guardrails.mdx
@@ -0,0 +1,63 @@
+---
+title: Guardrails
+description: Gateway-level PII detection and prompt injection scanning.
+---
+
+# Guardrails
+
+The gateway scans all requests for PII and prompt injection patterns before routing. Results are available in the `X-Tangle-Guardrails` response header.
+
+## Detection categories
+
+### PII detection
+
+| Pattern | Severity | Example |
+| ------------------------------ | -------- | --------------------- |
+| SSN | Critical | `123-45-6789` |
+| Credit card (Visa/MC/Discover) | Critical | `4111 1111 1111 1111` |
+| Credit card (Amex) | Critical | `3782 822463 10005` |
+| Email | Low | `user@example.com` |
+| US phone | Medium | `(555) 123-4567` |
+| IP address | Low | `192.168.1.1` |
+
+### Prompt injection detection
+
+Applied to user messages only (not system or assistant):
+
+| Pattern | Severity |
+| ---------------------------------- | -------- |
+| "Ignore all previous instructions" | High |
+| "You are now a different AI" | High |
+| "Pretend you have no restrictions" | High |
+| "Reveal your system prompt" | Medium |
+| DAN-mode jailbreaks | High |
+
+## Modes
+
+### Audit mode (default)
+
+Flags are logged and returned in the `X-Tangle-Guardrails` header but requests are not blocked:
+
+```
+X-Tangle-Guardrails: pii:low,prompt_injection:high
+```
+
+### Block mode
+
+Requests matching configured categories are rejected with 400:
+
+```json
+{
+ "error": {
+ "message": "Request blocked by guardrails: pii, prompt_injection",
+ "type": "invalid_request_error",
+ "code": "guardrail_blocked"
+ }
+}
+```
+
+Block mode requires a `GuardrailPolicy` record configured for your team or user with specific categories to block.
+
+## Disabling
+
+Set `ENABLE_GUARDRAILS=false` to skip all scanning. See [Feature Flags](/gateway/feature-flags).
diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx
new file mode 100644
index 00000000..464211cc
--- /dev/null
+++ b/pages/gateway/how-routing-works.mdx
@@ -0,0 +1,65 @@
+---
+title: How Routing Works
+description: The three-tier routing architecture behind Tangle Gateway.
+---
+
+# How Routing Works
+
+Every request passes through up to three routing tiers. The gateway tries each tier in order and returns the first successful response.
+
+## The three tiers
+
+```
+Request → Tier 1: Operators → Tier 2: LiteLLM → Tier 3: Direct Provider → Response
+```
+
+### Tier 1: Operator routing
+
+Operators run [Blueprints](/developers/blueprints/introduction) — on-chain service definitions like the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint). They stake tokens, serve models, and compete on price, latency, and reputation.
+
+- Selected by [scoring algorithm](/gateway/smart-routing): reputation (40%) + latency (30%) + price (30%)
+- Discovered automatically from on-chain [Blueprint Service Manager](/developers/blueprints/service-lifecycle) contracts
+- Can be pinned by blueprint, service, or operator address
+- See [Operator Routing](/gateway/operator-routing) for the full Blueprint catalog
+
+**When it's used:** Default for `auto` routing mode, required for SpendAuth (on-chain payments).
+
+**When it's skipped:** When [ZDR](/gateway/zdr) or [no-train](/gateway/no-train) is requested (operators can't verify compliance). When `routing: "provider"` is set explicitly.
+
+### Tier 2: LiteLLM proxy
+
+An internal proxy that handles 100+ provider integrations with built-in retries and provider-level fallbacks.
+
+**When it's used:** Default for standard requests when no operator is available.
+
+**When it's skipped:** When ZDR or no-train is requested (LiteLLM's downstream routing is not compliance-controllable). When LiteLLM is not configured (`LITELLM_URL` unset).
+
+### Tier 3: Direct provider
+
+The gateway calls the provider API directly using platform credentials (or [BYOK](/gateway/byok) credentials).
+
+**When it's used:** Fallback when tiers 1 and 2 fail. Only tier used when compliance routing is active.
+
+**Always used for:** ZDR requests, no-train requests, BYOK with compliance flags.
+
+## Compliance mode
+
+When `zeroDataRetention` or `disallowPromptTraining` is set:
+
+```
+Request → Tier 3: Direct Provider (verified only) → Response
+```
+
+Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with verified compliance agreements. See [Zero Data Retention](/gateway/zdr) for the trust model.
+
+## Routing control
+
+| Method | Effect |
+| ------------------------------- | ---------------------------------------------------- |
+| `routing: "auto"` | Try all three tiers (default) |
+| `routing: "operator"` | Operators only. Fails if no operator available. |
+| `routing: "provider"` | Skip operators, use LiteLLM + direct only. |
+| `X-Tangle-Blueprint: <id>` | Pin to operators under this Blueprint. |
+| `X-Tangle-Operator: <address>` | Pin to a specific operator. |
+| `providerOptions.gateway.order` | Control which providers are tried and in what order. |
+| `providerOptions.gateway.only` | Restrict to these providers only. |
diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx
new file mode 100644
index 00000000..57c7630a
--- /dev/null
+++ b/pages/gateway/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Tangle Gateway
+description: Unified API for hundreds of AI models with built-in routing, compliance, and on-chain payments.
+---
+
+# Tangle Gateway
+
+Tangle Gateway is a unified inference API. One endpoint, hundreds of models, automatic routing across centralized providers and decentralized operators.
+
+## What it does
+
+- **One key, any model.** Access OpenAI, Anthropic, Google, Groq, Mistral, and 20+ providers through a single API key.
+- **Operator network.** Route to decentralized operators running [Blueprints](/developers/blueprints/introduction) on the Tangle network who compete on price, latency, and reputation.
+- **Compliance routing.** Zero Data Retention and no-train filtering with verified provider agreements.
+- **BYOK.** Bring your own provider keys for zero-markup access.
+- **On-chain payments.** Pay operators directly via SpendAuth — no credit card required.
+
+## Quick example
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Authorization: Bearer $TANGLE_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "stream": true
+ }'
+```
+
+Works with any OpenAI-compatible SDK. Change the base URL and you're done.
+
+## Architecture
+
+The gateway routes through three tiers, in order:
+
+| Tier | What | When |
+| ------------- | ---------------------------------------------------------- | -------------------------------------------------------------- |
+| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth |
+| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests |
+| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required |
+
+When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only.
+
+## How it fits
+
+```
+Workbench (agents) → Gateway (inference) → Operators (serving) → Protocol (settlement)
+```
+
+The gateway sits between the [Workbench](/vibe/introduction) where agents run and the [Protocol](/network/overview) where operators get paid. Agents in the workbench call the gateway for model access. The gateway selects the best provider or operator, routes the request, tracks usage, and settles payment.
+
+## Next steps
+
+- [Getting Started](/gateway/getting-started) — make your first request in 2 minutes
+- [Supported Models](/gateway/models) — browse the model catalog
+- [How Routing Works](/gateway/how-routing-works) — understand the 3-tier architecture
+- [Zero Data Retention](/gateway/zdr) — compliance for regulated industries
diff --git a/pages/gateway/migrate-openai.mdx b/pages/gateway/migrate-openai.mdx
new file mode 100644
index 00000000..c27336f5
--- /dev/null
+++ b/pages/gateway/migrate-openai.mdx
@@ -0,0 +1,77 @@
+---
+title: Migrate from OpenAI
+description: Switch from OpenAI's API to Tangle Gateway in under 5 minutes.
+---
+
+# Migrate from OpenAI
+
+Tangle Gateway is OpenAI-compatible. Change two lines and you're done.
+
+## Python
+
+```diff
+ from openai import OpenAI
+
+ client = OpenAI(
+- api_key="sk-...",
++ api_key="sk-tan-YOUR_KEY",
++ base_url="https://router.tangle.tools/v1",
+ )
+
+ response = client.chat.completions.create(
+- model="gpt-4o",
++ model="openai/gpt-4o", # or just "gpt-4o" — auto-resolved
+ messages=[{"role": "user", "content": "Hello"}]
+ )
+```
+
+## TypeScript
+
+```diff
+ import OpenAI from 'openai'
+
+ const client = new OpenAI({
+- apiKey: 'sk-...',
++ apiKey: 'sk-tan-YOUR_KEY',
++ baseURL: 'https://router.tangle.tools/v1',
+ })
+```
+
+## curl
+
+```diff
+- curl https://api.openai.com/v1/chat/completions \
+- -H "Authorization: Bearer sk-..." \
++ curl https://router.tangle.tools/v1/chat/completions \
++ -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{"model": "gpt-4o", "messages": [...]}'
+```
+
+## What you get
+
+By switching to Tangle Gateway, you get:
+
+- **Access to every provider** through the same client. Try `anthropic/claude-sonnet-4-6` or `groq/llama-3.1-70b` without changing SDKs.
+- **Automatic fallbacks.** If OpenAI is down, configure backup models.
+- **Cost visibility.** Every response tells you exactly what it cost via `X-Tangle-Price-*` headers.
+- **Compliance routing.** One flag for ZDR, one flag for no-train.
+- **BYOK.** Keep using your OpenAI key with zero markup. Add it to [`providerOptions.gateway.byok`](/gateway/byok).
+
+## Keep your OpenAI key (zero markup)
+
+If you already have an OpenAI API key, use [BYOK](/gateway/byok) for zero platform markup:
+
+```python
+response = client.chat.completions.create(
+ model="openai/gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}],
+ extra_body={
+ "providerOptions": {
+ "gateway": {
+ "byok": {"openai": [{"apiKey": "sk-YOUR_OPENAI_KEY"}]}
+ }
+ }
+ }
+)
+```
diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx
new file mode 100644
index 00000000..abad708f
--- /dev/null
+++ b/pages/gateway/migrate-vercel.mdx
@@ -0,0 +1,80 @@
+---
+title: Migrate from Vercel AI Gateway
+description: Switch from Vercel AI Gateway to Tangle Gateway.
+---
+
+# Migrate from Vercel AI Gateway
+
+Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI Gateway. Most code works unchanged.
+
+## What maps directly
+
+| Vercel Feature | Tangle Equivalent | Notes |
+| ------------------------------------------------ | ----------------- | --------------------------------- |
+| `providerOptions.gateway.byok` | Same | Identical schema |
+| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers |
+| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers |
+| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection |
+| `providerOptions.gateway.order` | Same | Provider priority |
+| `providerOptions.gateway.only` | Same | Provider allowlist |
+| `models` fallback array | Same | Model-level failover |
+| `GET /v1/credits` | Same | Balance check |
+| `GET /v1/generation` | Same | Request detail lookup |
+
+## What's different
+
+| Feature | Vercel | Tangle |
+| --------------------- | ------------------------------------- | -------------------------------------------------------- |
+| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` |
+| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) |
+| **Pricing** | Zero markup | 20% markup (0% with BYOK) |
+| **Operator network** | None | Decentralized operators compete on price/latency |
+| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card |
+| **Guardrails** | None | PII + injection detection built-in |
+| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) |
+| **OIDC auth** | Vercel-only | Not applicable |
+
+## Code change
+
+### AI SDK
+
+```diff
+ import { generateText } from 'ai'
++ import { createOpenAI } from '@ai-sdk/openai'
+
++ const tangle = createOpenAI({
++ apiKey: 'sk-tan-YOUR_KEY',
++ baseURL: 'https://router.tangle.tools/v1',
++ })
+
+ const { text } = await generateText({
+- model: 'anthropic/claude-sonnet-4-6',
++ model: tangle('anthropic/claude-sonnet-4-6'),
+ prompt: 'Hello',
+ providerOptions: {
+ gateway: {
+ zeroDataRetention: true, // works the same
+ caching: 'auto', // works the same
+ },
+ },
+ })
+```
+
+### OpenAI SDK
+
+```diff
+ const client = new OpenAI({
+- apiKey: process.env.AI_GATEWAY_API_KEY,
+- baseURL: 'https://ai-gateway.vercel.sh/v1',
++ apiKey: process.env.TANGLE_API_KEY,
++ baseURL: 'https://router.tangle.tools/v1',
+ })
+```
+
+## What you gain
+
+- **Operator network.** Access decentralized inference providers who compete on price and latency.
+- **On-chain payments.** Pay with crypto via SpendAuth — no Stripe/credit card required.
+- **Wallet auth.** Sign in with Ethereum (SIWE) for web3-native access.
+- **Guardrails.** Built-in PII and prompt injection detection on every request.
+- **Self-hostable.** Deploy your own gateway instance — it's open source.
diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx
new file mode 100644
index 00000000..4c058018
--- /dev/null
+++ b/pages/gateway/models.mdx
@@ -0,0 +1,77 @@
+---
+title: Supported Models
+description: Browse models available through Tangle Gateway across 20+ providers.
+---
+
+# Supported Models
+
+Tangle Gateway provides access to models from 20+ providers through a single API.
+
+## Providers
+
+| Provider | Slug | Models |
+| ----------- | ----------- | ----------------------------------------------------- |
+| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS |
+| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku |
+| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite |
+| Groq | `groq` | Llama 3.1/3.2 (fast inference) |
+| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) |
+| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder |
+| Mistral | `mistral` | Mistral Large, Codestral, Pixtral |
+| Fireworks | `fireworks` | Phi, StarCoder, open models |
+| Cohere | `cohere` | Command R/R+ |
+| xAI | `xai` | Grok 2, Grok 3 |
+| Cerebras | `cerebras` | Llama (fast inference) |
+| SambaNova | `sambanova` | Fast open-model inference |
+| AI21 | `ai21` | Jamba |
+| Nvidia | `nvidia` | Nemotron |
+| Z.ai | `zai` | GLM-4.7, GLM-5 |
+| Moonshot | `moonshot` | Kimi |
+
+Plus decentralized operators on the Tangle network running [Blueprints](/developers/blueprints/introduction):
+
+| Blueprint | Models | How to route |
+| -------------------------------------------------------------------------- | ----------------------------------------------- | ------------------------------------- |
+| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto |
+| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` |
+| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug |
+
+Operators set their own pricing and the gateway [scores them](/gateway/smart-routing) on reputation, latency, and price. See [Operator Routing](/gateway/operator-routing) for details.
+
+## Model ID format
+
+Use `provider/model-name`:
+
+```
+anthropic/claude-sonnet-4-6
+openai/gpt-4o-mini
+groq/llama-3.1-70b-versatile
+```
+
+Or use bare names — the gateway resolves the provider by prefix:
+
+| Prefix | Resolves to |
+| --------------------------- | ----------- |
+| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI |
+| `claude-` | Anthropic |
+| `gemini-`, `gemma-` | Google |
+| `llama-`, `mixtral-` | Groq |
+| `deepseek-` | DeepSeek |
+| `mistral-`, `codestral-` | Mistral |
+| `grok-` | xAI |
+| `glm-` | Z.ai |
+| `command-` | Cohere |
+
+## Modalities
+
+| Modality | Endpoint | Examples |
+| ---------- | ---------------------------------------------- | -------------------------------------- |
+| Text | `/v1/chat/completions` | All chat models |
+| Images | `/v1/images/generations` | DALL-E, FLUX |
+| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS |
+| Embeddings | `/v1/embeddings` | text-embedding-3-small/large |
+| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) |
+
+## Dynamic discovery
+
+The model catalog is available at [`GET /api/models`](https://router.tangle.tools/api/models) with pricing, context length, and modality information for every model.
diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx
new file mode 100644
index 00000000..f6461c01
--- /dev/null
+++ b/pages/gateway/no-train.mdx
@@ -0,0 +1,43 @@
+---
+title: Disallow Prompt Training
+description: Route only through providers that don't use your data for model training.
+---
+
+# Disallow Prompt Training
+
+Ensure your prompts and responses are never used by providers to train their models.
+
+## Usage
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "disallowPromptTraining": true
+ }
+ }
+}
+```
+
+## Relationship to ZDR
+
+Disallow prompt training is a **weaker guarantee** implied by [Zero Data Retention](/gateway/zdr). All ZDR-compliant providers also disallow prompt training, but more providers disallow training than offer full ZDR.
+
+| Filter | Verified providers |
+| ----------------------- | ------------------ |
+| No-train only | 25 providers |
+| ZDR (includes no-train) | 13 providers |
+
+Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees.
+
+## No-train verified providers
+
+All ZDR providers plus: OpenAI, Google AI Studio, Cohere, Perplexity, xAI, Morph AI, Novita AI, Voyage AI, and others.
+
+See the full list at [`GET /api/gateway/compliance`](/gateway/api-compliance).
+
+## Routing behavior
+
+Same as ZDR: operators and LiteLLM are skipped. Only direct provider calls to verified no-train providers.
+
+Can be enabled team-wide via `noTrainEnabled: true` on the team record.
diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx
new file mode 100644
index 00000000..687d38d2
--- /dev/null
+++ b/pages/gateway/operator-routing.mdx
@@ -0,0 +1,89 @@
+---
+title: Operator Routing
+description: Route inference through decentralized operators on the Tangle network.
+---
+
+# Operator Routing
+
+Operators are independent inference providers registered on the Tangle network. They run models on their own hardware, set their own prices, and earn from every request routed through them.
+
+## Blueprints
+
+Operators run **Blueprints** — on-chain service definitions that specify what an operator does. The inference-related Blueprints the gateway routes through:
+
+| Blueprint | What it serves | Repo |
+| ----------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
+| **LLM Inference** | Chat completions, text generation (Llama, Qwen, Mistral, etc.) | [tangle-network/llm-inference-blueprint](https://github.com/tangle-network/llm-inference-blueprint) |
+| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed |
+| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) |
+
+The LLM Inference Blueprint uses [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core) — a shared Rust crate for EIP-712 signature verification, nonce management, and on-chain settlement. Operators compile it into a binary (`operator-lite`) that runs alongside their model server.
+
+To build and deploy your own inference Blueprint, see the [Blueprint SDK docs](/developers/blueprints/introduction) and the [Blueprint Runner](/developers/blueprint-runner/introduction).
+
+## How operators are discovered
+
+1. Operators register on-chain via the [Blueprint Service Manager (BSM)](/developers/blueprints/service-lifecycle) contract
+2. The gateway syncs operator data from the chain every 60 seconds
+3. Operators are stored in the database with their endpoint URL, pricing, and status
+4. The [scoring algorithm](/gateway/smart-routing) ranks operators per-request
+
+## Routing to operators
+
+### Automatic (default)
+
+In `auto` mode, the gateway checks for operators serving the requested model before trying centralized providers:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to a Blueprint
+
+Route only to operators registered under a specific Blueprint:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "X-Tangle-Blueprint: 42" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to an operator
+
+Route to a specific operator by slug or Ethereum address:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "X-Tangle-Operator: tangle-core-1" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to a service instance
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "X-Tangle-Service: 7" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+## What's verified on-chain
+
+| Data | Verified? |
+| ------------------------- | ---------------------------------- |
+| Operator Ethereum address | Yes (signed transaction) |
+| Active/inactive status | Yes (BSM contract state) |
+| Staked amount | Yes (on-chain balance) |
+| Pricing (per-token) | Yes (BSM contract) |
+| Endpoint URL | No (self-reported at registration) |
+| Backing provider | No (not tracked) |
+
+Because endpoint URL and backing provider are self-reported, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers.
+
+## Payment
+
+Operator requests can be paid two ways:
+
+1. **Platform credits** — deducted from your credit balance at the operator's listed price
+2. **SpendAuth (on-chain)** — direct EIP-712 signed payment to the operator. No credit card needed. See [SpendAuth](/gateway/spend-auth).
diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx
new file mode 100644
index 00000000..6634f841
--- /dev/null
+++ b/pages/gateway/pricing.mdx
@@ -0,0 +1,55 @@
+---
+title: Credits & Pricing
+description: How billing works on Tangle Gateway.
+---
+
+# Credits & Pricing
+
+## Pricing model
+
+| Credential type | Markup |
+| -------------------------------- | ------------------------------------------- |
+| Platform credentials | 20% above provider list price |
+| [BYOK](/gateway/byok) | **0%** — provider list price, no markup |
+| [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) |
+
+The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them.
+
+## Credits
+
+Credits are denominated in USD. Purchase via Stripe or receive as part of a subscription plan.
+
+Check your balance:
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ https://router.tangle.tools/v1/credits
+```
+
+```json
+{
+ "balance": "95.50",
+ "total_used": "4.50"
+}
+```
+
+## Cost per request
+
+Each request is charged based on tokens:
+
+```
+cost = (input_tokens × input_price) + (output_tokens × output_price)
+```
+
+Pricing varies by model. Check per-model pricing at [`GET /api/models`](https://router.tangle.tools/api/models) or in the `X-Tangle-Price-Input` / `X-Tangle-Price-Output` response headers.
+
+## Billing transparency
+
+Every response includes pricing headers so you know the cost before it hits your balance:
+
+```
+X-Tangle-Price-Input: 0.000003 # USD per input token
+X-Tangle-Price-Output: 0.000015 # USD per output token
+```
+
+Look up detailed billing for any request via [`GET /v1/generation`](/gateway/api-generation).
diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx
new file mode 100644
index 00000000..ccb49797
--- /dev/null
+++ b/pages/gateway/provider-options.mdx
@@ -0,0 +1,70 @@
+---
+title: providerOptions.gateway
+description: Complete reference for gateway-specific request options.
+---
+
+# providerOptions.gateway
+
+All gateway-specific options are passed inside `providerOptions.gateway` in the request body. These are stripped before forwarding to providers.
+
+## Full schema
+
+```typescript
+interface GatewayOptions {
+ // Bring Your Own Key
+ byok?: Record<string, Array<{ apiKey: string }>>
+
+ // Compliance routing
+ zeroDataRetention?: boolean
+ disallowPromptTraining?: boolean
+
+ // Caching
+ caching?: 'auto' | false
+ cache?: false // disable response caching
+
+ // Provider routing
+ order?: string[] // provider priority
+ only?: string[] // provider allowlist
+
+ // Model fallbacks
+ models?: string[] // tried in order after primary model
+
+ // Timeouts (1s-120s, clamped)
+ timeout?: number | Record<string, number>
+}
+```
+
+## Options reference
+
+| Option | Type | Default | Description |
+| ------------------------ | ---------------------------------- | ------- | -------------------------------------------------------------- |
+| `byok` | `Record<string, {apiKey: string}[]>` | — | Per-request provider credentials. [Details](/gateway/byok) |
+| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) |
+| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) |
+| `caching` | `'auto' \| false` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) |
+| `cache` | `false` | — | Set `false` to skip response cache for this request. |
+| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) |
+| `only` | `string[]` | — | Restrict to these providers only. |
+| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) |
+| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts) |
+
+## Example: everything at once
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [{"apiKey": "sk-ant-..."}]
+ },
+ "zeroDataRetention": true,
+ "caching": "auto",
+ "models": ["openai/gpt-4o"],
+ "timeout": {"anthropic": 10000, "openai": 5000},
+ "order": ["anthropic", "openai"]
+ }
+ }
+}
+```
diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx
new file mode 100644
index 00000000..7b0962d2
--- /dev/null
+++ b/pages/gateway/rate-limiting.mdx
@@ -0,0 +1,44 @@
+---
+title: Rate Limiting
+description: Rate limits by authentication method.
+---
+
+# Rate Limiting
+
+The gateway enforces sliding-window rate limits per authentication method.
+
+## Limits
+
+| Auth method | Rate limit | Daily limit |
+| -------------------------- | ----------- | ------------------------ |
+| API Key | 60 req/min | Unlimited (with credits) |
+| Session | 30 req/min | Unlimited (with credits) |
+| SpendAuth | 120 req/min | Unlimited |
+| Anonymous | 10 req/min | 5 req/day |
+| Authenticated (no credits) | 30 req/min | 20 req/day |
+
+## Response headers
+
+Every response includes rate limit headers:
+
+```
+X-RateLimit-Limit: 60
+X-RateLimit-Remaining: 42
+X-RateLimit-Reset: 1712793600
+```
+
+## 429 responses
+
+When rate limited:
+
+```json
+{
+ "error": {
+ "message": "Rate limit exceeded for this API key.",
+ "type": "rate_limit_error",
+ "code": "rate_limit_exceeded"
+ }
+}
+```
+
+The `X-RateLimit-Reset` header indicates when the window resets (Unix timestamp in seconds).
diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx
new file mode 100644
index 00000000..40350b0c
--- /dev/null
+++ b/pages/gateway/response-headers.mdx
@@ -0,0 +1,41 @@
+---
+title: Response Headers
+description: Headers returned on every gateway response.
+---
+
+# Response Headers
+
+Every response from the gateway includes metadata headers.
+
+## Standard headers
+
+| Header | Description | Example |
+| ----------------------- | --------------------------- | ------------------- |
+| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` |
+| `X-Tangle-Price-Input` | USD per input token | `0.000003` |
+| `X-Tangle-Price-Output` | USD per output token | `0.000015` |
+| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` |
+| `X-RateLimit-Limit` | Requests allowed per window | `60` |
+| `X-RateLimit-Remaining` | Requests remaining | `42` |
+| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` |
+
+## Conditional headers
+
+| Header | When present | Description |
+| ------------------------ | --------------------------------- | ------------------------------- |
+| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path |
+| `X-Tangle-Operator` | When served by an operator | Operator slug |
+| `X-Tangle-BYOK` | When BYOK credentials used | `true` |
+| `X-Tangle-Caching` | When prompt caching applied | `auto` |
+| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` |
+| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` |
+| `X-Free-Tier-Remaining` | Free tier requests | `3` |
+| `X-Free-Tier-Limit` | Free tier daily cap | `5` |
+
+## Error response headers
+
+| Header | When present | Description |
+| -------------------- | ------------- | ------------------------- |
+| `X-Payment-Required` | 402 responses | Amount needed (micro-USD) |
+| `X-Payment-Currency` | 402 responses | `tsUSD` |
+| `X-Payment-Methods` | 402 responses | `credits,spend_auth` |
diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx
new file mode 100644
index 00000000..9e23653d
--- /dev/null
+++ b/pages/gateway/routing-trace.mdx
@@ -0,0 +1,36 @@
+---
+title: Routing Trace
+description: See exactly which providers were tried for every request.
+---
+
+# Routing Trace
+
+Every response includes an `X-Tangle-Routing-Trace` header showing the routing path — which providers were tried, whether they succeeded, and how long each took.
+
+## Header format
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[operator(err:5001ms)→litellm(200:340ms)]
+```
+
+Format: `model[provider(status:latency)→provider(status:latency)]`
+
+Multiple models (from [fallbacks](/gateway/fallbacks)):
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
+
+## Sanitization
+
+The trace header is sanitized for safety:
+
+- Operator names are shown as generic `operator` (slugs not exposed)
+- Error messages are not included (only status codes)
+- Internal URLs and hostnames are never leaked
+
+For the full unredacted trace including error messages, use the [generation lookup API](/gateway/generation-lookup) — the `routing_trace` field in the response contains the complete history.
+
+## Disabling
+
+Set `ENABLE_ROUTING_TRACE=false` to omit the header from all responses. See [Feature Flags](/gateway/feature-flags).
diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx
new file mode 100644
index 00000000..9521f60f
--- /dev/null
+++ b/pages/gateway/smart-routing.mdx
@@ -0,0 +1,58 @@
+---
+title: Smart Routing
+description: How the gateway scores and selects operators.
+---
+
+# Smart Routing
+
+When multiple operators running the same [Blueprint](/developers/blueprints/introduction) serve the same model, the gateway selects the best one using a weighted scoring algorithm.
+
+## Scoring formula
+
+```
+score = reputation(40%) + latency(30%) + price(30%)
+```
+
+| Factor | Weight | What it measures |
+| -------------- | ------ | --------------------------------------------------------- |
+| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history |
+| **Latency** | 30% | Inverse of average response time (lower = better) |
+| **Price** | 30% | Inverse of per-token price (cheaper = better) |
+
+## Operator selection
+
+1. Query all operators serving the requested model
+2. Filter: only `active` or `degraded` status, must be pipeline head
+3. Score each operator
+4. Sort by score descending
+5. Route to the highest-scoring operator
+
+If a preferred operator is specified (via `X-Tangle-Operator`), it's moved to the top of the ranked list regardless of score.
+
+## Health tracking
+
+The gateway tracks operator health via:
+
+- **Health checks** — periodic probes stored in `OperatorHealthCheck`
+- **Request outcomes** — success/failure recorded per request
+- **Latency tracking** — rolling average updated per request
+
+Operators that consistently fail are automatically deprioritized as their reputation and latency scores decline.
+
+## Provider ordering (non-operator)
+
+For direct provider routing, use `providerOptions.gateway.order` and `only`:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "order": ["bedrock", "anthropic"],
+ "only": ["bedrock", "anthropic"]
+ }
+ }
+}
+```
+
+- `order`: Try providers in this order. First with valid credentials wins.
+- `only`: Restrict to these providers. Others are excluded even if they have credentials.
diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx
new file mode 100644
index 00000000..d464d36d
--- /dev/null
+++ b/pages/gateway/spend-auth.mdx
@@ -0,0 +1,55 @@
+---
+title: SpendAuth (On-Chain Payments)
+description: Pay operators directly on-chain via EIP-712 signed authorizations.
+---
+
+# SpendAuth
+
+SpendAuth lets you pay operators directly on-chain without a credit card or account. Sign an EIP-712 typed data message with your wallet, attach it to the request, and the operator claims payment after serving inference.
+
+## How it works
+
+1. **Sign:** Create an EIP-712 SpendAuth payload with your wallet
+2. **Send:** Attach the signature as `X-Payment-Signature` header
+3. **Authorize:** The gateway verifies the signature and calls `authorizeSpend` on-chain
+4. **Serve:** The operator processes your inference request
+5. **Claim:** The operator calls `claimPayment` to receive funds
+
+## Request format
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Content-Type: application/json" \
+ -H "X-Payment-Signature: {
+ \"commitment\": \"0xabc...\",
+ \"serviceId\": \"1\",
+ \"jobIndex\": 0,
+ \"amount\": \"1000000\",
+ \"operator\": \"0x70997970...\",
+ \"nonce\": \"42\",
+ \"expiry\": \"1712793600\",
+ \"signature\": \"0xff...\"
+ }" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+## Security
+
+- **EIP-712 signatures:** Cryptographically verified against the signing address
+- **Nonce replay protection:** Each nonce can only be used once per commitment (in-memory + Redis store)
+- **On-chain verification:** The `ShieldedCredits` contract validates authorization and deducts balance
+- **Expiry:** SpendAuth payloads have a timestamp-based expiry
+
+## Rate limits
+
+SpendAuth requests get a generous 120 req/min limit per commitment since every request is paid.
+
+## On-chain contracts
+
+SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. The contract handles:
+
+- Balance management (deposit, authorize, claim)
+- Authorization verification (EIP-712 signature recovery)
+- Payment settlement (operator claims after serving)
+
+The operator-side settlement logic is implemented in [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core), a shared Rust crate used by the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint) and other inference Blueprints.
diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx
new file mode 100644
index 00000000..6c49cc18
--- /dev/null
+++ b/pages/gateway/timeouts.mdx
@@ -0,0 +1,57 @@
+---
+title: Provider Timeouts
+description: Configure per-provider timeouts for fast failover.
+---
+
+# Provider Timeouts
+
+Set timeouts to trigger fast failover when a provider is slow. Values are clamped to 1-120 seconds.
+
+## Global timeout
+
+Apply the same timeout to all providers:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "timeout": 5000
+ }
+ }
+}
+```
+
+## Per-provider timeouts
+
+Different providers have different latency profiles. Set timeouts individually:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "timeout": {
+ "openai": 5000,
+ "anthropic": 10000,
+ "groq": 3000
+ }
+ }
+ }
+}
+```
+
+## Default behavior
+
+Without explicit timeouts, the gateway uses a 30-second default for all providers and a 30-second idle timeout for streaming responses.
+
+## Bounds
+
+All timeout values are clamped:
+
+- **Minimum:** 1,000ms (1 second)
+- **Maximum:** 120,000ms (2 minutes)
+
+Values outside this range are silently clamped to the nearest bound.
+
+## Interaction with fallbacks
+
+When a provider times out, it counts as a failure in the [routing trace](/gateway/routing-trace) and the gateway moves to the next option — either a different provider for the same model, or the next [fallback model](/gateway/fallbacks).
diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx
new file mode 100644
index 00000000..e9f44b4b
--- /dev/null
+++ b/pages/gateway/zdr.mdx
@@ -0,0 +1,84 @@
+---
+title: Zero Data Retention
+description: Route requests only through providers with verified zero data retention agreements.
+---
+
+# Zero Data Retention (ZDR)
+
+When ZDR is enabled, the gateway routes requests **only** through providers that have verified agreements to delete all request data immediately after processing.
+
+## Enable per-request
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Analyze this sensitive data..."}],
+ "providerOptions": {
+ "gateway": {
+ "zeroDataRetention": true
+ }
+ }
+}
+```
+
+## Enable team-wide
+
+Set `zdrEnabled: true` on your team record. All requests from team members will enforce ZDR. Team-wide ZDR overrides per-request `zeroDataRetention: false`.
+
+## How it works
+
+When ZDR is enabled:
+
+1. **Operators are skipped.** Operators self-report their backing provider. The gateway cannot verify what provider an operator actually routes through, so operators are excluded from ZDR-compliant routing.
+
+2. **LiteLLM is skipped.** LiteLLM has its own internal fallback chain that may route to non-ZDR providers. Since we can't control LiteLLM's routing decisions, it's excluded.
+
+3. **Direct provider only.** The gateway calls the provider API directly, selecting only from verified ZDR providers.
+
+4. **BYOK fallback preserves ZDR.** If your [BYOK](/gateway/byok) credentials fail, the fallback to platform credentials still enforces ZDR filtering.
+
+## ZDR-verified providers
+
+| Provider | ZDR | No-Train | Policy |
+| -------------- | --- | -------- | ------------------------------------------------------------------------------------------------------ |
+| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) |
+| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) |
+| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) |
+| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) |
+| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) |
+| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) |
+| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) |
+| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) |
+| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) |
+| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) |
+| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) |
+| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) |
+| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) |
+
+Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys.
+
+## Trust model
+
+| Routing tier | ZDR behavior |
+| ----------------------- | ---------------------------------------------------------- |
+| **Operators** | Skipped. Self-reported backing provider is unverifiable. |
+| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. |
+| **Direct provider** | Routed only to verified ZDR providers. |
+| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. |
+| **Operator-only + ZDR** | 400 error. Conflicting requirements. |
+
+The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct.
+
+## Error responses
+
+If no ZDR-compliant provider is available for the requested model:
+
+```json
+{
+ "error": {
+ "message": "No ZDR providers available for model: deepseek/deepseek-chat. Providers considered: anthropic, groq, mistral, ...",
+ "type": "invalid_request_error",
+ "code": "no_providers_available"
+ }
+}
+```
diff --git a/pages/vision/architecture.mdx b/pages/vision/architecture.mdx
index d64cd5bc..6b68d079 100644
--- a/pages/vision/architecture.mdx
+++ b/pages/vision/architecture.mdx
@@ -20,6 +20,7 @@ Tangle ties together three layers most platforms separate: the workbench where w
| Layer | Runs here | Examples |
| --------------- | ----------------------------- | -------------------------------------------------------- |
| Workbench | Human and agent collaboration | Workflows, profiles, simulations, reviews |
+| Gateway | Inference routing and billing | Model access, BYOK, ZDR compliance, operator selection |
| Sandbox runtime | Executed tasks and tools | Agent sessions, tool calls, file edits |
| Protocol | Coordination and settlement | Service registry, operator payments, staking, incentives |
@@ -28,10 +29,13 @@ Tangle ties together three layers most platforms separate: the workbench where w
**1) Execution Layer**
Sandboxed runtimes with isolation, resource limits, and audit logs. This is where tasks actually run.
-**2) Protocol Layer**
+**2) Inference Layer**
+The [Gateway](/gateway) routes inference requests across centralized providers and decentralized operators. It handles model selection, compliance filtering ([ZDR](/gateway/zdr), [no-train](/gateway/no-train)), [BYOK](/gateway/byok) credential management, and billing.
+
+**3) Protocol Layer**
The coordination plane. It handles operator discovery, payment routing, and incentive enforcement.
-**3) Experience Layer**
+**4) Experience Layer**
The agentic workbench and [Blueprint SDK](https://github.com/tangle-network/blueprint/tree/main). This is where teams design workflows, run simulations, and ship services.
The SDK also includes optional gateways for integrating external events and payment-driven HTTP job execution (webhooks,