From c2d60418e89c67d4897a45f083d07433148a5933 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Fri, 10 Apr 2026 15:29:03 -0700
Subject: [PATCH 1/4] feat: add Gateway as top-level product section (30 pages)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New top-level "Gateway" section at /gateway — same level as Workbench
and Runtime. Documents the Tangle inference gateway with full coverage:

Introduction & Getting Started (2 pages):
- Product overview, quickstart with curl/Python/TypeScript

Models & Providers (5 pages):
- Supported models, BYOK, fallbacks, timeouts, caching

Routing (3 pages):
- 3-tier architecture, operator routing, smart scoring

Security & Compliance (5 pages):
- Auth methods, ZDR, no-train, guardrails, rate limiting

Billing (3 pages):
- Credits/pricing, free tier model gating, SpendAuth on-chain payments

Observability (2 pages):
- Generation lookup API, routing trace

API Reference (7 pages):
- POST /v1/chat/completions, GET /v1/generation, GET /v1/credits,
  compliance API, providerOptions.gateway, response headers, feature flags

Guides (3 pages):
- Migrate from OpenAI, migrate from Vercel AI Gateway, enterprise ZDR

Also updated:
- Root _meta.ts: Gateway added between Workbench and Runtime
- AI hub page: added Inference Gateway section with links
- AI _meta.ts: added Gateway links in sidebar

Build: 206 pages indexed, zero errors.
---
 pages/_meta.ts                      |  4 ++
 pages/ai/_meta.ts                   | 20 ++++++
 pages/ai/index.mdx                  | 11 ++++
 pages/gateway/_meta.ts              | 64 +++++++++++++++++++
 pages/gateway/api-chat.mdx          | 96 +++++++++++++++++++++++++++++
 pages/gateway/api-compliance.mdx    | 49 +++++++++++++++
 pages/gateway/api-credits.mdx       | 29 +++++++++
 pages/gateway/api-generation.mdx    | 55 +++++++++++++++++
 pages/gateway/authentication.mdx    | 57 +++++++++++++++++
 pages/gateway/byok.mdx              | 86 ++++++++++++++++++++++++++
 pages/gateway/caching.mdx           | 72 ++++++++++++++++++++++
 pages/gateway/enterprise-zdr.mdx    | 93 ++++++++++++++++++++++++++++
 pages/gateway/fallbacks.mdx         | 69 +++++++++++++++++++++
 pages/gateway/feature-flags.mdx     | 35 +++++++++++
 pages/gateway/free-tier.mdx         | 62 +++++++++++++++++++
 pages/gateway/generation-lookup.mdx | 71 +++++++++++++++++++++
 pages/gateway/getting-started.mdx   | 94 ++++++++++++++++++++++++++++
 pages/gateway/guardrails.mdx        | 63 +++++++++++++++++++
 pages/gateway/how-routing-works.mdx | 64 +++++++++++++++++++
 pages/gateway/index.mdx             | 58 +++++++++++++++++
 pages/gateway/migrate-openai.mdx    | 77 +++++++++++++++++++++++
 pages/gateway/migrate-vercel.mdx    | 80 ++++++++++++++++++++++++
 pages/gateway/models.mdx            | 69 +++++++++++++++++++++
 pages/gateway/no-train.mdx          | 43 +++++++++++++
 pages/gateway/operator-routing.mdx  | 75 ++++++++++++++++++++++
 pages/gateway/pricing.mdx           | 55 +++++++++++++++++
 pages/gateway/provider-options.mdx  | 70 +++++++++++++++++++++
 pages/gateway/rate-limiting.mdx     | 44 +++++++++++++
 pages/gateway/response-headers.mdx  | 41 ++++++++++++
 pages/gateway/routing-trace.mdx     | 35 +++++++++++
 pages/gateway/smart-routing.mdx     | 58 +++++++++++++++++
 pages/gateway/spend-auth.mdx        | 52 ++++++++++++++++
 pages/gateway/timeouts.mdx          | 56 +++++++++++++++++
 pages/gateway/zdr.mdx               | 84 +++++++++++++++++++++++++
 34 files changed, 1991 insertions(+)
 create mode 100644 pages/gateway/_meta.ts
 create mode 100644 pages/gateway/api-chat.mdx
 create mode 100644 pages/gateway/api-compliance.mdx
 create mode 100644 pages/gateway/api-credits.mdx
 create mode 100644 pages/gateway/api-generation.mdx
 create mode 100644 pages/gateway/authentication.mdx
 create mode 100644 pages/gateway/byok.mdx
 create mode 100644 pages/gateway/caching.mdx
 create mode 100644 pages/gateway/enterprise-zdr.mdx
 create mode 100644 pages/gateway/fallbacks.mdx
 create mode 100644 pages/gateway/feature-flags.mdx
 create mode 100644 pages/gateway/free-tier.mdx
 create mode 100644 pages/gateway/generation-lookup.mdx
 create mode 100644 pages/gateway/getting-started.mdx
 create mode 100644 pages/gateway/guardrails.mdx
 create mode 100644 pages/gateway/how-routing-works.mdx
 create mode 100644 pages/gateway/index.mdx
 create mode 100644 pages/gateway/migrate-openai.mdx
 create mode 100644 pages/gateway/migrate-vercel.mdx
 create mode 100644 pages/gateway/models.mdx
 create mode 100644 pages/gateway/no-train.mdx
 create mode 100644 pages/gateway/operator-routing.mdx
 create mode 100644 pages/gateway/pricing.mdx
 create mode 100644 pages/gateway/provider-options.mdx
 create mode 100644 pages/gateway/rate-limiting.mdx
 create mode 100644 pages/gateway/response-headers.mdx
 create mode 100644 pages/gateway/routing-trace.mdx
 create mode 100644 pages/gateway/smart-routing.mdx
 create mode 100644 pages/gateway/spend-auth.mdx
 create mode 100644 pages/gateway/timeouts.mdx
 create mode 100644 pages/gateway/zdr.mdx

diff --git a/pages/_meta.ts b/pages/_meta.ts
index ef9a5cab..b700a1ca 100644
--- a/pages/_meta.ts
+++ b/pages/_meta.ts
@@ -18,6 +18,10 @@ const meta: Meta = {
     title: "Workbench",
     type: "page",
   },
+  gateway: {
+    title: "Gateway",
+    type: "page",
+  },
   infrastructure: {
     title: "Runtime",
     type: "page",
diff --git a/pages/ai/_meta.ts b/pages/ai/_meta.ts
index 7165fac5..88e4c860 100644
--- a/pages/ai/_meta.ts
+++ b/pages/ai/_meta.ts
@@ -2,6 +2,26 @@ import type { Meta } from "nextra";
 
 const meta: Meta = {
   index: "AI Introduction",
+  "-- gateway": {
+    type: "separator",
+    title: "Inference Gateway",
+  },
+  "gateway-intro": {
+    title: "Introduction",
+    href: "/gateway",
+  },
+  "gateway-start": {
+    title: "Getting Started",
+    href: "/gateway/getting-started",
+  },
+  "gateway-models": {
+    title: "Models & Providers",
+    href: "/gateway/models",
+  },
+  "gateway-zdr": {
+    title: "Zero Data Retention",
+    href: "/gateway/zdr",
+  },
   "-- workbench": {
     type: "separator",
     title: "Agentic Workbench",
diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx
index fdfbcaed..fe2cf0bb 100644
--- a/pages/ai/index.mdx
+++ b/pages/ai/index.mdx
@@ -44,8 +44,19 @@ Core capabilities:
 
 Each run produces task and agent evaluations. That data feeds back into the workbench to improve prompts, policies, and workflows over time.
 
+## Inference Gateway
+
+The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement.
+
+Key capabilities:
+- **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers.
+- **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency.
+- **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements.
+- **On-chain payments.** [SpendAuth](/gateway/spend-auth) — pay operators directly without a credit card.
+
 ## Learn More
 
+- [Gateway — Getting Started](/gateway/getting-started)
 - [Workbench details](/vibe/introduction)
 - [Runtime and sandboxing](/infrastructure/introduction)
 - [Operator onboarding](/operators/introduction)
diff --git a/pages/gateway/_meta.ts b/pages/gateway/_meta.ts
new file mode 100644
index 00000000..186342b5
--- /dev/null
+++ b/pages/gateway/_meta.ts
@@ -0,0 +1,64 @@
+import type { Meta } from "nextra";
+
+const meta: Meta = {
+  index: "Introduction",
+  "getting-started": "Getting Started",
+  "-- models": {
+    type: "separator",
+    title: "Models & Providers",
+  },
+  models: "Supported Models",
+  byok: "Bring Your Own Key",
+  fallbacks: "Model Fallbacks",
+  timeouts: "Provider Timeouts",
+  caching: "Automatic Caching",
+  "-- routing": {
+    type: "separator",
+    title: "Routing",
+  },
+  "how-routing-works": "How Routing Works",
+  "operator-routing": "Operator Routing",
+  "smart-routing": "Smart Routing",
+  "-- security": {
+    type: "separator",
+    title: "Security & Compliance",
+  },
+  authentication: "Authentication",
+  zdr: "Zero Data Retention",
+  "no-train": "Disallow Prompt Training",
+  guardrails: "Guardrails",
+  "rate-limiting": "Rate Limiting",
+  "-- billing": {
+    type: "separator",
+    title: "Billing",
+  },
+  pricing: "Credits & Pricing",
+  "free-tier": "Free Tier",
+  "spend-auth": "SpendAuth (On-Chain)",
+  "-- observability": {
+    type: "separator",
+    title: "Observability",
+  },
+  "generation-lookup": "Generation Lookup",
+  "routing-trace": "Routing Trace",
+  "-- reference": {
+    type: "separator",
+    title: "API Reference",
+  },
+  "api-chat": "POST /v1/chat/completions",
+  "api-generation": "GET /v1/generation",
+  "api-credits": "GET /v1/credits",
+  "api-compliance": "Provider Compliance API",
+  "provider-options": "providerOptions.gateway",
+  "response-headers": "Response Headers",
+  "feature-flags": "Feature Flags",
+  "-- guides": {
+    type: "separator",
+    title: "Guides",
+  },
+  "migrate-openai": "Migrate from OpenAI",
+  "migrate-vercel": "Migrate from Vercel AI Gateway",
+  "enterprise-zdr": "Enterprise ZDR Setup",
+};
+
+export default meta;
diff --git a/pages/gateway/api-chat.mdx b/pages/gateway/api-chat.mdx
new file mode 100644
index 00000000..2a8db365
--- /dev/null
+++ b/pages/gateway/api-chat.mdx
@@ -0,0 +1,96 @@
+---
+title: POST /v1/chat/completions
+description: OpenAI-compatible chat completion endpoint with gateway extensions.
+---
+
+# POST /v1/chat/completions
+
+OpenAI-compatible chat completion endpoint. Supports streaming, tool use, and all standard parameters, plus gateway-specific extensions via `providerOptions.gateway`.
+
+## Request
+
+```
+POST https://router.tangle.tools/v1/chat/completions
+Authorization: Bearer sk-tan-YOUR_KEY
+Content-Type: application/json
+```
+
+### Body
+
+```json
+{
+  "model": "anthropic/claude-sonnet-4-6",
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "Hello"}
+  ],
+  "temperature": 0.7,
+  "max_tokens": 4096,
+  "stream": true,
+  "tools": [...],
+  "tool_choice": "auto",
+  "response_format": {"type": "json_object"},
+  "top_p": 0.9,
+  "frequency_penalty": 0,
+  "presence_penalty": 0,
+  "stop": ["\n\n"],
+  "providerOptions": {
+    "gateway": {
+      "byok": {"anthropic": [{"apiKey": "sk-ant-..."}]},
+      "zeroDataRetention": true,
+      "caching": "auto",
+      "models": ["openai/gpt-4o"],
+      "timeout": 5000
+    }
+  }
+}
+```
+
+All standard OpenAI parameters (`tools`, `tool_choice`, `response_format`, `top_p`, `frequency_penalty`, `presence_penalty`, `stop`, `logprobs`) are forwarded to the provider.
+
+### Routing headers (optional)
+
+| Header | Effect |
+|--------|--------|
+| `X-Tangle-Routing` | `operator`, `provider`, or `auto` (default) |
+| `X-Tangle-Blueprint` | Pin to operators under this Blueprint ID |
+| `X-Tangle-Service` | Pin to a specific service instance |
+| `X-Tangle-Operator` | Pin to a specific operator (slug or 0x address) |
+| `X-Payment-Signature` | SpendAuth JSON payload for on-chain payment |
+
+### Validation
+
+| Field | Constraint |
+|-------|-----------|
+| `model` | Required. Alphanumeric plus `/`, `-`, `.`, `:`, `_`; max 128 chars. |
+| `messages` | Required. Non-empty array. Each must have `role`. |
+| `max_tokens` | Optional. 1-128,000. Default: 4,096. |
+| `temperature` | Optional. 0-2. Default: 1. |
+| Body size | Max 1MB. |
+
+## Response (non-streaming)
+
+Standard OpenAI chat completion response:
+
+```json
+{
+  "id": "chatcmpl-...",
+  "choices": [{
+    "message": {"role": "assistant", "content": "Hello! How can I help?"},
+    "finish_reason": "stop"
+  }],
+  "usage": {
+    "prompt_tokens": 15,
+    "completion_tokens": 8,
+    "total_tokens": 23
+  }
+}
+```
+
+## Response (streaming)
+
+Server-sent events with `data: {...}` lines and `data: [DONE]` terminator.
+
+## Response headers
+
+See [Response Headers](/gateway/response-headers) for the full list.
diff --git a/pages/gateway/api-compliance.mdx b/pages/gateway/api-compliance.mdx
new file mode 100644
index 00000000..fd4e02d6
--- /dev/null
+++ b/pages/gateway/api-compliance.mdx
@@ -0,0 +1,49 @@
+---
+title: Provider Compliance API
+description: Query and manage provider ZDR and no-train compliance data.
+---
+
+# Provider Compliance API
+
+## GET /api/gateway/compliance
+
+List compliance data for all providers. Public endpoint (rate-limited).
+
+```bash
+curl https://router.tangle.tools/api/gateway/compliance
+```
+
+```json
+{
+  "providers": [
+    {
+      "provider": "anthropic",
+      "name": "Anthropic",
+      "zdr": true,
+      "no_train": true,
+      "supports_prompt_caching": true,
+      "caching_type": "explicit",
+      "policy_url": "https://platform.claude.com/docs/en/build-with-claude/zero-data-retention",
+      "verified_at": "2026-04-10T00:00:00.000Z"
+    }
+  ]
+}
+```
+
+## PUT /api/admin/compliance
+
+Update compliance data for a provider. Admin-only: requires a signed-in session for a user listed in `ADMIN_EMAILS`.
+
+```bash
+curl -X PUT https://router.tangle.tools/api/admin/compliance \
+  -H "Cookie: session_token=ADMIN_SESSION" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "providerId": "openai",
+    "zdr": true,
+    "noTrain": true,
+    "policyUrl": "https://openai.com/policies/api-data-usage-policies"
+  }'
+```
+
+Only fields included in the request body are updated. Omitted fields remain unchanged. `verifiedAt` is automatically set to the current timestamp.
diff --git a/pages/gateway/api-credits.mdx b/pages/gateway/api-credits.mdx
new file mode 100644
index 00000000..deae1201
--- /dev/null
+++ b/pages/gateway/api-credits.mdx
@@ -0,0 +1,29 @@
+---
+title: GET /v1/credits
+description: Check your credit balance and total usage.
+---
+
+# GET /v1/credits
+
+Check your credit balance. Requires authentication.
+
+## Request
+
+```
+GET https://router.tangle.tools/v1/credits
+Authorization: Bearer sk-tan-YOUR_KEY
+```
+
+## Response
+
+```json
+{
+  "balance": "95.50",
+  "total_used": "4.50"
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `balance` | Remaining credit balance (USD) |
+| `total_used` | Total credits consumed (USD) |
diff --git a/pages/gateway/api-generation.mdx b/pages/gateway/api-generation.mdx
new file mode 100644
index 00000000..f52f7566
--- /dev/null
+++ b/pages/gateway/api-generation.mdx
@@ -0,0 +1,55 @@
+---
+title: GET /v1/generation
+description: Look up detailed information about a specific request.
+---
+
+# GET /v1/generation
+
+Retrieve detailed information about a specific generation by its ID. Requires authentication.
+
+## Request
+
+```
+GET https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV
+Authorization: Bearer sk-tan-YOUR_KEY
+```
+
+## Parameters
+
+| Parameter | Required | Description |
+|-----------|----------|-------------|
+| `id` | Yes | Generation ID (format: `gen_<ulid>`) |
+
+## Response
+
+```json
+{
+  "data": {
+    "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV",
+    "total_cost": 0.00123,
+    "usage": 0.00123,
+    "created_at": "2026-04-10T12:00:00.000Z",
+    "model": "anthropic/claude-sonnet-4-6",
+    "is_byok": false,
+    "provider_name": "anthropic",
+    "streamed": true,
+    "latency": 200,
+    "generation_time": 1500,
+    "tokens_prompt": 100,
+    "tokens_completion": 50,
+    "native_tokens_cached": 80,
+    "native_tokens_reasoning": 0,
+    "status": "success",
+    "routing_trace": {...},
+    "cache_hit": false
+  }
+}
+```
+
+## Errors
+
+| Status | Code | Description |
+|--------|------|-------------|
+| 400 | — | Missing or invalid generation ID |
+| 401 | — | Authentication required |
+| 404 | `not_found` | Generation not found or belongs to another user |
diff --git a/pages/gateway/authentication.mdx b/pages/gateway/authentication.mdx
new file mode 100644
index 00000000..5ce04f4e
--- /dev/null
+++ b/pages/gateway/authentication.mdx
@@ -0,0 +1,57 @@
+---
+title: Authentication
+description: Authentication methods for Tangle Gateway.
+---
+
+# Authentication
+
+Four authentication methods, plus anonymous access, each with different rate limits and capabilities.
+
+## API Key
+
+Create keys in the dashboard. Keys start with `sk-tan-` and are hashed with SHA-256 before storage.
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+  https://router.tangle.tools/v1/chat/completions
+```
+
+- **Rate limit:** 60 req/min
+- **Credit check:** Yes (402 if balance is zero on non-free-tier models)
+- **Key features:** Expiration dates, soft revocation, last-used tracking
+
+## Session (Cookie)
+
+Browser-based authentication via Better Auth. Supports email/password and OAuth (Google, GitHub).
+
+- **Rate limit:** 30 req/min
+- **Credit check:** Yes
+
+## SIWE (Sign-In with Ethereum)
+
+Wallet-based authentication via Sign-In with Ethereum (EIP-4361) messages, signed with EIP-191 signatures. Authenticate with your Ethereum wallet.
+
+```
+POST /api/siwe/verify
+{ "address": "0x...", "signature": "0x...", "message": "..." }
+```
+
+## SpendAuth (On-Chain Payment)
+
+EIP-712 signed payment authorization. No account needed — pay operators directly on-chain.
+
+```bash
+curl -H "X-Payment-Signature: {\"commitment\":\"0x...\",\"amount\":\"1000000\",...}" \
+  https://router.tangle.tools/v1/chat/completions
+```
+
+- **Rate limit:** 120 req/min per commitment
+- **Credit check:** No (payment is on-chain)
+- See [SpendAuth](/gateway/spend-auth) for details.
+
+## Anonymous
+
+No authentication required for [free tier models](/gateway/free-tier).
+
+- **Rate limit:** 10 req/min, 5 req/day
+- **Model access:** Free tier only (`gpt-4o-mini`, `llama-3.1-8b-instant`, etc.)
diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx
new file mode 100644
index 00000000..7db449b0
--- /dev/null
+++ b/pages/gateway/byok.mdx
@@ -0,0 +1,86 @@
+---
+title: Bring Your Own Key (BYOK)
+description: Use your own provider API keys with Tangle Gateway for zero-markup access.
+---
+
+# Bring Your Own Key (BYOK)
+
+Use your existing provider API keys with Tangle Gateway. BYOK requests have **zero platform markup** — you pay the provider's list price directly.
+
+## Per-request BYOK
+
+Pass credentials in `providerOptions.gateway.byok`:
+
+```json
+{
+  "model": "anthropic/claude-sonnet-4-6",
+  "messages": [{"role": "user", "content": "Hello"}],
+  "providerOptions": {
+    "gateway": {
+      "byok": {
+        "anthropic": [{"apiKey": "sk-ant-your-key"}]
+      }
+    }
+  }
+}
+```
+
+### Multiple credentials
+
+Specify multiple credentials per provider. The gateway tries them in order:
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "byok": {
+        "anthropic": [
+          {"apiKey": "sk-ant-primary"},
+          {"apiKey": "sk-ant-backup"}
+        ]
+      }
+    }
+  }
+}
+```
+
+### Multiple providers
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "byok": {
+        "anthropic": [{"apiKey": "sk-ant-..."}],
+        "openai": [{"apiKey": "sk-..."}]
+      }
+    }
+  }
+}
+```
+
+## Automatic fallback
+
+If your BYOK credentials fail (401, 403, rate limit), the gateway automatically falls back to platform credentials. This fallback preserves all compliance filters — if you requested [ZDR](/gateway/zdr), the fallback will only use ZDR-compliant system credentials.
+
+The `X-Tangle-BYOK` response header indicates whether the request used your credentials:
+
+```
+X-Tangle-BYOK: true    # Your key was used
+```
+
+If the header is absent, platform credentials were used (possibly via fallback).
+
+## Pricing
+
+| Credential type | Markup |
+|----------------|--------|
+| BYOK | **0%** — provider list price |
+| Platform credentials | 20% markup (configurable) |
+
+## Security
+
+- BYOK credentials are never logged, stored, or persisted.
+- Credentials exist only in memory for the duration of the request.
+- The `providerOptions` field is stripped from the request body before forwarding to providers.
+- Credentials are validated by structure (`apiKey` must be a string) and sanitized against prototype pollution.
diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx
new file mode 100644
index 00000000..976894c0
--- /dev/null
+++ b/pages/gateway/caching.mdx
@@ -0,0 +1,72 @@
+---
+title: Automatic Caching
+description: Enable prompt caching across providers with a single flag.
+---
+
+# Automatic Caching
+
+Some providers require explicit cache markers to enable prompt caching, while others cache automatically. Use `caching: 'auto'` to let the gateway handle it.
+
+## Usage
+
+```json
+{
+  "model": "anthropic/claude-sonnet-4-6",
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant with a large knowledge base..."},
+    {"role": "user", "content": "What is Tangle?"}
+  ],
+  "providerOptions": {
+    "gateway": {
+      "caching": "auto"
+    }
+  }
+}
+```
+
+## How it works
+
+| Provider | Caching Type | What `auto` does |
+|----------|-------------|-----------------|
+| OpenAI | Implicit | No change needed. Caching happens automatically. |
+| Google | Implicit | No change needed. |
+| DeepSeek | Implicit | No change needed. |
+| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. |
+| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. |
+
+For Anthropic, the gateway converts:
+
+```json
+{"role": "system", "content": "You are helpful..."}
+```
+
+Into:
+
+```json
+{"role": "system", "content": [{"type": "text", "text": "You are helpful...", "cache_control": {"type": "ephemeral"}}]}
+```
+
+This caches the system prompt so that subsequent requests starting with the same system prompt reuse it, reducing the cost of the cached input tokens by up to 90%.
+
+## Response caching
+
+Separately from prompt caching, the gateway caches complete responses for **deterministic requests** (temperature ≤ 0.01, non-streaming). Cached responses are free.
+
+```
+X-Tangle-Cache: HIT     # Served from cache
+X-Tangle-Cache: MISS    # Fetched from provider
+```
+
+Disable per-request:
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "cache": false
+    }
+  }
+}
+```
+
+The response cache key includes: model, messages, temperature, max_tokens, tools, response_format, and top_p. Different parameters always produce different cache entries.
diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx
new file mode 100644
index 00000000..f331bd6f
--- /dev/null
+++ b/pages/gateway/enterprise-zdr.mdx
@@ -0,0 +1,93 @@
+---
+title: Enterprise ZDR Setup
+description: Configure Zero Data Retention for your organization.
+---
+
+# Enterprise ZDR Setup
+
+This guide walks through configuring ZDR for an organization that needs to guarantee no prompts or responses are retained by AI providers.
+
+## Step 1: Understand the trust model
+
+Read the [ZDR trust model](/gateway/zdr#trust-model) first. Key points:
+
+- ZDR is enforced at the **direct provider** level only.
+- **Operators are skipped** when ZDR is enabled (their backing provider is unverifiable).
+- **LiteLLM is skipped** (its internal routing is uncontrollable).
+- BYOK fallback to platform credentials preserves ZDR filtering.
+
+## Step 2: Choose your approach
+
+### Option A: Team-wide ZDR (recommended)
+
+Enable ZDR for all requests from your team. No code changes needed — every request is automatically filtered.
+
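+Contact your admin to set `zdrEnabled: true` on your team record. Note that the admin compliance API shown below updates **provider-level** compliance data (which providers count as ZDR-verified); it does not set the team flag:
+
+```bash
+# Admin marks a provider as ZDR-verified (provider-level data, not the team `zdrEnabled` flag)
+curl -X PUT https://router.tangle.tools/api/admin/compliance \
+  -H "Cookie: session_token=ADMIN_SESSION" -H "Content-Type: application/json" \
+  -d '{"providerId": "...", "zdr": true}'
+```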
+
+### Option B: Per-request ZDR
+
+Add `zeroDataRetention: true` to individual requests. Useful for mixed workloads where only some requests handle sensitive data.
+
+```python
+response = client.chat.completions.create(
+    model="anthropic/claude-sonnet-4-6",
+    messages=[...],
+    extra_body={
+        "providerOptions": {
+            "gateway": {"zeroDataRetention": True}
+        }
+    }
+)
+```
+
+## Step 3: Verify provider coverage
+
+Check which providers are ZDR-verified for the models you need:
+
+```bash
+curl https://router.tangle.tools/api/gateway/compliance | jq '.providers[] | select(.zdr == true)'
+```
+
+If your required model is only available from a non-ZDR provider, the request will return 400 with a clear error listing which providers were considered.
+
+## Step 4: Set up BYOK (optional)
+
+For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you:
+- Zero platform markup
+- Direct contractual relationship with the provider
+- ZDR enforcement still applies on the fallback path
+
+## Step 5: Monitor compliance
+
+Use the [generation lookup API](/gateway/generation-lookup) to audit requests:
+
+```bash
+# Check if a specific request used a ZDR provider
+curl -H "Authorization: Bearer sk-tan-..." \
+  "https://router.tangle.tools/v1/generation?id=gen_..." \
+  | jq '.data.provider_name'
+```
+
+The `routing_trace` field shows exactly which providers were considered and filtered.
+
+## Combining ZDR + no-train
+
+Both flags work as an AND: when both are enabled, requests are routed only to providers that satisfy both criteria. This is the strictest compliance level.
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "zeroDataRetention": true,
+      "disallowPromptTraining": true
+    }
+  }
+}
+```
diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx
new file mode 100644
index 00000000..bef37ec8
--- /dev/null
+++ b/pages/gateway/fallbacks.mdx
@@ -0,0 +1,69 @@
+---
+title: Model Fallbacks
+description: Configure backup models that are tried when the primary model fails.
+---
+
+# Model Fallbacks
+
+Specify backup models that are tried in order if the primary model fails or is unavailable.
+
+## Usage
+
+Pass a `models` array in `providerOptions.gateway`:
+
+```json
+{
+  "model": "openai/gpt-4o",
+  "messages": [{"role": "user", "content": "Hello"}],
+  "providerOptions": {
+    "gateway": {
+      "models": ["anthropic/claude-sonnet-4-6", "groq/llama-3.1-70b-versatile"]
+    }
+  }
+}
+```
+
+The gateway tries:
+1. `openai/gpt-4o` (primary model)
+2. `anthropic/claude-sonnet-4-6` (first fallback)
+3. `groq/llama-3.1-70b-versatile` (second fallback)
+
+The response comes from the first model that succeeds.
+
+## How fallback works
+
+For each model in the list, the gateway runs the full routing chain:
+
+1. **Operators** — try operators serving this model (if available)
+2. **LiteLLM** — try the proxy with built-in retries
+3. **Direct provider** — call the provider API directly
+
+If all tiers fail for a model, the gateway moves to the next model in the list.
+
+## Combining with provider ordering
+
+Use `models` with `order` to control both model fallback and provider preference:
+
+```json
+{
+  "model": "openai/gpt-4o",
+  "providerOptions": {
+    "gateway": {
+      "models": ["anthropic/claude-sonnet-4-6"],
+      "order": ["bedrock", "anthropic"]
+    }
+  }
+}
+```
+
+This tries:
+1. `openai/gpt-4o` via available providers
+2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct
+
+## Observability
+
+When fallbacks occur, the [routing trace](/gateway/routing-trace) shows every model and provider attempted:
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(err:5001ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx
new file mode 100644
index 00000000..6fe29e23
--- /dev/null
+++ b/pages/gateway/feature-flags.mdx
@@ -0,0 +1,35 @@
+---
+title: Feature Flags
+description: Disable gateway features without a code deploy.
+---
+
+# Feature Flags
+
+All gateway features are on by default. Set any flag to `false` to disable it without deploying new code.
+
+## Available flags
+
+| Environment Variable | Default | Controls |
+|---------------------|---------|----------|
+| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning |
+| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests |
+| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) |
+| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic |
+| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header |
+
+## Usage
+
+Set in your environment:
+
+```bash
+ENABLE_GUARDRAILS=false    # Disable all guardrail scanning
+ENABLE_RESPONSE_CACHE=false # Disable response cache reads/writes
+```
+
+## Notes
+
+- `ENABLE_COMPLIANCE_FILTER` only disables the early validation check that returns a 400 before routing. The actual ZDR/no-train enforcement in the routing tiers (skip operators, skip LiteLLM) stays active regardless. This flag is for suppressing the early error, not for bypassing compliance.
+
+- When `ENABLE_GUARDRAILS=false`, no PII or injection scanning occurs. The `X-Tangle-Guardrails` header is never set. GuardrailEvent records are not created.
+
+- When `ENABLE_RESPONSE_CACHE=false`, every request hits the provider. Cached entries are not read or written. Existing cache entries are not purged (they expire naturally via TTL).
diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx
new file mode 100644
index 00000000..2123547b
--- /dev/null
+++ b/pages/gateway/free-tier.mdx
@@ -0,0 +1,62 @@
+---
+title: Free Tier
+description: Free access to small models with daily limits.
+---
+
+# Free Tier
+
+Try the gateway without credits. The free tier is restricted to cheap, fast models and enforces daily request limits.
+
+## Limits
+
+| Tier | Daily limit | Rate limit |
+|------|------------|------------|
+| Anonymous (no auth) | 5 req/day | 10 req/min |
+| Authenticated (zero credits) | 20 req/day | 30 req/min |
+| Paid (any credits) | Unlimited | 60 req/min |
+
+## Allowed models
+
+Free tier requests can use:
+
+| Model | Provider | Why it's free |
+|-------|----------|---------------|
+| `gpt-4o-mini` | OpenAI | Small, cheap |
+| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap |
+| `llama-3.1-8b-instant` | Groq | Free tier inference |
+| `llama-3.2-1b-preview` | Groq | Tiny model |
+| `llama-3.2-3b-preview` | Groq | Small model |
+| `gemini-2.0-flash-lite` | Google | Free tier |
+| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap |
+| `deepseek-chat` | DeepSeek | Very cheap |
+
+## Blocked models
+
+These models require credits:
+
+- **OpenAI reasoning:** o1, o3, o4 (all variants)
+- **OpenAI flagship:** gpt-4o, gpt-4, gpt-5 (gpt-4o-mini is allowed)
+- **Anthropic flagship:** claude-opus, claude-sonnet (haiku is allowed)
+- **Google flagship:** gemini-2.5-pro, gemini-2.5-ultra
+- **xAI flagship:** grok-2, grok-3
+
+Requesting a blocked model without credits returns 402:
+
+```json
+{
+  "error": {
+    "message": "Model \"gpt-4o\" requires credits. Free tier models: gpt-4o-mini, llama-3.1-8b-instant, gemini-2.0-flash-lite, deepseek-chat. Add credits or use a free tier model.",
+    "type": "insufficient_funds",
+    "code": "free_tier_limit"
+  }
+}
+```
+
+## Response headers
+
+Free tier responses include remaining quota:
+
+```
+X-Free-Tier-Remaining: 3
+X-Free-Tier-Limit: 5
+```
diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx
new file mode 100644
index 00000000..31c780ef
--- /dev/null
+++ b/pages/gateway/generation-lookup.mdx
@@ -0,0 +1,71 @@
+---
+title: Generation Lookup
+description: Retrieve detailed information about any request by its generation ID.
+---
+
+# Generation Lookup
+
+Every request returns a unique generation ID in the `X-Generation-Id` header. Use it to look up full request details.
+
+## Endpoint
+
+```
+GET /v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV
+```
+
+Requires authentication. Returns details only for requests made by the authenticated user.
+
+## Example
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+  "https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV"
+```
+
+## Response
+
+```json
+{
+  "data": {
+    "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV",
+    "total_cost": 0.00123,
+    "usage": 0.00123,
+    "created_at": "2026-04-10T12:00:00.000Z",
+    "model": "anthropic/claude-sonnet-4-6",
+    "is_byok": false,
+    "provider_name": "anthropic",
+    "streamed": true,
+    "latency": 200,
+    "generation_time": 1500,
+    "tokens_prompt": 100,
+    "tokens_completion": 50,
+    "native_tokens_cached": 80,
+    "native_tokens_reasoning": 0,
+    "status": "success",
+    "routing_trace": {
+      "planningReasoning": "ZDR requested: filtering to 13 ZDR providers",
+      "modelAttempts": [...],
+      "totalLatencyMs": 1500
+    },
+    "cache_hit": false
+  }
+}
+```
+
+## Fields
+
+| Field | Description |
+|-------|-------------|
+| `id` | Generation ID (`gen_<ulid>`) |
+| `total_cost` | Total cost in USD |
+| `model` | Model that served the request |
+| `is_byok` | Whether BYOK credentials were used |
+| `provider_name` | Provider that served the request |
+| `streamed` | Whether the request used streaming |
+| `latency` | Time to first token (ms) |
+| `generation_time` | Total generation time (ms) |
+| `tokens_prompt` / `tokens_completion` | Token counts |
+| `native_tokens_cached` | Tokens served from provider cache |
+| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) |
+| `routing_trace` | Full routing attempt history |
+| `cache_hit` | Whether response was served from gateway cache |
diff --git a/pages/gateway/getting-started.mdx b/pages/gateway/getting-started.mdx
new file mode 100644
index 00000000..30d1edd2
--- /dev/null
+++ b/pages/gateway/getting-started.mdx
@@ -0,0 +1,94 @@
+---
+title: Getting Started
+description: Make your first inference request through Tangle Gateway in 2 minutes.
+---
+
+# Getting Started
+
+## 1. Get an API key
+
+Sign up at [router.tangle.tools](https://router.tangle.tools) and create an API key from the dashboard. Keys start with `sk-tan-`.
+
+## 2. Make a request
+
+### curl
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "openai/gpt-4o-mini",
+    "messages": [{"role": "user", "content": "What is Tangle?"}],
+    "stream": false
+  }'
+```
+
+### Python (OpenAI SDK)
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-tan-YOUR_KEY",
+    base_url="https://router.tangle.tools/v1"
+)
+
+response = client.chat.completions.create(
+    model="anthropic/claude-sonnet-4-6",
+    messages=[{"role": "user", "content": "What is Tangle?"}]
+)
+print(response.choices[0].message.content)
+```
+
+### TypeScript (AI SDK)
+
+```typescript
+import { generateText } from 'ai'
+import { createOpenAI } from '@ai-sdk/openai'
+
+const tangle = createOpenAI({
+  apiKey: 'sk-tan-YOUR_KEY',
+  baseURL: 'https://router.tangle.tools/v1',
+})
+
+const { text } = await generateText({
+  model: tangle('anthropic/claude-sonnet-4-6'),
+  prompt: 'What is Tangle?',
+})
+```
+
+## 3. Check the response headers
+
+Every response includes metadata headers:
+
+```
+X-Generation-Id: gen_01J5K7...       # Unique request ID
+X-Tangle-Price-Input: 0.000003       # USD per input token
+X-Tangle-Price-Output: 0.000015      # USD per output token
+X-Tangle-Cache: MISS                 # Response cache status
+X-RateLimit-Remaining: 59            # Requests left in window
+```
+
+Use the generation ID to look up request details later via [`GET /v1/generation`](/gateway/api-generation).
+
+## 4. Try different models
+
+The model ID format is `provider/model-name`:
+
+```
+openai/gpt-4o-mini
+anthropic/claude-sonnet-4-6
+google/gemini-2.0-flash-lite
+groq/llama-3.1-8b-instant
+deepseek/deepseek-chat
+mistral/mistral-large-latest
+```
+
+You can also use bare model names (`gpt-4o-mini`, `claude-sonnet-4-6`) — the gateway resolves the provider automatically.
+
+## What's next
+
+- [Bring Your Own Key](/gateway/byok) — use your existing provider API keys for zero markup
+- [Model Fallbacks](/gateway/fallbacks) — configure backup models
+- [Zero Data Retention](/gateway/zdr) — compliance for sensitive workloads
diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx
new file mode 100644
index 00000000..af7cf3ca
--- /dev/null
+++ b/pages/gateway/guardrails.mdx
@@ -0,0 +1,63 @@
+---
+title: Guardrails
+description: Gateway-level PII detection and prompt injection scanning.
+---
+
+# Guardrails
+
+The gateway scans all requests for PII and prompt injection patterns before routing. Results are available in the `X-Tangle-Guardrails` response header.
+
+## Detection categories
+
+### PII detection
+
+| Pattern | Severity | Example |
+|---------|----------|---------|
+| SSN | Critical | `123-45-6789` |
+| Credit card (Visa/MC/Discover) | Critical | `4111 1111 1111 1111` |
+| Credit card (Amex) | Critical | `3782 822463 10005` |
+| Email | Low | `user@example.com` |
+| US phone | Medium | `(555) 123-4567` |
+| IP address | Low | `192.168.1.1` |
+
+### Prompt injection detection
+
+Applied to user messages only (not system or assistant):
+
+| Pattern | Severity |
+|---------|----------|
+| "Ignore all previous instructions" | High |
+| "You are now a different AI" | High |
+| "Pretend you have no restrictions" | High |
+| "Reveal your system prompt" | Medium |
+| DAN-mode jailbreaks | High |
+
+## Modes
+
+### Audit mode (default)
+
+Flags are logged and returned in the `X-Tangle-Guardrails` header but requests are not blocked:
+
+```
+X-Tangle-Guardrails: pii:low,prompt_injection:high
+```
+
+### Block mode
+
+Requests matching configured categories are rejected with 400:
+
+```json
+{
+  "error": {
+    "message": "Request blocked by guardrails: pii, prompt_injection",
+    "type": "invalid_request_error",
+    "code": "guardrail_blocked"
+  }
+}
+```
+
+Block mode requires a `GuardrailPolicy` record configured for your team or user with specific categories to block.
+
+## Disabling
+
+Set `ENABLE_GUARDRAILS=false` to skip all scanning. See [Feature Flags](/gateway/feature-flags).
diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx
new file mode 100644
index 00000000..314d97da
--- /dev/null
+++ b/pages/gateway/how-routing-works.mdx
@@ -0,0 +1,64 @@
+---
+title: How Routing Works
+description: The three-tier routing architecture behind Tangle Gateway.
+---
+
+# How Routing Works
+
+Every request passes through up to three routing tiers. The gateway tries each tier in order and returns the first successful response.
+
+## The three tiers
+
+```
+Request → Tier 1: Operators → Tier 2: LiteLLM → Tier 3: Direct Provider → Response
+```
+
+### Tier 1: Operator routing
+
+Operators are independent inference providers registered on the Tangle network. They stake tokens, serve models, and compete on price, latency, and reputation.
+
+- Selected by [scoring algorithm](/gateway/smart-routing): reputation (40%) + latency (30%) + price (30%)
+- Discovered automatically from on-chain Blueprint Service Manager contracts
+- Can be pinned by blueprint, service, or operator address
+
+**When it's used:** Default for `auto` routing mode, required for SpendAuth (on-chain payments).
+
+**When it's skipped:** When [ZDR](/gateway/zdr) or [no-train](/gateway/no-train) is requested (the gateway cannot verify an operator's compliance), or when `routing: "provider"` is set explicitly.
+
+### Tier 2: LiteLLM proxy
+
+An internal proxy that handles 100+ provider integrations with built-in retries and provider-level fallbacks.
+
+**When it's used:** Default for standard requests when no operator is available.
+
+**When it's skipped:** When ZDR or no-train is requested (LiteLLM's downstream routing is not compliance-controllable). When LiteLLM is not configured (`LITELLM_URL` unset).
+
+### Tier 3: Direct provider
+
+The gateway calls the provider API directly using platform credentials (or [BYOK](/gateway/byok) credentials).
+
+**When it's used:** Fallback when tiers 1 and 2 fail. Only tier used when compliance routing is active.
+
+**Always used for:** ZDR requests, no-train requests, BYOK with compliance flags.
+
+## Compliance mode
+
+When `zeroDataRetention` or `disallowPromptTraining` is set:
+
+```
+Request → Tier 3: Direct Provider (verified only) → Response
+```
+
+Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with verified compliance agreements. See [Zero Data Retention](/gateway/zdr) for the trust model.
+
+## Routing control
+
+| Method | Effect |
+|--------|--------|
+| `routing: "auto"` | Try all three tiers (default) |
+| `routing: "operator"` | Operators only. Fails if no operator available. |
+| `routing: "provider"` | Skip operators, use LiteLLM + direct only. |
+| `X-Tangle-Blueprint: <id>` | Pin to operators under this Blueprint. |
+| `X-Tangle-Operator: <slug or address>` | Pin to a specific operator by slug or Ethereum address. |
+| `providerOptions.gateway.order` | Control which providers are tried and in what order. |
+| `providerOptions.gateway.only` | Restrict to these providers only. |
diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx
new file mode 100644
index 00000000..84b2549d
--- /dev/null
+++ b/pages/gateway/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Tangle Gateway
+description: Unified API for hundreds of AI models with built-in routing, compliance, and on-chain payments.
+---
+
+# Tangle Gateway
+
+Tangle Gateway is a unified inference API. One endpoint, hundreds of models, automatic routing across centralized providers and decentralized operators.
+
+## What it does
+
+- **One key, any model.** Access models from 20+ providers — including OpenAI, Anthropic, Google, Groq, and Mistral — through a single API key.
+- **Operator network.** Route to decentralized operators on the Tangle network who compete on price, latency, and reputation.
+- **Compliance routing.** Zero Data Retention and no-train filtering with verified provider agreements.
+- **BYOK.** Bring your own provider keys for zero-markup access.
+- **On-chain payments.** Pay operators directly via SpendAuth — no credit card required.
+
+## Quick example
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "Authorization: Bearer $TANGLE_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "anthropic/claude-sonnet-4-6",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "stream": true
+  }'
+```
+
+Works with any OpenAI-compatible SDK. Change the base URL and you're done.
+
+## Architecture
+
+The gateway routes through three tiers, in order:
+
+| Tier | What | When |
+|------|------|------|
+| **Operators** | Decentralized inference providers on Tangle | Tried first in `auto` mode; required for operator-pinned requests and SpendAuth |
+| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Standard requests when no operator is available |
+| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required |
+
+When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only.
+
+## How it fits
+
+```
+Workbench (agents) → Gateway (inference) → Operators (serving) → Protocol (settlement)
+```
+
+The gateway sits between the [Workbench](/vibe/introduction) where agents run and the [Protocol](/network/overview) where operators get paid. Agents in the workbench call the gateway for model access. The gateway selects the best provider or operator, routes the request, tracks usage, and settles payment.
+
+## Next steps
+
+- [Getting Started](/gateway/getting-started) — make your first request in 2 minutes
+- [Supported Models](/gateway/models) — browse the model catalog
+- [How Routing Works](/gateway/how-routing-works) — understand the 3-tier architecture
+- [Zero Data Retention](/gateway/zdr) — compliance for regulated industries
diff --git a/pages/gateway/migrate-openai.mdx b/pages/gateway/migrate-openai.mdx
new file mode 100644
index 00000000..c27336f5
--- /dev/null
+++ b/pages/gateway/migrate-openai.mdx
@@ -0,0 +1,77 @@
+---
+title: Migrate from OpenAI
+description: Switch from OpenAI's API to Tangle Gateway in under 5 minutes.
+---
+
+# Migrate from OpenAI
+
+Tangle Gateway is OpenAI-compatible. Change two lines and you're done.
+
+## Python
+
+```diff
+  from openai import OpenAI
+
+  client = OpenAI(
+-     api_key="sk-...",
++     api_key="sk-tan-YOUR_KEY",
++     base_url="https://router.tangle.tools/v1",
+  )
+
+  response = client.chat.completions.create(
+-     model="gpt-4o",
++     model="openai/gpt-4o",  # or just "gpt-4o" — auto-resolved
+      messages=[{"role": "user", "content": "Hello"}]
+  )
+```
+
+## TypeScript
+
+```diff
+  import OpenAI from 'openai'
+
+  const client = new OpenAI({
+-   apiKey: 'sk-...',
++   apiKey: 'sk-tan-YOUR_KEY',
++   baseURL: 'https://router.tangle.tools/v1',
+  })
+```
+
+## curl
+
+```diff
+- curl https://api.openai.com/v1/chat/completions \
+-   -H "Authorization: Bearer sk-..." \
++ curl https://router.tangle.tools/v1/chat/completions \
++   -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+    -H "Content-Type: application/json" \
+    -d '{"model": "gpt-4o", "messages": [...]}'
+```
+
+## What you get
+
+By switching to Tangle Gateway, you get:
+
+- **Access to every provider** through the same client. Try `anthropic/claude-sonnet-4-6` or `groq/llama-3.1-70b-versatile` without changing SDKs.
+- **Automatic fallbacks.** Configure backup models so requests fail over automatically if OpenAI is down.
+- **Cost visibility.** Every response reports the per-token prices applied to it via `X-Tangle-Price-*` headers; look up the total cost with [`GET /v1/generation`](/gateway/api-generation).
+- **Compliance routing.** One flag for ZDR, one flag for no-train.
+- **BYOK.** Keep using your OpenAI key with zero markup. Add it to [`providerOptions.gateway.byok`](/gateway/byok).
+
+## Keep your OpenAI key (zero markup)
+
+If you already have an OpenAI API key, use [BYOK](/gateway/byok) for zero platform markup:
+
+```python
+response = client.chat.completions.create(
+    model="openai/gpt-4o",
+    messages=[{"role": "user", "content": "Hello"}],
+    extra_body={
+        "providerOptions": {
+            "gateway": {
+                "byok": {"openai": [{"apiKey": "sk-YOUR_OPENAI_KEY"}]}
+            }
+        }
+    }
+)
+```
diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx
new file mode 100644
index 00000000..b18462a8
--- /dev/null
+++ b/pages/gateway/migrate-vercel.mdx
@@ -0,0 +1,80 @@
+---
+title: Migrate from Vercel AI Gateway
+description: Switch from Vercel AI Gateway to Tangle Gateway.
+---
+
+# Migrate from Vercel AI Gateway
+
+Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI Gateway. Most code works unchanged.
+
+## What maps directly
+
+| Vercel Feature | Tangle Equivalent | Notes |
+|---|---|---|
+| `providerOptions.gateway.byok` | Same | Identical schema |
+| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers |
+| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers |
+| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection |
+| `providerOptions.gateway.order` | Same | Provider priority |
+| `providerOptions.gateway.only` | Same | Provider allowlist |
+| `models` fallback array | Same | Model-level failover |
+| `GET /v1/credits` | Same | Balance check |
+| `GET /v1/generation` | Same | Request detail lookup |
+
+## What's different
+
+| Feature | Vercel | Tangle |
+|---------|--------|--------|
+| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` |
+| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) |
+| **Pricing** | Zero markup | 20% markup (0% with BYOK) |
+| **Operator network** | None | Decentralized operators compete on price/latency |
+| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card |
+| **Guardrails** | None | PII + injection detection built-in |
+| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) |
+| **OIDC auth** | Vercel-only | Not applicable |
+
+## Code change
+
+### AI SDK
+
+```diff
+  import { generateText } from 'ai'
++ import { createOpenAI } from '@ai-sdk/openai'
+
++ const tangle = createOpenAI({
++   apiKey: 'sk-tan-YOUR_KEY',
++   baseURL: 'https://router.tangle.tools/v1',
++ })
+
+  const { text } = await generateText({
+-   model: 'anthropic/claude-sonnet-4-6',
++   model: tangle('anthropic/claude-sonnet-4-6'),
+    prompt: 'Hello',
+    providerOptions: {
+      gateway: {
+        zeroDataRetention: true,  // works the same
+        caching: 'auto',          // works the same
+      },
+    },
+  })
+```
+
+### OpenAI SDK
+
+```diff
+  const client = new OpenAI({
+-   apiKey: process.env.AI_GATEWAY_API_KEY,
+-   baseURL: 'https://ai-gateway.vercel.sh/v1',
++   apiKey: process.env.TANGLE_API_KEY,
++   baseURL: 'https://router.tangle.tools/v1',
+  })
+```
+
+## What you gain
+
+- **Operator network.** Access decentralized inference providers who compete on price and latency.
+- **On-chain payments.** Pay with crypto via SpendAuth — no Stripe/credit card required.
+- **Wallet auth.** Sign in with Ethereum (SIWE) for web3-native access.
+- **Guardrails.** Built-in PII and prompt injection detection on every request.
+- **Self-hostable.** Deploy your own gateway instance — it's open source.
diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx
new file mode 100644
index 00000000..5c009c9b
--- /dev/null
+++ b/pages/gateway/models.mdx
@@ -0,0 +1,69 @@
+---
+title: Supported Models
+description: Browse models available through Tangle Gateway across 20+ providers.
+---
+
+# Supported Models
+
+Tangle Gateway provides access to models from 20+ providers through a single API.
+
+## Providers
+
+| Provider | Slug | Models |
+|----------|------|--------|
+| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS |
+| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku |
+| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite |
+| Groq | `groq` | Llama 3.1/3.2 (fast inference) |
+| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) |
+| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder |
+| Mistral | `mistral` | Mistral Large, Codestral, Pixtral |
+| Fireworks | `fireworks` | Phi, StarCoder, open models |
+| Cohere | `cohere` | Command R/R+ |
+| xAI | `xai` | Grok 2, Grok 3 |
+| Cerebras | `cerebras` | Llama (fast inference) |
+| SambaNova | `sambanova` | Fast open-model inference |
+| AI21 | `ai21` | Jamba |
+| Nvidia | `nvidia` | Nemotron |
+| Z.ai | `zai` | GLM-4.7, GLM-5 |
+| Moonshot | `moonshot` | Kimi |
+
+Plus operators on the Tangle network serving custom and open-source models.
+
+## Model ID format
+
+Use `provider/model-name`:
+
+```
+anthropic/claude-sonnet-4-6
+openai/gpt-4o-mini
+groq/llama-3.1-70b-versatile
+```
+
+Or use bare names — the gateway resolves the provider by prefix:
+
+| Prefix | Resolves to |
+|--------|-------------|
+| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI |
+| `claude-` | Anthropic |
+| `gemini-`, `gemma-` | Google |
+| `llama-`, `mixtral-` | Groq |
+| `deepseek-` | DeepSeek |
+| `mistral-`, `codestral-` | Mistral |
+| `grok-` | xAI |
+| `glm-` | Z.ai |
+| `command-` | Cohere |
+
+## Modalities
+
+| Modality | Endpoint | Examples |
+|----------|----------|---------|
+| Text | `/v1/chat/completions` | All chat models |
+| Images | `/v1/images/generations` | DALL-E, FLUX |
+| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS |
+| Embeddings | `/v1/embeddings` | text-embedding-3-small/large |
+| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) |
+
+## Dynamic discovery
+
+The model catalog is available at [`GET /api/models`](https://router.tangle.tools/api/models) with pricing, context length, and modality information for every model.
diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx
new file mode 100644
index 00000000..da6154af
--- /dev/null
+++ b/pages/gateway/no-train.mdx
@@ -0,0 +1,43 @@
+---
+title: Disallow Prompt Training
+description: Route only through providers that don't use your data for model training.
+---
+
+# Disallow Prompt Training
+
+Ensure your prompts and responses are never used by providers to train their models.
+
+## Usage
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "disallowPromptTraining": true
+    }
+  }
+}
+```
+
+## Relationship to ZDR
+
+Disallowing prompt training is a weaker guarantee than [Zero Data Retention](/gateway/zdr): its requirement is a **subset** of what ZDR requires. Every ZDR-compliant provider also disallows prompt training, so the set of no-train providers is larger than the set offering full ZDR.
+
+| Filter | Verified providers |
+|--------|-------------------|
+| No-train (includes all ZDR providers) | 25 providers |
+| ZDR (includes no-train) | 13 providers |
+
+Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees.
+
+## No-train verified providers
+
+All ZDR providers plus: OpenAI, Google AI Studio, Cohere, Perplexity, xAI, Morph AI, Novita AI, Voyage AI, and others.
+
+See the full list at [`GET /api/gateway/compliance`](/gateway/api-compliance).
+
+## Routing behavior
+
+Same as ZDR: operators and LiteLLM are skipped. Only direct provider calls to verified no-train providers.
+
+No-train routing can also be enabled team-wide by setting `noTrainEnabled: true` on the team record.
diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx
new file mode 100644
index 00000000..09d9c767
--- /dev/null
+++ b/pages/gateway/operator-routing.mdx
@@ -0,0 +1,75 @@
+---
+title: Operator Routing
+description: Route inference through decentralized operators on the Tangle network.
+---
+
+# Operator Routing
+
+Operators are independent inference providers registered on the Tangle network. They run models on their own hardware, set their own prices, and earn from every request routed through them.
+
+## How operators are discovered
+
+1. Operators register on-chain via the Blueprint Service Manager (BSM) contract
+2. The gateway syncs operator data from the chain every 60 seconds
+3. Operators are stored in the database with their endpoint URL, pricing, and status
+4. The [scoring algorithm](/gateway/smart-routing) ranks operators per-request
+
+## Routing to operators
+
+### Automatic (default)
+
+In `auto` mode, the gateway checks for operators serving the requested model before trying centralized providers:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+  -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to a Blueprint
+
+Route only to operators registered under a specific Blueprint:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "X-Tangle-Blueprint: 42" \
+  -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to an operator
+
+Route to a specific operator by slug or Ethereum address:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "X-Tangle-Operator: tangle-core-1" \
+  -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to a service instance
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "X-Tangle-Service: 7" \
+  -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+## What's verified on-chain
+
+| Data | Verified? |
+|------|-----------|
+| Operator Ethereum address | Yes (signed transaction) |
+| Active/inactive status | Yes (BSM contract state) |
+| Staked amount | Yes (on-chain balance) |
+| Pricing (per-token) | Yes (BSM contract) |
+| Endpoint URL | No (self-reported at registration) |
+| Backing provider | No (not tracked) |
+
+Because the endpoint URL is self-reported and the backing provider is not tracked, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers.
+
+## Payment
+
+Operator requests can be paid two ways:
+
+1. **Platform credits** — deducted from your credit balance at the operator's listed price
+2. **SpendAuth (on-chain)** — direct EIP-712 signed payment to the operator. No credit card needed. See [SpendAuth](/gateway/spend-auth).
diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx
new file mode 100644
index 00000000..59e55c65
--- /dev/null
+++ b/pages/gateway/pricing.mdx
@@ -0,0 +1,55 @@
+---
+title: Credits & Pricing
+description: How billing works on Tangle Gateway.
+---
+
+# Credits & Pricing
+
+## Pricing model
+
+| Credential type | Markup |
+|----------------|--------|
+| Platform credentials | 20% above provider list price |
+| [BYOK](/gateway/byok) | **0%** — provider list price, no markup |
+| [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) |
+
+The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them.
+
+## Credits
+
+Credits are denominated in USD. Purchase via Stripe or receive as part of a subscription plan.
+
+Check your balance:
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+  https://router.tangle.tools/v1/credits
+```
+
+```json
+{
+  "balance": "95.50",
+  "total_used": "4.50"
+}
+```
+
+## Cost per request
+
+Each request is charged based on tokens:
+
+```
+cost = (input_tokens × input_price) + (output_tokens × output_price)
+```
+
+Pricing varies by model. Check per-model pricing at [`GET /api/models`](https://router.tangle.tools/api/models) or in the `X-Tangle-Price-Input` / `X-Tangle-Price-Output` response headers.
+
+## Billing transparency
+
+Every response includes pricing headers so you can see the per-token rates applied to the request:
+
+```
+X-Tangle-Price-Input: 0.000003      # USD per input token
+X-Tangle-Price-Output: 0.000015     # USD per output token
+```
+
+Look up detailed billing for any request via [`GET /v1/generation`](/gateway/api-generation).
diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx
new file mode 100644
index 00000000..eb0fe5b1
--- /dev/null
+++ b/pages/gateway/provider-options.mdx
@@ -0,0 +1,70 @@
+---
+title: providerOptions.gateway
+description: Complete reference for gateway-specific request options.
+---
+
+# providerOptions.gateway
+
+All gateway-specific options are passed inside `providerOptions.gateway` in the request body. These are stripped before forwarding to providers.
+
+## Full schema
+
+```typescript
+interface GatewayOptions {
+  // Bring Your Own Key
+  byok?: Record<string, Array<{ apiKey: string }>>
+
+  // Compliance routing
+  zeroDataRetention?: boolean
+  disallowPromptTraining?: boolean
+
+  // Caching
+  caching?: 'auto' | false
+  cache?: false  // disable response caching
+
+  // Provider routing
+  order?: string[]   // provider priority
+  only?: string[]    // provider allowlist
+
+  // Model fallbacks
+  models?: string[]  // tried in order after primary model
+
+  // Timeouts in milliseconds (clamped to 1,000–120,000, i.e. 1s–120s)
+  timeout?: number | Record<string, number>
+}
+```
+
+## Options reference
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `byok` | `Record<string, Array<{apiKey}>>` | — | Per-request provider credentials. [Details](/gateway/byok) |
+| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) |
+| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) |
+| `caching` | `'auto' \| false` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) |
+| `cache` | `false` | — | Set `false` to skip response cache for this request. |
+| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) |
+| `only` | `string[]` | — | Restrict to these providers only. |
+| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) |
+| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts) |
+
+## Example: everything at once
+
+```json
+{
+  "model": "anthropic/claude-sonnet-4-6",
+  "messages": [{"role": "user", "content": "Hello"}],
+  "providerOptions": {
+    "gateway": {
+      "byok": {
+        "anthropic": [{"apiKey": "sk-ant-..."}]
+      },
+      "zeroDataRetention": true,
+      "caching": "auto",
+      "models": ["openai/gpt-4o"],
+      "timeout": {"anthropic": 10000, "openai": 5000},
+      "order": ["anthropic", "openai"]
+    }
+  }
+}
+```
diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx
new file mode 100644
index 00000000..587ed74f
--- /dev/null
+++ b/pages/gateway/rate-limiting.mdx
@@ -0,0 +1,44 @@
+---
+title: Rate Limiting
+description: Rate limits by authentication method.
+---
+
+# Rate Limiting
+
+The gateway enforces sliding-window rate limits per authentication method.
+
+## Limits
+
+| Auth method | Rate limit | Daily limit |
+|-------------|-----------|-------------|
+| API Key | 60 req/min | Unlimited (with credits) |
+| Session | 30 req/min | Unlimited (with credits) |
+| SpendAuth | 120 req/min | Unlimited |
+| Anonymous | 10 req/min | 5 req/day |
+| Authenticated (no credits) | 30 req/min | 20 req/day |
+
+## Response headers
+
+Every response includes rate limit headers:
+
+```
+X-RateLimit-Limit: 60
+X-RateLimit-Remaining: 42
+X-RateLimit-Reset: 1712793600
+```
+
+## 429 responses
+
+When rate limited:
+
+```json
+{
+  "error": {
+    "message": "Rate limit exceeded for this API key.",
+    "type": "rate_limit_error",
+    "code": "rate_limit_exceeded"
+  }
+}
+```
+
+The `X-RateLimit-Reset` header indicates when the window resets (Unix timestamp in seconds).
diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx
new file mode 100644
index 00000000..f791ef95
--- /dev/null
+++ b/pages/gateway/response-headers.mdx
@@ -0,0 +1,41 @@
+---
+title: Response Headers
+description: Headers returned on every gateway response.
+---
+
+# Response Headers
+
+Every response from the gateway includes metadata headers.
+
+## Standard headers
+
+| Header | Description | Example |
+|--------|-------------|---------|
+| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` |
+| `X-Tangle-Price-Input` | USD per input token | `0.000003` |
+| `X-Tangle-Price-Output` | USD per output token | `0.000015` |
+| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` |
+| `X-RateLimit-Limit` | Requests allowed per window | `60` |
+| `X-RateLimit-Remaining` | Requests remaining | `42` |
+| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` |
+
+## Conditional headers
+
+| Header | When present | Description |
+|--------|-------------|-------------|
+| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path |
+| `X-Tangle-Operator` | When served by an operator | Operator slug |
+| `X-Tangle-BYOK` | When BYOK credentials used | `true` |
+| `X-Tangle-Caching` | When prompt caching applied | `auto` |
+| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` |
+| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` |
+| `X-Free-Tier-Remaining` | Free tier requests | `3` |
+| `X-Free-Tier-Limit` | Free tier daily cap | `5` |
+
+## Error response headers
+
+| Header | When present | Description |
+|--------|-------------|-------------|
+| `X-Payment-Required` | 402 responses | Amount needed (micro-USD) |
+| `X-Payment-Currency` | 402 responses | `tsUSD` |
+| `X-Payment-Methods` | 402 responses | `credits,spend_auth` |
diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx
new file mode 100644
index 00000000..23b643d5
--- /dev/null
+++ b/pages/gateway/routing-trace.mdx
@@ -0,0 +1,35 @@
+---
+title: Routing Trace
+description: See exactly which providers were tried for every request.
+---
+
+# Routing Trace
+
+By default, every response includes an `X-Tangle-Routing-Trace` header showing the routing path — which providers were tried, whether they succeeded, and how long each took.
+
+## Header format
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[operator(err:5001ms)→litellm(200:340ms)]
+```
+
+Format: `model[provider(status:latency)→provider(status:latency)]`
+
+Multiple models (from [fallbacks](/gateway/fallbacks)):
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
+
+## Sanitization
+
+The trace header is sanitized for safety:
+- Operator names are shown as generic `operator` (slugs not exposed)
+- Error messages are not included (only status codes)
+- Internal URLs and hostnames are never leaked
+
+For the full unredacted trace including error messages, use the [generation lookup API](/gateway/generation-lookup) — the `routing_trace` field in the response contains the complete history.
+
+## Disabling
+
+Set `ENABLE_ROUTING_TRACE=false` to omit the header from all responses. See [Feature Flags](/gateway/feature-flags).
diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx
new file mode 100644
index 00000000..c39bcc2c
--- /dev/null
+++ b/pages/gateway/smart-routing.mdx
@@ -0,0 +1,58 @@
+---
+title: Smart Routing
+description: How the gateway scores and selects operators.
+---
+
+# Smart Routing
+
+When multiple operators serve the same model, the gateway selects the best one using a weighted scoring algorithm.
+
+## Scoring formula
+
+```
+score = reputation(40%) + latency(30%) + price(30%)
+```
+
+| Factor | Weight | What it measures |
+|--------|--------|-----------------|
+| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history |
+| **Latency** | 30% | Inverse of average response time (lower = better) |
+| **Price** | 30% | Inverse of per-token price (cheaper = better) |
+
+## Operator selection
+
+1. Query all operators serving the requested model
+2. Filter: only `active` or `degraded` status, must be pipeline head
+3. Score each operator
+4. Sort by score descending
+5. Route to the highest-scoring operator
+
+If a preferred operator is specified (via `X-Tangle-Operator`), it's moved to the top of the ranked list regardless of score.
+
+## Health tracking
+
+The gateway tracks operator health via:
+
+- **Health checks** — periodic probes stored in `OperatorHealthCheck`
+- **Request outcomes** — success/failure recorded per request
+- **Latency tracking** — rolling average updated per request
+
+Operators that consistently fail are automatically deprioritized: their reputation drops and their average latency rises, both of which lower their score.
+
+## Provider ordering (non-operator)
+
+For direct provider routing, use `providerOptions.gateway.order` and `only`:
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "order": ["bedrock", "anthropic"],
+      "only": ["bedrock", "anthropic"]
+    }
+  }
+}
+```
+
+- `order`: Try providers in this order. The first provider with valid credentials wins.
+- `only`: Restrict to these providers. Others are excluded even if they have credentials.
diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx
new file mode 100644
index 00000000..1f4977c2
--- /dev/null
+++ b/pages/gateway/spend-auth.mdx
@@ -0,0 +1,52 @@
+---
+title: SpendAuth (On-Chain Payments)
+description: Pay operators directly on-chain via EIP-712 signed authorizations.
+---
+
+# SpendAuth
+
+SpendAuth lets you pay operators directly on-chain without a credit card or account. Sign an EIP-712 typed data message with your wallet, attach it to the request, and the operator claims payment after serving inference.
+
+## How it works
+
+1. **Sign:** Create an EIP-712 SpendAuth payload with your wallet
+2. **Send:** Attach the signature as `X-Payment-Signature` header
+3. **Authorize:** The gateway verifies the signature and calls `authorizeSpend` on-chain
+4. **Serve:** The operator processes your inference request
+5. **Claim:** The operator calls `claimPayment` to receive funds
+
+## Request format
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -H "X-Payment-Signature: {
+    \"commitment\": \"0xabc...\",
+    \"serviceId\": \"1\",
+    \"jobIndex\": 0,
+    \"amount\": \"1000000\",
+    \"operator\": \"0x70997970...\",
+    \"nonce\": \"42\",
+    \"expiry\": \"1712793600\",
+    \"signature\": \"0xff...\"
+  }" \
+  -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+## Security
+
+- **EIP-712 signatures:** Cryptographically verified against the signing address
+- **Nonce replay protection:** Each nonce can only be used once per commitment (in-memory + Redis store)
+- **On-chain verification:** The `ShieldedCredits` contract validates authorization and deducts balance
+- **Expiry:** SpendAuth payloads have a timestamp-based expiry
+
+## Rate limits
+
+SpendAuth requests get a generous 120 req/min limit per commitment since every request is paid.
+
+## On-chain contracts
+
+SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. The contract handles:
+- Balance management (deposit, authorize, claim)
+- Authorization verification (EIP-712 signature recovery)
+- Payment settlement (operator claims after serving)
diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx
new file mode 100644
index 00000000..10fe9645
--- /dev/null
+++ b/pages/gateway/timeouts.mdx
@@ -0,0 +1,56 @@
+---
+title: Provider Timeouts
+description: Configure per-provider timeouts for fast failover.
+---
+
+# Provider Timeouts
+
+Set timeouts to trigger fast failover when a provider is slow. Values are in milliseconds and are clamped to 1,000–120,000 ms (1–120 seconds).
+
+## Global timeout
+
+Apply the same timeout to all providers:
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "timeout": 5000
+    }
+  }
+}
+```
+
+## Per-provider timeouts
+
+Different providers have different latency profiles. Set timeouts individually:
+
+```json
+{
+  "providerOptions": {
+    "gateway": {
+      "timeout": {
+        "openai": 5000,
+        "anthropic": 10000,
+        "groq": 3000
+      }
+    }
+  }
+}
+```
+
+## Default behavior
+
+Without explicit timeouts, the gateway uses a 30-second default for all providers and a 30-second idle timeout for streaming responses.
+
+## Bounds
+
+All timeout values are clamped:
+- **Minimum:** 1,000ms (1 second)
+- **Maximum:** 120,000ms (2 minutes)
+
+Values outside this range are silently clamped to the nearest bound.
+
+## Interaction with fallbacks
+
+When a provider times out, it counts as a failure in the [routing trace](/gateway/routing-trace) and the gateway moves to the next option — either a different provider for the same model, or the next [fallback model](/gateway/fallbacks).
diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx
new file mode 100644
index 00000000..21340db9
--- /dev/null
+++ b/pages/gateway/zdr.mdx
@@ -0,0 +1,84 @@
+---
+title: Zero Data Retention
+description: Route requests only through providers with verified zero data retention agreements.
+---
+
+# Zero Data Retention (ZDR)
+
+When ZDR is enabled, the gateway routes requests **only** through providers that have verified agreements to delete all request data immediately after processing.
+
+## Enable per-request
+
+```json
+{
+  "model": "anthropic/claude-sonnet-4-6",
+  "messages": [{"role": "user", "content": "Analyze this sensitive data..."}],
+  "providerOptions": {
+    "gateway": {
+      "zeroDataRetention": true
+    }
+  }
+}
+```
+
+## Enable team-wide
+
+Set `zdrEnabled: true` on your team record. All requests from team members will enforce ZDR. Team-wide ZDR overrides per-request `zeroDataRetention: false`.
+
+## How it works
+
+When ZDR is enabled:
+
+1. **Operators are skipped.** Operators self-report their backing provider. The gateway cannot verify what provider an operator actually routes through, so operators are excluded from ZDR-compliant routing.
+
+2. **LiteLLM is skipped.** LiteLLM has its own internal fallback chain that may route to non-ZDR providers. Since we can't control LiteLLM's routing decisions, it's excluded.
+
+3. **Direct provider only.** The gateway calls the provider API directly, selecting only from verified ZDR providers.
+
+4. **BYOK fallback preserves ZDR.** If your [BYOK](/gateway/byok) credentials fail, the fallback to platform credentials still enforces ZDR filtering.
+
+## ZDR-verified providers
+
+| Provider | ZDR | No-Train | Policy |
+|----------|-----|----------|--------|
+| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) |
+| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) |
+| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) |
+| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) |
+| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) |
+| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) |
+| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) |
+| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) |
+| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) |
+| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) |
+| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) |
+| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) |
+| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) |
+
+Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys.
+
+## Trust model
+
+| Routing tier | ZDR behavior |
+|---|---|
+| **Operators** | Skipped. Self-reported backing provider is unverifiable. |
+| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. |
+| **Direct provider** | Routed only to verified ZDR providers. |
+| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. |
+| **Operator-only + ZDR** | 400 error. Conflicting requirements. |
+
+The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct.
+
+## Error responses
+
+If no ZDR-compliant provider is available for the requested model:
+
+```json
+{
+  "error": {
+    "message": "No ZDR providers available for model: deepseek/deepseek-chat. Providers considered: anthropic, groq, mistral, ...",
+    "type": "invalid_request_error",
+    "code": "no_providers_available"
+  }
+}
+```

From 486910627a5664a47a47adb6783c4978bfc82d7b Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Fri, 10 Apr 2026 15:32:56 -0700
Subject: [PATCH 2/4] fix: add Gateway to architecture page as inference layer

---
 pages/vision/architecture.mdx | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pages/vision/architecture.mdx b/pages/vision/architecture.mdx
index d64cd5bc..6b68d079 100644
--- a/pages/vision/architecture.mdx
+++ b/pages/vision/architecture.mdx
@@ -20,6 +20,7 @@ Tangle ties together three layers most platforms separate: the workbench where w
 | Layer           | Runs here                     | Examples                                                 |
 | --------------- | ----------------------------- | -------------------------------------------------------- |
 | Workbench       | Human and agent collaboration | Workflows, profiles, simulations, reviews                |
+| Gateway         | Inference routing and billing | Model access, BYOK, ZDR compliance, operator selection   |
 | Sandbox runtime | Executed tasks and tools      | Agent sessions, tool calls, file edits                   |
 | Protocol        | Coordination and settlement   | Service registry, operator payments, staking, incentives |
 
@@ -28,10 +29,13 @@ Tangle ties together three layers most platforms separate: the workbench where w
 **1) Execution Layer**  
 Sandboxed runtimes with isolation, resource limits, and audit logs. This is where tasks actually run.
 
-**2) Protocol Layer**  
+**2) Inference Layer**  
+The [Gateway](/gateway) routes inference requests across centralized providers and decentralized operators. It handles model selection, compliance filtering ([ZDR](/gateway/zdr), [no-train](/gateway/no-train)), [BYOK](/gateway/byok) credential management, and billing.
+
+**3) Protocol Layer**  
 The coordination plane. It handles operator discovery, payment routing, and incentive enforcement.
 
-**3) Experience Layer**  
+**4) Experience Layer**  
 The agentic workbench and [Blueprint SDK](https://github.com/tangle-network/blueprint/tree/main). This is where teams design workflows, run simulations, and ship services.
 
 The SDK also includes optional gateways for integrating external events and payment-driven HTTP job execution (webhooks,

From ad0904a7491531298b4a6400418bbd5ec0eeba6a Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Fri, 10 Apr 2026 15:49:34 -0700
Subject: [PATCH 3/4] fix: add Blueprint references throughout gateway docs

- operator-routing.mdx: added Blueprints section with LLM Inference,
  Vector Store, and Custom Blueprint table. Links to llm-inference-blueprint
  repo, tangle-inference-core, Blueprint SDK, and Blueprint Runner docs.
- how-routing-works.mdx: operators now reference Blueprints and BSM.
- models.mdx: added Blueprint-served models table.
- smart-routing.mdx: references Blueprints in scoring intro.
- spend-auth.mdx: references tangle-inference-core for operator settlement.
- index.mdx: operator network links to Blueprint intro.
- developers/blueprints/use-cases.mdx: added llm-inference-blueprint and
  tangle-inference-core to the AI section.
---
 pages/developers/blueprints/use-cases.mdx | 16 +++++++++++++++-
 pages/gateway/how-routing-works.mdx       |  5 +++--
 pages/gateway/index.mdx                   |  2 +-
 pages/gateway/models.mdx                  | 10 +++++++++-
 pages/gateway/operator-routing.mdx        | 16 +++++++++++++++-
 pages/gateway/smart-routing.mdx           |  2 +-
 pages/gateway/spend-auth.mdx              |  2 ++
 7 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/pages/developers/blueprints/use-cases.mdx b/pages/developers/blueprints/use-cases.mdx
index c329e097..39953f8e 100644
--- a/pages/developers/blueprints/use-cases.mdx
+++ b/pages/developers/blueprints/use-cases.mdx
@@ -21,7 +21,21 @@ Tangle Network enables developers to rapidly build and deploy secure multi-party
   displayStyle="row"
 />
 
-## AI
+## AI & Inference
+
+<GithubRepoCard
+  name="llm-inference-blueprint"
+  description="LLM inference serving via vLLM/Ollama with on-chain settlement. Powers the Tangle Gateway operator network."
+  url="https://github.com/tangle-network/llm-inference-blueprint"
+  displayStyle="row"
+/>
+
+<GithubRepoCard
+  name="tangle-inference-core"
+  description="Shared Rust crate for EIP-712 SpendAuth verification, nonce management, and on-chain payment settlement."
+  url="https://github.com/tangle-network/tangle-inference-core"
+  displayStyle="row"
+/>
 
 <GithubRepoCard
   name="coinbase-agentkit-blueprint"
diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx
index 314d97da..7829a012 100644
--- a/pages/gateway/how-routing-works.mdx
+++ b/pages/gateway/how-routing-works.mdx
@@ -15,11 +15,12 @@ Request → Tier 1: Operators → Tier 2: LiteLLM → Tier 3: Direct Provider 
 
 ### Tier 1: Operator routing
 
-Operators are independent inference providers registered on the Tangle network. They stake tokens, serve models, and compete on price, latency, and reputation.
+Operators run [Blueprints](/developers/blueprints/introduction) — on-chain service definitions like the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint). They stake tokens, serve models, and compete on price, latency, and reputation.
 
 - Selected by [scoring algorithm](/gateway/smart-routing): reputation (40%) + latency (30%) + price (30%)
-- Discovered automatically from on-chain Blueprint Service Manager contracts
+- Discovered automatically from on-chain [Blueprint Service Manager](/developers/blueprints/service-lifecycle) contracts
 - Can be pinned by blueprint, service, or operator address
+- See [Operator Routing](/gateway/operator-routing) for the full Blueprint catalog
 
 **When it's used:** Default for `auto` routing mode, required for SpendAuth (on-chain payments).
 
diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx
index 84b2549d..129b8b9b 100644
--- a/pages/gateway/index.mdx
+++ b/pages/gateway/index.mdx
@@ -10,7 +10,7 @@ Tangle Gateway is a unified inference API. One endpoint, hundreds of models, aut
 ## What it does
 
 - **One key, any model.** Access OpenAI, Anthropic, Google, Groq, Mistral, and 20+ providers through a single API key.
-- **Operator network.** Route to decentralized operators on the Tangle network who compete on price, latency, and reputation.
+- **Operator network.** Route to decentralized operators running [Blueprints](/developers/blueprints/introduction) on the Tangle network who compete on price, latency, and reputation.
 - **Compliance routing.** Zero Data Retention and no-train filtering with verified provider agreements.
 - **BYOK.** Bring your own provider keys for zero-markup access.
 - **On-chain payments.** Pay operators directly via SpendAuth — no credit card required.
diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx
index 5c009c9b..cf635ac4 100644
--- a/pages/gateway/models.mdx
+++ b/pages/gateway/models.mdx
@@ -28,7 +28,15 @@ Tangle Gateway provides access to models from 20+ providers through a single API
 | Z.ai | `zai` | GLM-4.7, GLM-5 |
 | Moonshot | `moonshot` | Kimi |
 
-Plus operators on the Tangle network serving custom and open-source models.
+Plus decentralized operators on the Tangle network running [Blueprints](/developers/blueprints/introduction):
+
+| Blueprint | Models | How to route |
+|-----------|--------|-------------|
+| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto |
+| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` |
+| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug |
+
+Operators set their own pricing and the gateway [scores them](/gateway/smart-routing) on reputation, latency, and price. See [Operator Routing](/gateway/operator-routing) for details.
 
 ## Model ID format
 
diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx
index 09d9c767..6f4d3e34 100644
--- a/pages/gateway/operator-routing.mdx
+++ b/pages/gateway/operator-routing.mdx
@@ -7,9 +7,23 @@ description: Route inference through decentralized operators on the Tangle netwo
 
 Operators are independent inference providers registered on the Tangle network. They run models on their own hardware, set their own prices, and earn from every request routed through them.
 
+## Blueprints
+
+Operators run **Blueprints** — on-chain service definitions that specify what an operator does. The inference-related Blueprints the gateway routes through:
+
+| Blueprint | What it serves | Repo |
+|-----------|---------------|------|
+| **LLM Inference** | Chat completions, text generation (Llama, Qwen, Mistral, etc.) | [tangle-network/llm-inference-blueprint](https://github.com/tangle-network/llm-inference-blueprint) |
+| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed |
+| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) |
+
+The LLM Inference Blueprint uses [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core) — a shared Rust crate for EIP-712 signature verification, nonce management, and on-chain settlement. Operators compile it into a binary (`operator-lite`) that runs alongside their model server.
+
+To build and deploy your own inference Blueprint, see the [Blueprint SDK docs](/developers/blueprints/introduction) and the [Blueprint Runner](/developers/blueprint-runner/introduction).
+
 ## How operators are discovered
 
-1. Operators register on-chain via the Blueprint Service Manager (BSM) contract
+1. Operators register on-chain via the [Blueprint Service Manager (BSM)](/developers/blueprints/service-lifecycle) contract
 2. The gateway syncs operator data from the chain every 60 seconds
 3. Operators are stored in the database with their endpoint URL, pricing, and status
 4. The [scoring algorithm](/gateway/smart-routing) ranks operators per-request
diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx
index c39bcc2c..54cf002b 100644
--- a/pages/gateway/smart-routing.mdx
+++ b/pages/gateway/smart-routing.mdx
@@ -5,7 +5,7 @@ description: How the gateway scores and selects operators.
 
 # Smart Routing
 
-When multiple operators serve the same model, the gateway selects the best one using a weighted scoring algorithm.
+When multiple operators running the same [Blueprint](/developers/blueprints/introduction) serve the same model, the gateway selects the best one using a weighted scoring algorithm.
 
 ## Scoring formula
 
diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx
index 1f4977c2..e6da980a 100644
--- a/pages/gateway/spend-auth.mdx
+++ b/pages/gateway/spend-auth.mdx
@@ -50,3 +50,5 @@ SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. Th
 - Balance management (deposit, authorize, claim)
 - Authorization verification (EIP-712 signature recovery)
 - Payment settlement (operator claims after serving)
+
+The operator-side settlement logic is implemented in [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core), a shared Rust crate used by the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint) and other inference Blueprints.

From d4950e6d563bcba167619f680b3ab2ae17d52c74 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Fri, 10 Apr 2026 15:55:33 -0700
Subject: [PATCH 4/4] style: run prettier on all gateway docs

---
 pages/ai/index.mdx                  |  1 +
 pages/gateway/api-chat.mdx          | 28 +++++-----
 pages/gateway/api-credits.mdx       |  8 +--
 pages/gateway/api-generation.mdx    | 16 +++---
 pages/gateway/byok.mdx              |  8 +--
 pages/gateway/caching.mdx           | 14 ++---
 pages/gateway/enterprise-zdr.mdx    |  1 +
 pages/gateway/fallbacks.mdx         |  2 +
 pages/gateway/feature-flags.mdx     | 14 ++---
 pages/gateway/free-tier.mdx         | 30 +++++------
 pages/gateway/generation-lookup.mdx | 30 +++++------
 pages/gateway/guardrails.mdx        | 28 +++++-----
 pages/gateway/how-routing-works.mdx | 16 +++---
 pages/gateway/index.mdx             | 10 ++--
 pages/gateway/migrate-vercel.mdx    | 42 +++++++--------
 pages/gateway/models.mdx            | 82 ++++++++++++++---------------
 pages/gateway/no-train.mdx          |  8 +--
 pages/gateway/operator-routing.mdx  | 24 ++++-----
 pages/gateway/pricing.mdx           |  8 +--
 pages/gateway/provider-options.mdx  | 22 ++++----
 pages/gateway/rate-limiting.mdx     | 14 ++---
 pages/gateway/response-headers.mdx  | 46 ++++++++--------
 pages/gateway/routing-trace.mdx     |  1 +
 pages/gateway/smart-routing.mdx     | 10 ++--
 pages/gateway/spend-auth.mdx        |  1 +
 pages/gateway/timeouts.mdx          |  1 +
 pages/gateway/zdr.mdx               | 44 ++++++++--------
 27 files changed, 258 insertions(+), 251 deletions(-)

diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx
index fe2cf0bb..629fda09 100644
--- a/pages/ai/index.mdx
+++ b/pages/ai/index.mdx
@@ -49,6 +49,7 @@ Each run produces task and agent evaluations. That data feeds back into the work
 The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement.
 
 Key capabilities:
+
 - **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers.
 - **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency.
 - **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements.
diff --git a/pages/gateway/api-chat.mdx b/pages/gateway/api-chat.mdx
index 2a8db365..db872118 100644
--- a/pages/gateway/api-chat.mdx
+++ b/pages/gateway/api-chat.mdx
@@ -50,23 +50,23 @@ All standard OpenAI parameters (`tools`, `tool_choice`, `response_format`, `top_
 
 ### Routing headers (optional)
 
-| Header | Effect |
-|--------|--------|
-| `X-Tangle-Routing` | `operator`, `provider`, or `auto` (default) |
-| `X-Tangle-Blueprint` | Pin to operators under this Blueprint ID |
-| `X-Tangle-Service` | Pin to a specific service instance |
-| `X-Tangle-Operator` | Pin to a specific operator (slug or 0x address) |
-| `X-Payment-Signature` | SpendAuth JSON payload for on-chain payment |
+| Header                | Effect                                          |
+| --------------------- | ----------------------------------------------- |
+| `X-Tangle-Routing`    | `operator`, `provider`, or `auto` (default)     |
+| `X-Tangle-Blueprint`  | Pin to operators under this Blueprint ID        |
+| `X-Tangle-Service`    | Pin to a specific service instance              |
+| `X-Tangle-Operator`   | Pin to a specific operator (slug or 0x address) |
+| `X-Payment-Signature` | SpendAuth JSON payload for on-chain payment     |
 
 ### Validation
 
-| Field | Constraint |
-|-------|-----------|
-| `model` | Required. Alphanumeric + `/-.:\\_`, max 128 chars. |
-| `messages` | Required. Non-empty array. Each must have `role`. |
-| `max_tokens` | Optional. 1-128,000. Default: 4,096. |
-| `temperature` | Optional. 0-2. Default: 1. |
-| Body size | Max 1MB. |
+| Field         | Constraint                                         |
+| ------------- | -------------------------------------------------- |
+| `model`       | Required. Alphanumeric + `/-.:\\_`, max 128 chars. |
+| `messages`    | Required. Non-empty array. Each must have `role`.  |
+| `max_tokens`  | Optional. 1-128,000. Default: 4,096.               |
+| `temperature` | Optional. 0-2. Default: 1.                         |
+| Body size     | Max 1MB.                                           |
 
 ## Response (non-streaming)
 
diff --git a/pages/gateway/api-credits.mdx b/pages/gateway/api-credits.mdx
index deae1201..9bfdf339 100644
--- a/pages/gateway/api-credits.mdx
+++ b/pages/gateway/api-credits.mdx
@@ -23,7 +23,7 @@ Authorization: Bearer sk-tan-YOUR_KEY
 }
 ```
 
-| Field | Description |
-|-------|-------------|
-| `balance` | Remaining credit balance (USD) |
-| `total_used` | Total credits consumed (USD) |
+| Field        | Description                    |
+| ------------ | ------------------------------ |
+| `balance`    | Remaining credit balance (USD) |
+| `total_used` | Total credits consumed (USD)   |
diff --git a/pages/gateway/api-generation.mdx b/pages/gateway/api-generation.mdx
index f52f7566..b86cf608 100644
--- a/pages/gateway/api-generation.mdx
+++ b/pages/gateway/api-generation.mdx
@@ -16,9 +16,9 @@ Authorization: Bearer sk-tan-YOUR_KEY
 
 ## Parameters
 
-| Parameter | Required | Description |
-|-----------|----------|-------------|
-| `id` | Yes | Generation ID (format: `gen_<ulid>`) |
+| Parameter | Required | Description                          |
+| --------- | -------- | ------------------------------------ |
+| `id`      | Yes      | Generation ID (format: `gen_<ulid>`) |
 
 ## Response
 
@@ -48,8 +48,8 @@ Authorization: Bearer sk-tan-YOUR_KEY
 
 ## Errors
 
-| Status | Code | Description |
-|--------|------|-------------|
-| 400 | — | Missing or invalid generation ID |
-| 401 | — | Authentication required |
-| 404 | `not_found` | Generation not found or belongs to another user |
+| Status | Code        | Description                                     |
+| ------ | ----------- | ----------------------------------------------- |
+| 400    | —           | Missing or invalid generation ID                |
+| 401    | —           | Authentication required                         |
+| 404    | `not_found` | Generation not found or belongs to another user |
diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx
index 7db449b0..120af118 100644
--- a/pages/gateway/byok.mdx
+++ b/pages/gateway/byok.mdx
@@ -73,10 +73,10 @@ If the header is absent, platform credentials were used (possibly via fallback).
 
 ## Pricing
 
-| Credential type | Markup |
-|----------------|--------|
-| BYOK | **0%** — provider list price |
-| Platform credentials | 20% markup (configurable) |
+| Credential type      | Markup                       |
+| -------------------- | ---------------------------- |
+| BYOK                 | **0%** — provider list price |
+| Platform credentials | 20% markup (configurable)    |
 
 ## Security
 
diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx
index 976894c0..6e2417d5 100644
--- a/pages/gateway/caching.mdx
+++ b/pages/gateway/caching.mdx
@@ -26,13 +26,13 @@ Some providers require explicit cache markers to enable prompt caching, while ot
 
 ## How it works
 
-| Provider | Caching Type | What `auto` does |
-|----------|-------------|-----------------|
-| OpenAI | Implicit | No change needed. Caching happens automatically. |
-| Google | Implicit | No change needed. |
-| DeepSeek | Implicit | No change needed. |
-| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. |
-| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. |
+| Provider                       | Caching Type | What `auto` does                                                        |
+| ------------------------------ | ------------ | ----------------------------------------------------------------------- |
+| OpenAI                         | Implicit     | No change needed. Caching happens automatically.                        |
+| Google                         | Implicit     | No change needed.                                                       |
+| DeepSeek                       | Implicit     | No change needed.                                                       |
+| Anthropic                      | Explicit     | Adds `cache_control: { type: 'ephemeral' }` to the last system message. |
+| Anthropic (via Bedrock/Vertex) | Explicit     | Same as Anthropic direct.                                               |
 
 For Anthropic, the gateway converts:
 
diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx
index f331bd6f..c1d157ae 100644
--- a/pages/gateway/enterprise-zdr.mdx
+++ b/pages/gateway/enterprise-zdr.mdx
@@ -60,6 +60,7 @@ If your required model is only available from a non-ZDR provider, the request wi
 ## Step 4: Set up BYOK (optional)
 
 For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you:
+
 - Zero platform markup
 - Direct contractual relationship with the provider
 - ZDR enforcement still applies on the fallback path
diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx
index bef37ec8..bd01e96f 100644
--- a/pages/gateway/fallbacks.mdx
+++ b/pages/gateway/fallbacks.mdx
@@ -24,6 +24,7 @@ Pass a `models` array in `providerOptions.gateway`:
 ```
 
 The gateway tries:
+
 1. `openai/gpt-4o` (primary model)
 2. `anthropic/claude-sonnet-4-6` (first fallback)
 3. `groq/llama-3.1-70b-versatile` (second fallback)
@@ -57,6 +58,7 @@ Use `models` with `order` to control both model fallback and provider preference
 ```
 
 This tries:
+
 1. `openai/gpt-4o` via available providers
 2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct
 
diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx
index 6fe29e23..128480e7 100644
--- a/pages/gateway/feature-flags.mdx
+++ b/pages/gateway/feature-flags.mdx
@@ -9,13 +9,13 @@ All gateway features are on by default. Set any flag to `false` to disable it wi
 
 ## Available flags
 
-| Environment Variable | Default | Controls |
-|---------------------|---------|----------|
-| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning |
-| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests |
-| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) |
-| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic |
-| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header |
+| Environment Variable       | Default | Controls                                                     |
+| -------------------------- | ------- | ------------------------------------------------------------ |
+| `ENABLE_GUARDRAILS`        | `true`  | PII detection, prompt injection scanning                     |
+| `ENABLE_RESPONSE_CACHE`    | `true`  | Response caching for deterministic requests                  |
+| `ENABLE_COMPLIANCE_FILTER` | `true`  | Early ZDR/no-train validation (routing enforcement stays on) |
+| `ENABLE_PROMPT_CACHING`    | `true`  | Auto `cache_control` injection for Anthropic                 |
+| `ENABLE_ROUTING_TRACE`     | `true`  | `X-Tangle-Routing-Trace` response header                     |
 
 ## Usage
 
diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx
index 2123547b..ff008510 100644
--- a/pages/gateway/free-tier.mdx
+++ b/pages/gateway/free-tier.mdx
@@ -9,26 +9,26 @@ Try the gateway without credits. Free tier restricts to cheap, fast models with
 
 ## Limits
 
-| Tier | Daily limit | Rate limit |
-|------|------------|------------|
-| Anonymous (no auth) | 5 req/day | 10 req/min |
-| Authenticated (zero credits) | 20 req/day | 30 req/min |
-| Paid (any credits) | Unlimited | 60 req/min |
+| Tier                         | Daily limit | Rate limit |
+| ---------------------------- | ----------- | ---------- |
+| Anonymous (no auth)          | 5 req/day   | 10 req/min |
+| Authenticated (zero credits) | 20 req/day  | 30 req/min |
+| Paid (any credits)           | Unlimited   | 60 req/min |
 
 ## Allowed models
 
 Free tier requests can use:
 
-| Model | Provider | Why it's free |
-|-------|----------|---------------|
-| `gpt-4o-mini` | OpenAI | Small, cheap |
-| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap |
-| `llama-3.1-8b-instant` | Groq | Free tier inference |
-| `llama-3.2-1b-preview` | Groq | Tiny model |
-| `llama-3.2-3b-preview` | Groq | Small model |
-| `gemini-2.0-flash-lite` | Google | Free tier |
-| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap |
-| `deepseek-chat` | DeepSeek | Very cheap |
+| Model                       | Provider  | Why it's free       |
+| --------------------------- | --------- | ------------------- |
+| `gpt-4o-mini`               | OpenAI    | Small, cheap        |
+| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap         |
+| `llama-3.1-8b-instant`      | Groq      | Free tier inference |
+| `llama-3.2-1b-preview`      | Groq      | Tiny model          |
+| `llama-3.2-3b-preview`      | Groq      | Small model         |
+| `gemini-2.0-flash-lite`     | Google    | Free tier           |
+| `cerebras/llama-3.1-8b`     | Cerebras  | Fast, cheap         |
+| `deepseek-chat`             | DeepSeek  | Very cheap          |
 
 ## Blocked models
 
diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx
index 31c780ef..ccc988f8 100644
--- a/pages/gateway/generation-lookup.mdx
+++ b/pages/gateway/generation-lookup.mdx
@@ -54,18 +54,18 @@ curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
 
 ## Fields
 
-| Field | Description |
-|-------|-------------|
-| `id` | Generation ID (`gen_<ulid>`) |
-| `total_cost` | Total cost in USD |
-| `model` | Model that served the request |
-| `is_byok` | Whether BYOK credentials were used |
-| `provider_name` | Provider that served the request |
-| `streamed` | Whether the request used streaming |
-| `latency` | Time to first token (ms) |
-| `generation_time` | Total generation time (ms) |
-| `tokens_prompt` / `tokens_completion` | Token counts |
-| `native_tokens_cached` | Tokens served from provider cache |
-| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) |
-| `routing_trace` | Full routing attempt history |
-| `cache_hit` | Whether response was served from gateway cache |
+| Field                                 | Description                                    |
+| ------------------------------------- | ---------------------------------------------- |
+| `id`                                  | Generation ID (`gen_<ulid>`)                   |
+| `total_cost`                          | Total cost in USD                              |
+| `model`                               | Model that served the request                  |
+| `is_byok`                             | Whether BYOK credentials were used             |
+| `provider_name`                       | Provider that served the request               |
+| `streamed`                            | Whether the request used streaming             |
+| `latency`                             | Time to first token (ms)                       |
+| `generation_time`                     | Total generation time (ms)                     |
+| `tokens_prompt` / `tokens_completion` | Token counts                                   |
+| `native_tokens_cached`                | Tokens served from provider cache              |
+| `native_tokens_reasoning`             | Reasoning tokens (o1/o3/o4 models)             |
+| `routing_trace`                       | Full routing attempt history                   |
+| `cache_hit`                           | Whether response was served from gateway cache |
diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx
index af7cf3ca..e343a214 100644
--- a/pages/gateway/guardrails.mdx
+++ b/pages/gateway/guardrails.mdx
@@ -11,26 +11,26 @@ The gateway scans all requests for PII and prompt injection patterns before rout
 
 ### PII detection
 
-| Pattern | Severity | Example |
-|---------|----------|---------|
-| SSN | Critical | `123-45-6789` |
+| Pattern                        | Severity | Example               |
+| ------------------------------ | -------- | --------------------- |
+| SSN                            | Critical | `123-45-6789`         |
 | Credit card (Visa/MC/Discover) | Critical | `4111 1111 1111 1111` |
-| Credit card (Amex) | Critical | `3782 822463 10005` |
-| Email | Low | `user@example.com` |
-| US phone | Medium | `(555) 123-4567` |
-| IP address | Low | `192.168.1.1` |
+| Credit card (Amex)             | Critical | `3782 822463 10005`   |
+| Email                          | Low      | `user@example.com`    |
+| US phone                       | Medium   | `(555) 123-4567`      |
+| IP address                     | Low      | `192.168.1.1`         |
 
 ### Prompt injection detection
 
 Applied to user messages only (not system or assistant):
 
-| Pattern | Severity |
-|---------|----------|
-| "Ignore all previous instructions" | High |
-| "You are now a different AI" | High |
-| "Pretend you have no restrictions" | High |
-| "Reveal your system prompt" | Medium |
-| DAN-mode jailbreaks | High |
+| Pattern                            | Severity |
+| ---------------------------------- | -------- |
+| "Ignore all previous instructions" | High     |
+| "You are now a different AI"       | High     |
+| "Pretend you have no restrictions" | High     |
+| "Reveal your system prompt"        | Medium   |
+| DAN-mode jailbreaks                | High     |
 
 ## Modes
 
diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx
index 7829a012..464211cc 100644
--- a/pages/gateway/how-routing-works.mdx
+++ b/pages/gateway/how-routing-works.mdx
@@ -54,12 +54,12 @@ Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with
 
 ## Routing control
 
-| Method | Effect |
-|--------|--------|
-| `routing: "auto"` | Try all three tiers (default) |
-| `routing: "operator"` | Operators only. Fails if no operator available. |
-| `routing: "provider"` | Skip operators, use LiteLLM + direct only. |
-| `X-Tangle-Blueprint: <id>` | Pin to operators under this Blueprint. |
-| `X-Tangle-Operator: <slug>` | Pin to a specific operator. |
+| Method                          | Effect                                               |
+| ------------------------------- | ---------------------------------------------------- |
+| `routing: "auto"`               | Try all three tiers (default)                        |
+| `routing: "operator"`           | Operators only. Fails if no operator available.      |
+| `routing: "provider"`           | Skip operators, use LiteLLM + direct only.           |
+| `X-Tangle-Blueprint: <id>`      | Pin to operators under this Blueprint.               |
+| `X-Tangle-Operator: <slug>`     | Pin to a specific operator.                          |
 | `providerOptions.gateway.order` | Control which providers are tried and in what order. |
-| `providerOptions.gateway.only` | Restrict to these providers only. |
+| `providerOptions.gateway.only`  | Restrict to these providers only.                    |
diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx
index 129b8b9b..57c7630a 100644
--- a/pages/gateway/index.mdx
+++ b/pages/gateway/index.mdx
@@ -34,11 +34,11 @@ Works with any OpenAI-compatible SDK. Change the base URL and you're done.
 
 The gateway routes through three tiers, in order:
 
-| Tier | What | When |
-|------|------|------|
-| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth |
-| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests |
-| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required |
+| Tier          | What                                                       | When                                                           |
+| ------------- | ---------------------------------------------------------- | -------------------------------------------------------------- |
+| **Operators** | Decentralized inference providers on Tangle                | Default for operator-pinned requests and SpendAuth             |
+| **LiteLLM**   | Proxy with 100+ provider integrations and built-in retries | Default for standard requests                                  |
+| **Direct**    | Straight to provider API (OpenAI, Anthropic, etc.)         | Fallback when LiteLLM unavailable, or when compliance required |
 
 When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only.
 
diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx
index b18462a8..abad708f 100644
--- a/pages/gateway/migrate-vercel.mdx
+++ b/pages/gateway/migrate-vercel.mdx
@@ -9,30 +9,30 @@ Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI G
 
 ## What maps directly
 
-| Vercel Feature | Tangle Equivalent | Notes |
-|---|---|---|
-| `providerOptions.gateway.byok` | Same | Identical schema |
-| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers |
-| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers |
-| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection |
-| `providerOptions.gateway.order` | Same | Provider priority |
-| `providerOptions.gateway.only` | Same | Provider allowlist |
-| `models` fallback array | Same | Model-level failover |
-| `GET /v1/credits` | Same | Balance check |
-| `GET /v1/generation` | Same | Request detail lookup |
+| Vercel Feature                                   | Tangle Equivalent | Notes                               |
+| ------------------------------------------------ | ----------------- | ----------------------------------- |
+| `providerOptions.gateway.byok`                   | Same              | Identical schema                    |
+| `providerOptions.gateway.zeroDataRetention`      | Same              | 13 verified providers               |
+| `providerOptions.gateway.disallowPromptTraining` | Same              | 25 verified providers               |
+| `providerOptions.gateway.caching: 'auto'`        | Same              | Anthropic `cache_control` injection |
+| `providerOptions.gateway.order`                  | Same              | Provider priority                   |
+| `providerOptions.gateway.only`                   | Same              | Provider allowlist                  |
+| `models` fallback array                          | Same              | Model-level failover                |
+| `GET /v1/credits`                                | Same              | Balance check                       |
+| `GET /v1/generation`                             | Same              | Request detail lookup               |
 
 ## What's different
 
-| Feature | Vercel | Tangle |
-|---------|--------|--------|
-| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` |
-| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) |
-| **Pricing** | Zero markup | 20% markup (0% with BYOK) |
-| **Operator network** | None | Decentralized operators compete on price/latency |
-| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card |
-| **Guardrails** | None | PII + injection detection built-in |
-| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) |
-| **OIDC auth** | Vercel-only | Not applicable |
+| Feature               | Vercel                                | Tangle                                                   |
+| --------------------- | ------------------------------------- | -------------------------------------------------------- |
+| **Base URL**          | `ai-gateway.vercel.sh/v1`             | `router.tangle.tools/v1`                                 |
+| **Auth**              | API key or OIDC token                 | API key, session, SIWE (wallet), or SpendAuth (on-chain) |
+| **Pricing**           | Zero markup                           | 20% markup (0% with BYOK)                                |
+| **Operator network**  | None                                  | Decentralized operators compete on price/latency         |
+| **On-chain payments** | None                                  | SpendAuth (EIP-712) — pay without a credit card          |
+| **Guardrails**        | None                                  | PII + injection detection built-in                       |
+| **Web search tools**  | Perplexity, Parallel, provider-native | Not yet (planned)                                        |
+| **OIDC auth**         | Vercel-only                           | Not applicable                                           |
 
 ## Code change
 
diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx
index cf635ac4..4c058018 100644
--- a/pages/gateway/models.mdx
+++ b/pages/gateway/models.mdx
@@ -9,32 +9,32 @@ Tangle Gateway provides access to models from 20+ providers through a single API
 
 ## Providers
 
-| Provider | Slug | Models |
-|----------|------|--------|
-| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS |
-| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku |
-| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite |
-| Groq | `groq` | Llama 3.1/3.2 (fast inference) |
-| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) |
-| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder |
-| Mistral | `mistral` | Mistral Large, Codestral, Pixtral |
-| Fireworks | `fireworks` | Phi, StarCoder, open models |
-| Cohere | `cohere` | Command R/R+ |
-| xAI | `xai` | Grok 2, Grok 3 |
-| Cerebras | `cerebras` | Llama (fast inference) |
-| SambaNova | `sambanova` | Fast open-model inference |
-| AI21 | `ai21` | Jamba |
-| Nvidia | `nvidia` | Nemotron |
-| Z.ai | `zai` | GLM-4.7, GLM-5 |
-| Moonshot | `moonshot` | Kimi |
+| Provider    | Slug        | Models                                                |
+| ----------- | ----------- | ----------------------------------------------------- |
+| OpenAI      | `openai`    | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS |
+| Anthropic   | `anthropic` | Claude Opus, Sonnet, Haiku                            |
+| Google      | `google`    | Gemini 2.5 Pro, Flash, Flash-Lite                     |
+| Groq        | `groq`      | Llama 3.1/3.2 (fast inference)                        |
+| Together AI | `together`  | Open-source models (Llama, Qwen, Mixtral)             |
+| DeepSeek    | `deepseek`  | DeepSeek Chat, DeepSeek Coder                         |
+| Mistral     | `mistral`   | Mistral Large, Codestral, Pixtral                     |
+| Fireworks   | `fireworks` | Phi, StarCoder, open models                           |
+| Cohere      | `cohere`    | Command R/R+                                          |
+| xAI         | `xai`       | Grok 2, Grok 3                                        |
+| Cerebras    | `cerebras`  | Llama (fast inference)                                |
+| SambaNova   | `sambanova` | Fast open-model inference                             |
+| AI21        | `ai21`      | Jamba                                                 |
+| Nvidia      | `nvidia`    | Nemotron                                              |
+| Z.ai        | `zai`       | GLM-4.7, GLM-5                                        |
+| Moonshot    | `moonshot`  | Kimi                                                  |
 
 Plus decentralized operators on the Tangle network running [Blueprints](/developers/blueprints/introduction):
 
-| Blueprint | Models | How to route |
-|-----------|--------|-------------|
-| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto |
-| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` |
-| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug |
+| Blueprint                                                                  | Models                                          | How to route                          |
+| -------------------------------------------------------------------------- | ----------------------------------------------- | ------------------------------------- |
+| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto  |
+| Vector Store                                                               | Embedding models for RAG                        | `/v1/collections` and `/v1/rag/query` |
+| Custom Blueprints                                                          | Any model the operator deploys                  | Pin by Blueprint ID or operator slug  |
 
 Operators set their own pricing and the gateway [scores them](/gateway/smart-routing) on reputation, latency, and price. See [Operator Routing](/gateway/operator-routing) for details.
 
@@ -50,27 +50,27 @@ groq/llama-3.1-70b-versatile
 
 Or use bare names — the gateway resolves the provider by prefix:
 
-| Prefix | Resolves to |
-|--------|-------------|
-| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI |
-| `claude-` | Anthropic |
-| `gemini-`, `gemma-` | Google |
-| `llama-`, `mixtral-` | Groq |
-| `deepseek-` | DeepSeek |
-| `mistral-`, `codestral-` | Mistral |
-| `grok-` | xAI |
-| `glm-` | Z.ai |
-| `command-` | Cohere |
+| Prefix                      | Resolves to |
+| --------------------------- | ----------- |
+| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI      |
+| `claude-`                   | Anthropic   |
+| `gemini-`, `gemma-`         | Google      |
+| `llama-`, `mixtral-`        | Groq        |
+| `deepseek-`                 | DeepSeek    |
+| `mistral-`, `codestral-`    | Mistral     |
+| `grok-`                     | xAI         |
+| `glm-`                      | Z.ai        |
+| `command-`                  | Cohere      |
 
 ## Modalities
 
-| Modality | Endpoint | Examples |
-|----------|----------|---------|
-| Text | `/v1/chat/completions` | All chat models |
-| Images | `/v1/images/generations` | DALL-E, FLUX |
-| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS |
-| Embeddings | `/v1/embeddings` | text-embedding-3-small/large |
-| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) |
+| Modality   | Endpoint                                       | Examples                               |
+| ---------- | ---------------------------------------------- | -------------------------------------- |
+| Text       | `/v1/chat/completions`                         | All chat models                        |
+| Images     | `/v1/images/generations`                       | DALL-E, FLUX                           |
+| Audio      | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS                           |
+| Embeddings | `/v1/embeddings`                               | text-embedding-3-small/large           |
+| Video      | `/v1/video/*`                                  | Avatar generation, dubbing (via ph0ny) |
 
 ## Dynamic discovery
 
diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx
index da6154af..f6461c01 100644
--- a/pages/gateway/no-train.mdx
+++ b/pages/gateway/no-train.mdx
@@ -23,10 +23,10 @@ Ensure your prompts and responses are never used by providers to train their mod
 
 Disallow prompt training is a **subset** of [Zero Data Retention](/gateway/zdr). All ZDR-compliant providers also disallow prompt training, but more providers disallow training than offer full ZDR.
 
-| Filter | Verified providers |
-|--------|-------------------|
-| No-train only | 25 providers |
-| ZDR (includes no-train) | 13 providers |
+| Filter                  | Verified providers |
+| ----------------------- | ------------------ |
+| No-train only           | 25 providers       |
+| ZDR (includes no-train) | 13 providers       |
 
 Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees.
 
diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx
index 6f4d3e34..687d38d2 100644
--- a/pages/gateway/operator-routing.mdx
+++ b/pages/gateway/operator-routing.mdx
@@ -11,11 +11,11 @@ Operators are independent inference providers registered on the Tangle network.
 
 Operators run **Blueprints** — on-chain service definitions that specify what an operator does. The inference-related Blueprints the gateway routes through:
 
-| Blueprint | What it serves | Repo |
-|-----------|---------------|------|
+| Blueprint         | What it serves                                                 | Repo                                                                                                |
+| ----------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
 | **LLM Inference** | Chat completions, text generation (Llama, Qwen, Mistral, etc.) | [tangle-network/llm-inference-blueprint](https://github.com/tangle-network/llm-inference-blueprint) |
-| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed |
-| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) |
+| **Vector Store**  | Embedding storage and retrieval for RAG                        | Operator-deployed                                                                                   |
+| **Custom**        | Any model/pipeline an operator chooses to serve                | [Build your own](/developers/blueprints/introduction)                                               |
 
 The LLM Inference Blueprint uses [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core) — a shared Rust crate for EIP-712 signature verification, nonce management, and on-chain settlement. Operators compile it into a binary (`operator-lite`) that runs alongside their model server.
 
@@ -70,14 +70,14 @@ curl -X POST "https://router.tangle.tools/v1/chat/completions" \
 
 ## What's verified on-chain
 
-| Data | Verified? |
-|------|-----------|
-| Operator Ethereum address | Yes (signed transaction) |
-| Active/inactive status | Yes (BSM contract state) |
-| Staked amount | Yes (on-chain balance) |
-| Pricing (per-token) | Yes (BSM contract) |
-| Endpoint URL | No (self-reported at registration) |
-| Backing provider | No (not tracked) |
+| Data                      | Verified?                          |
+| ------------------------- | ---------------------------------- |
+| Operator Ethereum address | Yes (signed transaction)           |
+| Active/inactive status    | Yes (BSM contract state)           |
+| Staked amount             | Yes (on-chain balance)             |
+| Pricing (per-token)       | Yes (BSM contract)                 |
+| Endpoint URL              | No (self-reported at registration) |
+| Backing provider          | No (not tracked)                   |
 
 Because endpoint URL and backing provider are self-reported, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers.
 
diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx
index 59e55c65..6634f841 100644
--- a/pages/gateway/pricing.mdx
+++ b/pages/gateway/pricing.mdx
@@ -7,10 +7,10 @@ description: How billing works on Tangle Gateway.
 
 ## Pricing model
 
-| Credential type | Markup |
-|----------------|--------|
-| Platform credentials | 20% above provider list price |
-| [BYOK](/gateway/byok) | **0%** — provider list price, no markup |
+| Credential type                  | Markup                                      |
+| -------------------------------- | ------------------------------------------- |
+| Platform credentials             | 20% above provider list price               |
+| [BYOK](/gateway/byok)            | **0%** — provider list price, no markup     |
 | [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) |
 
 The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them.
diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx
index eb0fe5b1..ccb49797 100644
--- a/pages/gateway/provider-options.mdx
+++ b/pages/gateway/provider-options.mdx
@@ -36,17 +36,17 @@ interface GatewayOptions {
 
 ## Options reference
 
-| Option | Type | Default | Description |
-|--------|------|---------|-------------|
-| `byok` | `Record<string, Array<{apiKey}>>` | — | Per-request provider credentials. [Details](/gateway/byok) |
-| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) |
-| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) |
-| `caching` | `'auto'` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) |
-| `cache` | `false` | — | Set `false` to skip response cache for this request. |
-| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) |
-| `only` | `string[]` | — | Restrict to these providers only. |
-| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) |
-| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts) |
+| Option                   | Type                               | Default | Description                                                    |
+| ------------------------ | ---------------------------------- | ------- | -------------------------------------------------------------- |
+| `byok`                   | `Record<string, Array<{apiKey}>>`  | —       | Per-request provider credentials. [Details](/gateway/byok)     |
+| `zeroDataRetention`      | `boolean`                          | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr)  |
+| `disallowPromptTraining` | `boolean`                          | `false` | Route only to no-train providers. [Details](/gateway/no-train) |
+| `caching`                | `'auto'`                           | —       | Auto-inject prompt cache markers. [Details](/gateway/caching)  |
+| `cache`                  | `false`                            | —       | Set to `false` to skip response cache for this request.        |
+| `order`                  | `string[]`                         | —       | Provider priority order. [Details](/gateway/smart-routing)     |
+| `only`                   | `string[]`                         | —       | Restrict to these providers only.                              |
+| `models`                 | `string[]`                         | —       | Fallback model list. [Details](/gateway/fallbacks)             |
+| `timeout`                | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts)                    |
 
 ## Example: everything at once
 
diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx
index 587ed74f..7b0962d2 100644
--- a/pages/gateway/rate-limiting.mdx
+++ b/pages/gateway/rate-limiting.mdx
@@ -9,13 +9,13 @@ The gateway enforces sliding-window rate limits per authentication method.
 
 ## Limits
 
-| Auth method | Rate limit | Daily limit |
-|-------------|-----------|-------------|
-| API Key | 60 req/min | Unlimited (with credits) |
-| Session | 30 req/min | Unlimited (with credits) |
-| SpendAuth | 120 req/min | Unlimited |
-| Anonymous | 10 req/min | 5 req/day |
-| Authenticated (no credits) | 30 req/min | 20 req/day |
+| Auth method                | Rate limit  | Daily limit              |
+| -------------------------- | ----------- | ------------------------ |
+| API Key                    | 60 req/min  | Unlimited (with credits) |
+| Session                    | 30 req/min  | Unlimited (with credits) |
+| SpendAuth                  | 120 req/min | Unlimited                |
+| Anonymous                  | 10 req/min  | 5 req/day                |
+| Authenticated (no credits) | 30 req/min  | 20 req/day               |
 
 ## Response headers
 
diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx
index f791ef95..40350b0c 100644
--- a/pages/gateway/response-headers.mdx
+++ b/pages/gateway/response-headers.mdx
@@ -9,33 +9,33 @@ Every response from the gateway includes metadata headers.
 
 ## Standard headers
 
-| Header | Description | Example |
-|--------|-------------|---------|
-| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` |
-| `X-Tangle-Price-Input` | USD per input token | `0.000003` |
-| `X-Tangle-Price-Output` | USD per output token | `0.000015` |
-| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` |
-| `X-RateLimit-Limit` | Requests allowed per window | `60` |
-| `X-RateLimit-Remaining` | Requests remaining | `42` |
-| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` |
+| Header                  | Description                 | Example             |
+| ----------------------- | --------------------------- | ------------------- |
+| `X-Generation-Id`       | Unique request ID           | `gen_01J5K7ABCD...` |
+| `X-Tangle-Price-Input`  | USD per input token         | `0.000003`          |
+| `X-Tangle-Price-Output` | USD per output token        | `0.000015`          |
+| `X-Tangle-Cache`        | Response cache status       | `HIT` or `MISS`     |
+| `X-RateLimit-Limit`     | Requests allowed per window | `60`                |
+| `X-RateLimit-Remaining` | Requests remaining          | `42`                |
+| `X-RateLimit-Reset`     | Window reset (Unix seconds) | `1712793600`        |
 
 ## Conditional headers
 
-| Header | When present | Description |
-|--------|-------------|-------------|
-| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path |
-| `X-Tangle-Operator` | When served by an operator | Operator slug |
-| `X-Tangle-BYOK` | When BYOK credentials used | `true` |
-| `X-Tangle-Caching` | When prompt caching applied | `auto` |
-| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` |
-| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` |
-| `X-Free-Tier-Remaining` | Free tier requests | `3` |
-| `X-Free-Tier-Limit` | Free tier daily cap | `5` |
+| Header                   | When present                      | Description                     |
+| ------------------------ | --------------------------------- | ------------------------------- |
+| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path            |
+| `X-Tangle-Operator`      | When served by an operator        | Operator slug                   |
+| `X-Tangle-BYOK`          | When BYOK credentials used        | `true`                          |
+| `X-Tangle-Caching`       | When prompt caching applied       | `auto`                          |
+| `X-Tangle-Guardrails`    | When guardrails flagged content   | `pii:low,prompt_injection:high` |
+| `X-Payment-Settled`      | When SpendAuth payment succeeded  | `true`                          |
+| `X-Free-Tier-Remaining`  | On free-tier requests             | Requests remaining, e.g. `3`    |
+| `X-Free-Tier-Limit`      | On free-tier requests             | Daily request cap, e.g. `5`     |
 
 ## Error response headers
 
-| Header | When present | Description |
-|--------|-------------|-------------|
+| Header               | When present  | Description               |
+| -------------------- | ------------- | ------------------------- |
 | `X-Payment-Required` | 402 responses | Amount needed (micro-USD) |
-| `X-Payment-Currency` | 402 responses | `tsUSD` |
-| `X-Payment-Methods` | 402 responses | `credits,spend_auth` |
+| `X-Payment-Currency` | 402 responses | `tsUSD`                   |
+| `X-Payment-Methods`  | 402 responses | `credits,spend_auth`      |
diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx
index 23b643d5..9e23653d 100644
--- a/pages/gateway/routing-trace.mdx
+++ b/pages/gateway/routing-trace.mdx
@@ -24,6 +24,7 @@ X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonn
 ## Sanitization
 
 The trace header is sanitized for safety:
+
 - Operator names are shown as generic `operator` (slugs not exposed)
 - Error messages are not included (only status codes)
 - Internal URLs and hostnames are never leaked
diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx
index 54cf002b..9521f60f 100644
--- a/pages/gateway/smart-routing.mdx
+++ b/pages/gateway/smart-routing.mdx
@@ -13,11 +13,11 @@ When multiple operators running the same [Blueprint](/developers/blueprints/intr
 score = reputation(40%) + latency(30%) + price(30%)
 ```
 
-| Factor | Weight | What it measures |
-|--------|--------|-----------------|
-| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history |
-| **Latency** | 30% | Inverse of average response time (lower = better) |
-| **Price** | 30% | Inverse of per-token price (cheaper = better) |
+| Factor         | Weight | What it measures                                          |
+| -------------- | ------ | --------------------------------------------------------- |
+| **Reputation** | 40%    | Normalized reputation score (0-100) from on-chain history |
+| **Latency**    | 30%    | Inverse of average response time (lower = better)         |
+| **Price**      | 30%    | Inverse of per-token price (cheaper = better)             |
 
 ## Operator selection
 
diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx
index e6da980a..d464d36d 100644
--- a/pages/gateway/spend-auth.mdx
+++ b/pages/gateway/spend-auth.mdx
@@ -47,6 +47,7 @@ SpendAuth requests get a generous 120 req/min limit per commitment since every r
 ## On-chain contracts
 
 SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. The contract handles:
+
 - Balance management (deposit, authorize, claim)
 - Authorization verification (EIP-712 signature recovery)
 - Payment settlement (operator claims after serving)
diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx
index 10fe9645..6c49cc18 100644
--- a/pages/gateway/timeouts.mdx
+++ b/pages/gateway/timeouts.mdx
@@ -46,6 +46,7 @@ Without explicit timeouts, the gateway uses a 30-second default for all provider
 ## Bounds
 
 All timeout values are clamped:
+
 - **Minimum:** 1,000ms (1 second)
 - **Maximum:** 120,000ms (2 minutes)
 
diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx
index 21340db9..e9f44b4b 100644
--- a/pages/gateway/zdr.mdx
+++ b/pages/gateway/zdr.mdx
@@ -39,33 +39,33 @@ When ZDR is enabled:
 
 ## ZDR-verified providers
 
-| Provider | ZDR | No-Train | Policy |
-|----------|-----|----------|--------|
-| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) |
-| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) |
-| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) |
-| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) |
-| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) |
-| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) |
-| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) |
-| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) |
-| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) |
-| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) |
-| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) |
-| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) |
-| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) |
+| Provider       | ZDR | No-Train | Policy                                                                                                 |
+| -------------- | --- | -------- | ------------------------------------------------------------------------------------------------------ |
+| Anthropic      | Yes | Yes      | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention)                |
+| Amazon Bedrock | Yes | Yes      | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html)           |
+| Azure OpenAI   | Yes | Yes      | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy)     |
+| Groq           | Yes | Yes      | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention)                              |
+| Mistral        | Yes | Yes      | [Terms](https://legal.mistral.ai/terms)                                                                |
+| Fireworks      | Yes | Yes      | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling)                    |
+| Together       | Yes | Yes      | [Terms](https://www.together.ai/terms-of-service)                                                      |
+| Cerebras       | Yes | Yes      | [Privacy](https://www.cerebras.ai/privacy-policy)                                                      |
+| Google Vertex  | Yes | Yes      | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) |
+| Nebius         | Yes | Yes      | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide)                            |
+| Parasail       | Yes | Yes      | [Terms](https://parasail.io/legal/terms-of-service)                                                    |
+| Baseten        | Yes | Yes      | [Security](https://docs.baseten.co/observability/security)                                             |
+| DeepInfra      | Yes | Yes      | [Data handling](https://deepinfra.com/docs/data)                                                       |
 
 Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys.
 
 ## Trust model
 
-| Routing tier | ZDR behavior |
-|---|---|
-| **Operators** | Skipped. Self-reported backing provider is unverifiable. |
-| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. |
-| **Direct provider** | Routed only to verified ZDR providers. |
-| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. |
-| **Operator-only + ZDR** | 400 error. Conflicting requirements. |
+| Routing tier            | ZDR behavior                                               |
+| ----------------------- | ---------------------------------------------------------- |
+| **Operators**           | Skipped. Self-reported backing provider is unverifiable.   |
+| **LiteLLM**             | Skipped. Internal fallback chain is uncontrollable.        |
+| **Direct provider**     | Routed only to verified ZDR providers.                     |
+| **BYOK fallback**       | ZDR filters preserved on fallback to platform credentials. |
+| **Operator-only + ZDR** | 400 error. Conflicting requirements.                       |
 
 The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct.