diff --git a/pages/_meta.ts b/pages/_meta.ts
index ef9a5cab..b700a1ca 100644
--- a/pages/_meta.ts
+++ b/pages/_meta.ts
@@ -18,6 +18,10 @@ const meta: Meta = {
title: "Workbench",
type: "page",
},
+ gateway: {
+ title: "Gateway",
+ type: "page",
+ },
infrastructure: {
title: "Runtime",
type: "page",
diff --git a/pages/ai/_meta.ts b/pages/ai/_meta.ts
index 7165fac5..88e4c860 100644
--- a/pages/ai/_meta.ts
+++ b/pages/ai/_meta.ts
@@ -2,6 +2,26 @@ import type { Meta } from "nextra";
const meta: Meta = {
index: "AI Introduction",
+ "-- gateway": {
+ type: "separator",
+ title: "Inference Gateway",
+ },
+ "gateway-intro": {
+ title: "Introduction",
+ href: "/gateway",
+ },
+ "gateway-start": {
+ title: "Getting Started",
+ href: "/gateway/getting-started",
+ },
+ "gateway-models": {
+ title: "Models & Providers",
+ href: "/gateway/models",
+ },
+ "gateway-zdr": {
+ title: "Zero Data Retention",
+ href: "/gateway/zdr",
+ },
"-- workbench": {
type: "separator",
title: "Agentic Workbench",
diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx
index fdfbcaed..629fda09 100644
--- a/pages/ai/index.mdx
+++ b/pages/ai/index.mdx
@@ -44,8 +44,20 @@ Core capabilities:
Each run produces task and agent evaluations. That data feeds back into the workbench to improve prompts, policies, and workflows over time.
+## Inference Gateway
+
+The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement.
+
+Key capabilities:
+
+- **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers.
+- **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency.
+- **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements.
+- **On-chain payments.** [SpendAuth](/gateway/spend-auth) — pay operators directly without a credit card.
+
## Learn More
+- [Gateway — Getting Started](/gateway/getting-started)
- [Workbench details](/vibe/introduction)
- [Runtime and sandboxing](/infrastructure/introduction)
- [Operator onboarding](/operators/introduction)
diff --git a/pages/developers/blueprints/use-cases.mdx b/pages/developers/blueprints/use-cases.mdx
index c329e097..39953f8e 100644
--- a/pages/developers/blueprints/use-cases.mdx
+++ b/pages/developers/blueprints/use-cases.mdx
@@ -21,7 +21,21 @@ Tangle Network enables developers to rapidly build and deploy secure multi-party
displayStyle="row"
/>
-## AI
+## AI & Inference
+
+
+
+
`) |
+
+## Response
+
+```json
+{
+ "data": {
+ "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV",
+ "total_cost": 0.00123,
+ "usage": 0.00123,
+ "created_at": "2026-04-10T12:00:00.000Z",
+ "model": "anthropic/claude-sonnet-4-6",
+ "is_byok": false,
+ "provider_name": "anthropic",
+ "streamed": true,
+ "latency": 200,
+ "generation_time": 1500,
+ "tokens_prompt": 100,
+ "tokens_completion": 50,
+ "native_tokens_cached": 80,
+ "native_tokens_reasoning": 0,
+ "status": "success",
+ "routing_trace": {...},
+ "cache_hit": false
+ }
+}
+```
+
+## Errors
+
+| Status | Code | Description |
+| ------ | ----------- | ----------------------------------------------- |
+| 400 | — | Missing or invalid generation ID |
+| 401 | — | Authentication required |
+| 404 | `not_found` | Generation not found or belongs to another user |
diff --git a/pages/gateway/authentication.mdx b/pages/gateway/authentication.mdx
new file mode 100644
index 00000000..5ce04f4e
--- /dev/null
+++ b/pages/gateway/authentication.mdx
@@ -0,0 +1,57 @@
+---
+title: Authentication
+description: Authentication methods for Tangle Gateway.
+---
+
+# Authentication
+
+Four authentication methods — plus anonymous access for free-tier models — each with different rate limits and capabilities.
+
+## API Key
+
+Create keys at the dashboard. Keys start with `sk-tan-` and are SHA256-hashed before storage.
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ https://router.tangle.tools/v1/chat/completions
+```
+
+- **Rate limit:** 60 req/min
+- **Credit check:** Yes (402 if balance is zero on non-free-tier models)
+- **Key features:** Expiration dates, soft revocation, last-used tracking
+
+## Session (Cookie)
+
+Browser-based authentication via Better Auth. Supports email/password and OAuth (Google, GitHub).
+
+- **Rate limit:** 30 req/min
+- **Credit check:** Yes
+
+## SIWE (Sign-In with Ethereum)
+
+Wallet-based authentication via EIP-191 signatures. Authenticate with your Ethereum wallet.
+
+```
+POST /api/siwe/verify
+{ "address": "0x...", "signature": "0x...", "message": "..." }
+```
+
+## SpendAuth (On-Chain Payment)
+
+EIP-712 signed payment authorization. No account needed — pay operators directly on-chain.
+
+```bash
+curl -H "X-Payment-Signature: {\"commitment\":\"0x...\",\"amount\":\"1000000\",...}" \
+ https://router.tangle.tools/v1/chat/completions
+```
+
+- **Rate limit:** 120 req/min per commitment
+- **Credit check:** No (payment is on-chain)
+- See [SpendAuth](/gateway/spend-auth) for details.
+
+## Anonymous
+
+No authentication required for [free tier models](/gateway/free-tier).
+
+- **Rate limit:** 10 req/min, 5 req/day
+- **Model access:** Free tier only (gpt-4o-mini, llama-3.1-8b, etc.)
diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx
new file mode 100644
index 00000000..120af118
--- /dev/null
+++ b/pages/gateway/byok.mdx
@@ -0,0 +1,86 @@
+---
+title: Bring Your Own Key (BYOK)
+description: Use your own provider API keys with Tangle Gateway for zero-markup access.
+---
+
+# Bring Your Own Key (BYOK)
+
+Use your existing provider API keys with Tangle Gateway. BYOK requests have **zero platform markup** — you pay the provider's list price directly.
+
+## Per-request BYOK
+
+Pass credentials in `providerOptions.gateway.byok`:
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [{"apiKey": "sk-ant-your-key"}]
+ }
+ }
+ }
+}
+```
+
+### Multiple credentials
+
+Specify multiple credentials per provider. The gateway tries them in order:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [
+ {"apiKey": "sk-ant-primary"},
+ {"apiKey": "sk-ant-backup"}
+ ]
+ }
+ }
+ }
+}
+```
+
+### Multiple providers
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [{"apiKey": "sk-ant-..."}],
+ "openai": [{"apiKey": "sk-..."}]
+ }
+ }
+ }
+}
+```
+
+## Automatic fallback
+
+If your BYOK credentials fail (401, 403, rate limit), the gateway automatically falls back to platform credentials. This fallback preserves all compliance filters — if you requested [ZDR](/gateway/zdr), the fallback will only use ZDR-compliant system credentials.
+
+The `X-Tangle-BYOK` response header indicates whether the request used your credentials:
+
+```
+X-Tangle-BYOK: true # Your key was used
+```
+
+If the header is absent, platform credentials were used (possibly via fallback).
+
+## Pricing
+
+| Credential type | Markup |
+| -------------------- | ---------------------------- |
+| BYOK | **0%** — provider list price |
+| Platform credentials | 20% markup (configurable) |
+
+## Security
+
+- BYOK credentials are never logged, stored, or persisted.
+- Credentials exist only in memory for the duration of the request.
+- The `providerOptions` field is stripped from the request body before forwarding to providers.
+- Credentials are validated by structure (`apiKey` must be a string) and sanitized against prototype pollution.
diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx
new file mode 100644
index 00000000..6e2417d5
--- /dev/null
+++ b/pages/gateway/caching.mdx
@@ -0,0 +1,72 @@
+---
+title: Automatic Caching
+description: Enable prompt caching across providers with a single flag.
+---
+
+# Automatic Caching
+
+Some providers require explicit cache markers to enable prompt caching, while others cache automatically. Use `caching: 'auto'` to let the gateway handle it.
+
+## Usage
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [
+ {"role": "system", "content": "You are a helpful assistant with a large knowledge base..."},
+ {"role": "user", "content": "What is Tangle?"}
+ ],
+ "providerOptions": {
+ "gateway": {
+ "caching": "auto"
+ }
+ }
+}
+```
+
+## How it works
+
+| Provider | Caching Type | What `auto` does |
+| ------------------------------ | ------------ | ----------------------------------------------------------------------- |
+| OpenAI | Implicit | No change needed. Caching happens automatically. |
+| Google | Implicit | No change needed. |
+| DeepSeek | Implicit | No change needed. |
+| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. |
+| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. |
+
+For Anthropic, the gateway converts:
+
+```json
+{"role": "system", "content": "You are helpful..."}
+```
+
+Into:
+
+```json
+{"role": "system", "content": [{"type": "text", "text": "You are helpful...", "cache_control": {"type": "ephemeral"}}]}
+```
+
+This caches the system prompt so subsequent messages in the same conversation reuse it, reducing costs by up to 90%.
+
+## Response caching
+
+Separately from prompt caching, the gateway caches complete responses for **deterministic requests** (temperature ≤ 0.01, non-streaming). Cached responses are free.
+
+```
+X-Tangle-Cache: HIT # Served from cache
+X-Tangle-Cache: MISS # Fetched from provider
+```
+
+Disable per-request:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "cache": false
+ }
+ }
+}
+```
+
+The response cache key includes: model, messages, temperature, max_tokens, tools, response_format, and top_p. Different parameters always produce different cache entries.
diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx
new file mode 100644
index 00000000..c1d157ae
--- /dev/null
+++ b/pages/gateway/enterprise-zdr.mdx
@@ -0,0 +1,94 @@
+---
+title: Enterprise ZDR Setup
+description: Configure Zero Data Retention for your organization.
+---
+
+# Enterprise ZDR Setup
+
+This guide walks through configuring ZDR for an organization that needs to guarantee no prompts or responses are retained by AI providers.
+
+## Step 1: Understand the trust model
+
+Read the [ZDR trust model](/gateway/zdr#trust-model) first. Key points:
+
+- ZDR is enforced at the **direct provider** level only.
+- **Operators are skipped** when ZDR is enabled (their backing provider is unverifiable).
+- **LiteLLM is skipped** (its internal routing is uncontrollable).
+- BYOK fallback to platform credentials preserves ZDR filtering.
+
+## Step 2: Choose your approach
+
+### Option A: Team-wide ZDR (recommended)
+
+Enable ZDR for all requests from your team. No code changes needed — every request is automatically filtered.
+
+Contact your admin to set `zdrEnabled: true` on your team record via the admin API:
+
+```bash
+# Admin sets team-wide ZDR
+curl -X PUT https://router.tangle.tools/api/admin/compliance \
+ -H "Cookie: session_token=ADMIN_SESSION" \
+ -d '{"providerId": "...", "zdr": true}'
+```
+
+### Option B: Per-request ZDR
+
+Add `zeroDataRetention: true` to individual requests. Useful for mixed workloads where only some requests handle sensitive data.
+
+```python
+response = client.chat.completions.create(
+ model="anthropic/claude-sonnet-4-6",
+ messages=[...],
+ extra_body={
+ "providerOptions": {
+ "gateway": {"zeroDataRetention": True}
+ }
+ }
+)
+```
+
+## Step 3: Verify provider coverage
+
+Check which providers are ZDR-verified for the models you need:
+
+```bash
+curl https://router.tangle.tools/api/gateway/compliance | jq '.providers[] | select(.zdr == true)'
+```
+
+If your required model is only available from a non-ZDR provider, the request will return 400 with a clear error listing which providers were considered.
+
+## Step 4: Set up BYOK (optional)
+
+For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you:
+
+- Zero platform markup
+- Direct contractual relationship with the provider
+- ZDR enforcement still applies on the fallback path
+
+## Step 5: Monitor compliance
+
+Use the [generation lookup API](/gateway/generation-lookup) to audit requests:
+
+```bash
+# Check if a specific request used a ZDR provider
+curl -H "Authorization: Bearer sk-tan-..." \
+ "https://router.tangle.tools/v1/generation?id=gen_..." \
+ | jq '.data.provider_name'
+```
+
+The `routing_trace` field shows exactly which providers were considered and filtered.
+
+## Combining ZDR + no-train
+
+Both flags work as an AND: when both are enabled, requests are routed only to providers that satisfy both criteria. This is the strictest compliance level.
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "zeroDataRetention": true,
+ "disallowPromptTraining": true
+ }
+ }
+}
+```
diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx
new file mode 100644
index 00000000..bd01e96f
--- /dev/null
+++ b/pages/gateway/fallbacks.mdx
@@ -0,0 +1,71 @@
+---
+title: Model Fallbacks
+description: Configure backup models that are tried when the primary model fails.
+---
+
+# Model Fallbacks
+
+Specify backup models that are tried in order if the primary model fails or is unavailable.
+
+## Usage
+
+Pass a `models` array in `providerOptions.gateway`:
+
+```json
+{
+ "model": "openai/gpt-4o",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "providerOptions": {
+ "gateway": {
+ "models": ["anthropic/claude-sonnet-4-6", "groq/llama-3.1-70b-versatile"]
+ }
+ }
+}
+```
+
+The gateway tries:
+
+1. `openai/gpt-4o` (primary model)
+2. `anthropic/claude-sonnet-4-6` (first fallback)
+3. `groq/llama-3.1-70b-versatile` (second fallback)
+
+The response comes from the first model that succeeds.
+
+## How fallback works
+
+For each model in the list, the gateway runs the full routing chain:
+
+1. **Operators** — try operators serving this model (if available)
+2. **LiteLLM** — try the proxy with built-in retries
+3. **Direct provider** — call the provider API directly
+
+If all tiers fail for a model, the gateway moves to the next model in the list.
+
+## Combining with provider ordering
+
+Use `models` with `order` to control both model fallback and provider preference:
+
+```json
+{
+ "model": "openai/gpt-4o",
+ "providerOptions": {
+ "gateway": {
+ "models": ["anthropic/claude-sonnet-4-6"],
+ "order": ["bedrock", "anthropic"]
+ }
+ }
+}
+```
+
+This tries:
+
+1. `openai/gpt-4o` via available providers
+2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct
+
+## Observability
+
+When fallbacks occur, the [routing trace](/gateway/routing-trace) shows every model and provider attempted:
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(err:5001ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx
new file mode 100644
index 00000000..128480e7
--- /dev/null
+++ b/pages/gateway/feature-flags.mdx
@@ -0,0 +1,35 @@
+---
+title: Feature Flags
+description: Disable gateway features without a code deploy.
+---
+
+# Feature Flags
+
+All gateway features are on by default. Set any flag to `false` to disable it without deploying new code.
+
+## Available flags
+
+| Environment Variable | Default | Controls |
+| -------------------------- | ------- | ------------------------------------------------------------ |
+| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning |
+| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests |
+| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) |
+| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic |
+| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header |
+
+## Usage
+
+Set in your environment:
+
+```bash
+ENABLE_GUARDRAILS=false # Disable all guardrail scanning
+ENABLE_RESPONSE_CACHE=false # Disable response cache reads/writes
+```
+
+## Notes
+
+- `ENABLE_COMPLIANCE_FILTER` only disables the early validation check that returns a 400 before routing. The actual ZDR/no-train enforcement in the routing tiers (skip operators, skip LiteLLM) stays active regardless. This flag is for suppressing the early error, not for bypassing compliance.
+
+- When `ENABLE_GUARDRAILS=false`, no PII or injection scanning occurs. The `X-Tangle-Guardrails` header is never set. GuardrailEvent records are not created.
+
+- When `ENABLE_RESPONSE_CACHE=false`, every request hits the provider. Cached entries are not read or written. Existing cache entries are not purged (they expire naturally via TTL).
diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx
new file mode 100644
index 00000000..ff008510
--- /dev/null
+++ b/pages/gateway/free-tier.mdx
@@ -0,0 +1,62 @@
+---
+title: Free Tier
+description: Free access to small models with daily limits.
+---
+
+# Free Tier
+
+Try the gateway without credits. Free tier restricts to cheap, fast models with daily request limits.
+
+## Limits
+
+| Tier | Daily limit | Rate limit |
+| ---------------------------- | ----------- | ---------- |
+| Anonymous (no auth) | 5 req/day | 10 req/min |
+| Authenticated (zero credits) | 20 req/day | 30 req/min |
+| Paid (any credits) | Unlimited | 60 req/min |
+
+## Allowed models
+
+Free tier requests can use:
+
+| Model | Provider | Why it's free |
+| --------------------------- | --------- | ------------------- |
+| `gpt-4o-mini` | OpenAI | Small, cheap |
+| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap |
+| `llama-3.1-8b-instant` | Groq | Free tier inference |
+| `llama-3.2-1b-preview` | Groq | Tiny model |
+| `llama-3.2-3b-preview` | Groq | Small model |
+| `gemini-2.0-flash-lite` | Google | Free tier |
+| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap |
+| `deepseek-chat` | DeepSeek | Very cheap |
+
+## Blocked models
+
+These models require credits:
+
+- **OpenAI reasoning:** o1, o3, o4 (all variants)
+- **OpenAI flagship:** gpt-4o, gpt-4, gpt-5 (gpt-4o-mini is allowed)
+- **Anthropic flagship:** claude-opus, claude-sonnet (haiku is allowed)
+- **Google flagship:** gemini-2.5-pro, gemini-2.5-ultra
+- **xAI flagship:** grok-2, grok-3
+
+Requesting a blocked model without credits returns 402:
+
+```json
+{
+ "error": {
+ "message": "Model \"gpt-4o\" requires credits. Free tier models: gpt-4o-mini, llama-3.1-8b-instant, gemini-2.0-flash-lite, deepseek-chat. Add credits or use a free tier model.",
+ "type": "insufficient_funds",
+ "code": "free_tier_limit"
+ }
+}
+```
+
+## Response headers
+
+Free tier responses include remaining quota:
+
+```
+X-Free-Tier-Remaining: 3
+X-Free-Tier-Limit: 5
+```
diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx
new file mode 100644
index 00000000..ccc988f8
--- /dev/null
+++ b/pages/gateway/generation-lookup.mdx
@@ -0,0 +1,71 @@
+---
+title: Generation Lookup
+description: Retrieve detailed information about any request by its generation ID.
+---
+
+# Generation Lookup
+
+Every request returns a unique generation ID in the `X-Generation-Id` header. Use it to look up full request details.
+
+## Endpoint
+
+```
+GET /v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV
+```
+
+Requires authentication. Returns details only for requests made by the authenticated user.
+
+## Example
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ "https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV"
+```
+
+## Response
+
+```json
+{
+ "data": {
+ "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV",
+ "total_cost": 0.00123,
+ "usage": 0.00123,
+ "created_at": "2026-04-10T12:00:00.000Z",
+ "model": "anthropic/claude-sonnet-4-6",
+ "is_byok": false,
+ "provider_name": "anthropic",
+ "streamed": true,
+ "latency": 200,
+ "generation_time": 1500,
+ "tokens_prompt": 100,
+ "tokens_completion": 50,
+ "native_tokens_cached": 80,
+ "native_tokens_reasoning": 0,
+ "status": "success",
+ "routing_trace": {
+ "planningReasoning": "ZDR requested: filtering to 13 ZDR providers",
+ "modelAttempts": [...],
+ "totalLatencyMs": 1500
+ },
+ "cache_hit": false
+ }
+}
+```
+
+## Fields
+
+| Field | Description |
+| ------------------------------------- | ---------------------------------------------- |
+| `id` | Generation ID (`gen_`) |
+| `total_cost` | Total cost in USD |
+| `model` | Model that served the request |
+| `is_byok` | Whether BYOK credentials were used |
+| `provider_name` | Provider that served the request |
+| `streamed` | Whether the request used streaming |
+| `latency` | Time to first token (ms) |
+| `generation_time` | Total generation time (ms) |
+| `tokens_prompt` / `tokens_completion` | Token counts |
+| `native_tokens_cached` | Tokens served from provider cache |
+| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) |
+| `routing_trace` | Full routing attempt history |
+| `cache_hit` | Whether response was served from gateway cache |
diff --git a/pages/gateway/getting-started.mdx b/pages/gateway/getting-started.mdx
new file mode 100644
index 00000000..30d1edd2
--- /dev/null
+++ b/pages/gateway/getting-started.mdx
@@ -0,0 +1,94 @@
+---
+title: Getting Started
+description: Make your first inference request through Tangle Gateway in 2 minutes.
+---
+
+# Getting Started
+
+## 1. Get an API key
+
+Sign up at [router.tangle.tools](https://router.tangle.tools) and create an API key from the dashboard. Keys start with `sk-tan-`.
+
+## 2. Make a request
+
+### curl
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openai/gpt-4o-mini",
+ "messages": [{"role": "user", "content": "What is Tangle?"}],
+ "stream": false
+ }'
+```
+
+### Python (OpenAI SDK)
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="sk-tan-YOUR_KEY",
+ base_url="https://router.tangle.tools/v1"
+)
+
+response = client.chat.completions.create(
+ model="anthropic/claude-sonnet-4-6",
+ messages=[{"role": "user", "content": "What is Tangle?"}]
+)
+print(response.choices[0].message.content)
+```
+
+### TypeScript (AI SDK)
+
+```typescript
+import { generateText } from 'ai'
+import { createOpenAI } from '@ai-sdk/openai'
+
+const tangle = createOpenAI({
+ apiKey: 'sk-tan-YOUR_KEY',
+ baseURL: 'https://router.tangle.tools/v1',
+})
+
+const { text } = await generateText({
+ model: tangle('anthropic/claude-sonnet-4-6'),
+ prompt: 'What is Tangle?',
+})
+```
+
+## 3. Check the response headers
+
+Every response includes metadata headers:
+
+```
+X-Generation-Id: gen_01J5K7... # Unique request ID
+X-Tangle-Price-Input: 0.000003 # USD per input token
+X-Tangle-Price-Output: 0.000015 # USD per output token
+X-Tangle-Cache: MISS # Response cache status
+X-RateLimit-Remaining: 59 # Requests left in window
+```
+
+Use the generation ID to look up request details later via [`GET /v1/generation`](/gateway/generation-lookup).
+
+## 4. Try different models
+
+The model ID format is `provider/model-name`:
+
+```
+openai/gpt-4o-mini
+anthropic/claude-sonnet-4-6
+google/gemini-2.0-flash-lite
+groq/llama-3.1-8b-instant
+deepseek/deepseek-chat
+mistral/mistral-large-latest
+```
+
+You can also use bare model names (`gpt-4o-mini`, `claude-sonnet-4-6`) — the gateway resolves the provider automatically.
+
+## What's next
+
+- [Bring Your Own Key](/gateway/byok) — use your existing provider API keys for zero markup
+- [Model Fallbacks](/gateway/fallbacks) — configure backup models
+- [Zero Data Retention](/gateway/zdr) — compliance for sensitive workloads
diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx
new file mode 100644
index 00000000..e343a214
--- /dev/null
+++ b/pages/gateway/guardrails.mdx
@@ -0,0 +1,63 @@
+---
+title: Guardrails
+description: Gateway-level PII detection and prompt injection scanning.
+---
+
+# Guardrails
+
+The gateway scans all requests for PII and prompt injection patterns before routing. Results are available in the `X-Tangle-Guardrails` response header.
+
+## Detection categories
+
+### PII detection
+
+| Pattern | Severity | Example |
+| ------------------------------ | -------- | --------------------- |
+| SSN | Critical | `123-45-6789` |
+| Credit card (Visa/MC/Discover) | Critical | `4111 1111 1111 1111` |
+| Credit card (Amex) | Critical | `3782 822463 10005` |
+| Email | Low | `user@example.com` |
+| US phone | Medium | `(555) 123-4567` |
+| IP address | Low | `192.168.1.1` |
+
+### Prompt injection detection
+
+Applied to user messages only (not system or assistant):
+
+| Pattern | Severity |
+| ---------------------------------- | -------- |
+| "Ignore all previous instructions" | High |
+| "You are now a different AI" | High |
+| "Pretend you have no restrictions" | High |
+| "Reveal your system prompt" | Medium |
+| DAN-mode jailbreaks | High |
+
+## Modes
+
+### Audit mode (default)
+
+Flags are logged and returned in the `X-Tangle-Guardrails` header but requests are not blocked:
+
+```
+X-Tangle-Guardrails: pii:low,prompt_injection:high
+```
+
+### Block mode
+
+Requests matching configured categories are rejected with 400:
+
+```json
+{
+ "error": {
+ "message": "Request blocked by guardrails: pii, prompt_injection",
+ "type": "invalid_request_error",
+ "code": "guardrail_blocked"
+ }
+}
+```
+
+Block mode requires a `GuardrailPolicy` record configured for your team or user with specific categories to block.
+
+## Disabling
+
+Set `ENABLE_GUARDRAILS=false` to skip all scanning. See [Feature Flags](/gateway/feature-flags).
diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx
new file mode 100644
index 00000000..464211cc
--- /dev/null
+++ b/pages/gateway/how-routing-works.mdx
@@ -0,0 +1,65 @@
+---
+title: How Routing Works
+description: The three-tier routing architecture behind Tangle Gateway.
+---
+
+# How Routing Works
+
+Every request passes through up to three routing tiers. The gateway tries each tier in order and returns the first successful response.
+
+## The three tiers
+
+```
+Request → Tier 1: Operators → Tier 2: LiteLLM → Tier 3: Direct Provider → Response
+```
+
+### Tier 1: Operator routing
+
+Operators run [Blueprints](/developers/blueprints/introduction) — on-chain service definitions like the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint). They stake tokens, serve models, and compete on price, latency, and reputation.
+
+- Selected by [scoring algorithm](/gateway/smart-routing): reputation (40%) + latency (30%) + price (30%)
+- Discovered automatically from on-chain [Blueprint Service Manager](/developers/blueprints/service-lifecycle) contracts
+- Can be pinned by blueprint, service, or operator address
+- See [Operator Routing](/gateway/operator-routing) for the full Blueprint catalog
+
+**When it's used:** Default for `auto` routing mode, required for SpendAuth (on-chain payments).
+
+**When it's skipped:** When [ZDR](/gateway/zdr) or [no-train](/gateway/no-train) is requested (operators can't verify compliance). When `routing: "provider"` is set explicitly.
+
+### Tier 2: LiteLLM proxy
+
+An internal proxy that handles 100+ provider integrations with built-in retries and provider-level fallbacks.
+
+**When it's used:** Default for standard requests when no operator is available.
+
+**When it's skipped:** When ZDR or no-train is requested (LiteLLM's downstream routing is not compliance-controllable). When LiteLLM is not configured (`LITELLM_URL` unset).
+
+### Tier 3: Direct provider
+
+The gateway calls the provider API directly using platform credentials (or [BYOK](/gateway/byok) credentials).
+
+**When it's used:** Fallback when tiers 1 and 2 fail. Only tier used when compliance routing is active.
+
+**Always used for:** ZDR requests, no-train requests, BYOK with compliance flags.
+
+## Compliance mode
+
+When `zeroDataRetention` or `disallowPromptTraining` is set:
+
+```
+Request → Tier 3: Direct Provider (verified only) → Response
+```
+
+Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with verified compliance agreements. See [Zero Data Retention](/gateway/zdr) for the trust model.
+
+## Routing control
+
+| Method | Effect |
+| ------------------------------- | ---------------------------------------------------- |
+| `routing: "auto"` | Try all three tiers (default) |
+| `routing: "operator"` | Operators only. Fails if no operator available. |
+| `routing: "provider"` | Skip operators, use LiteLLM + direct only. |
+| `X-Tangle-Blueprint: <id>` | Pin to operators under this Blueprint. |
+| `X-Tangle-Operator: <address>` | Pin to a specific operator. |
+| `providerOptions.gateway.order` | Control which providers are tried and in what order. |
+| `providerOptions.gateway.only` | Restrict to these providers only. |
diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx
new file mode 100644
index 00000000..57c7630a
--- /dev/null
+++ b/pages/gateway/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Tangle Gateway
+description: Unified API for hundreds of AI models with built-in routing, compliance, and on-chain payments.
+---
+
+# Tangle Gateway
+
+Tangle Gateway is a unified inference API. One endpoint, hundreds of models, automatic routing across centralized providers and decentralized operators.
+
+## What it does
+
+- **One key, any model.** Access OpenAI, Anthropic, Google, Groq, Mistral, and 20+ providers through a single API key.
+- **Operator network.** Route to decentralized operators running [Blueprints](/developers/blueprints/introduction) on the Tangle network who compete on price, latency, and reputation.
+- **Compliance routing.** Zero Data Retention and no-train filtering with verified provider agreements.
+- **BYOK.** Bring your own provider keys for zero-markup access.
+- **On-chain payments.** Pay operators directly via SpendAuth — no credit card required.
+
+## Quick example
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Authorization: Bearer $TANGLE_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "stream": true
+ }'
+```
+
+Works with any OpenAI-compatible SDK. Change the base URL and you're done.
+
+## Architecture
+
+The gateway routes through three tiers, in order:
+
+| Tier | What | When |
+| ------------- | ---------------------------------------------------------- | -------------------------------------------------------------- |
+| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth |
+| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests |
+| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required |
+
+When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only.
+
+## How it fits
+
+```
+Workbench (agents) → Gateway (inference) → Operators (serving) → Protocol (settlement)
+```
+
+The gateway sits between the [Workbench](/vibe/introduction) where agents run and the [Protocol](/network/overview) where operators get paid. Agents in the workbench call the gateway for model access. The gateway selects the best provider or operator, routes the request, tracks usage, and settles payment.
+
+## Next steps
+
+- [Getting Started](/gateway/getting-started) — make your first request in 2 minutes
+- [Supported Models](/gateway/models) — browse the model catalog
+- [How Routing Works](/gateway/how-routing-works) — understand the 3-tier architecture
+- [Zero Data Retention](/gateway/zdr) — compliance for regulated industries
diff --git a/pages/gateway/migrate-openai.mdx b/pages/gateway/migrate-openai.mdx
new file mode 100644
index 00000000..c27336f5
--- /dev/null
+++ b/pages/gateway/migrate-openai.mdx
@@ -0,0 +1,77 @@
+---
+title: Migrate from OpenAI
+description: Switch from OpenAI's API to Tangle Gateway in under 5 minutes.
+---
+
+# Migrate from OpenAI
+
+Tangle Gateway is OpenAI-compatible. Change two lines and you're done.
+
+## Python
+
+```diff
+ from openai import OpenAI
+
+ client = OpenAI(
+- api_key="sk-...",
++ api_key="sk-tan-YOUR_KEY",
++ base_url="https://router.tangle.tools/v1",
+ )
+
+ response = client.chat.completions.create(
+- model="gpt-4o",
++ model="openai/gpt-4o", # or just "gpt-4o" — auto-resolved
+ messages=[{"role": "user", "content": "Hello"}]
+ )
+```
+
+## TypeScript
+
+```diff
+ import OpenAI from 'openai'
+
+ const client = new OpenAI({
+- apiKey: 'sk-...',
++ apiKey: 'sk-tan-YOUR_KEY',
++ baseURL: 'https://router.tangle.tools/v1',
+ })
+```
+
+## curl
+
+```diff
+- curl https://api.openai.com/v1/chat/completions \
+- -H "Authorization: Bearer sk-..." \
++ curl https://router.tangle.tools/v1/chat/completions \
++ -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{"model": "gpt-4o", "messages": [...]}'
+```
+
+## What you get
+
+By switching to Tangle Gateway, you get:
+
+- **Access to every provider** through the same client. Try `anthropic/claude-sonnet-4-6` or `groq/llama-3.1-70b` without changing SDKs.
+- **Automatic fallbacks.** If OpenAI is down, configure backup models.
+- **Cost visibility.** Every response tells you exactly what it cost via `X-Tangle-Price-*` headers.
+- **Compliance routing.** One flag for ZDR, one flag for no-train.
+- **BYOK.** Keep using your OpenAI key with zero markup. Add it to [`providerOptions.gateway.byok`](/gateway/byok).
+
+## Keep your OpenAI key (zero markup)
+
+If you already have an OpenAI API key, use [BYOK](/gateway/byok) for zero platform markup:
+
+```python
+response = client.chat.completions.create(
+ model="openai/gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}],
+ extra_body={
+ "providerOptions": {
+ "gateway": {
+ "byok": {"openai": [{"apiKey": "sk-YOUR_OPENAI_KEY"}]}
+ }
+ }
+ }
+)
+```
diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx
new file mode 100644
index 00000000..abad708f
--- /dev/null
+++ b/pages/gateway/migrate-vercel.mdx
@@ -0,0 +1,80 @@
+---
+title: Migrate from Vercel AI Gateway
+description: Switch from Vercel AI Gateway to Tangle Gateway.
+---
+
+# Migrate from Vercel AI Gateway
+
+Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI Gateway. Most code works unchanged.
+
+## What maps directly
+
+| Vercel Feature | Tangle Equivalent | Notes |
+| ------------------------------------------------ | ----------------- | --------------------------------- |
+| `providerOptions.gateway.byok` | Same | Identical schema |
+| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers |
+| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers |
+| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection |
+| `providerOptions.gateway.order` | Same | Provider priority |
+| `providerOptions.gateway.only` | Same | Provider allowlist |
+| `models` fallback array | Same | Model-level failover |
+| `GET /v1/credits` | Same | Balance check |
+| `GET /v1/generation` | Same | Request detail lookup |
+
+## What's different
+
+| Feature | Vercel | Tangle |
+| --------------------- | ------------------------------------- | -------------------------------------------------------- |
+| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` |
+| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) |
+| **Pricing** | Zero markup | 20% markup (0% with BYOK) |
+| **Operator network** | None | Decentralized operators compete on price/latency |
+| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card |
+| **Guardrails** | None | PII + injection detection built-in |
+| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) |
+| **OIDC auth** | Vercel-only | Not applicable |
+
+## Code change
+
+### AI SDK
+
+```diff
+ import { generateText } from 'ai'
++ import { createOpenAI } from '@ai-sdk/openai'
+
++ const tangle = createOpenAI({
++ apiKey: 'sk-tan-YOUR_KEY',
++ baseURL: 'https://router.tangle.tools/v1',
++ })
+
+ const { text } = await generateText({
+- model: 'anthropic/claude-sonnet-4-6',
++ model: tangle('anthropic/claude-sonnet-4-6'),
+ prompt: 'Hello',
+ providerOptions: {
+ gateway: {
+ zeroDataRetention: true, // works the same
+ caching: 'auto', // works the same
+ },
+ },
+ })
+```
+
+### OpenAI SDK
+
+```diff
+ const client = new OpenAI({
+- apiKey: process.env.AI_GATEWAY_API_KEY,
+- baseURL: 'https://ai-gateway.vercel.sh/v1',
++ apiKey: process.env.TANGLE_API_KEY,
++ baseURL: 'https://router.tangle.tools/v1',
+ })
+```
+
+## What you gain
+
+- **Operator network.** Access decentralized inference providers who compete on price and latency.
+- **On-chain payments.** Pay with crypto via SpendAuth — no Stripe/credit card required.
+- **Wallet auth.** Sign in with Ethereum (SIWE) for web3-native access.
+- **Guardrails.** Built-in PII and prompt injection detection on every request.
+- **Self-hostable.** Deploy your own gateway instance — it's open source.
diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx
new file mode 100644
index 00000000..4c058018
--- /dev/null
+++ b/pages/gateway/models.mdx
@@ -0,0 +1,77 @@
+---
+title: Supported Models
+description: Browse models available through Tangle Gateway across 20+ providers.
+---
+
+# Supported Models
+
+Tangle Gateway provides access to models from 20+ providers through a single API.
+
+## Providers
+
+| Provider | Slug | Models |
+| ----------- | ----------- | ----------------------------------------------------- |
+| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS |
+| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku |
+| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite |
+| Groq | `groq` | Llama 3.1/3.2 (fast inference) |
+| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) |
+| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder |
+| Mistral | `mistral` | Mistral Large, Codestral, Pixtral |
+| Fireworks | `fireworks` | Phi, StarCoder, open models |
+| Cohere | `cohere` | Command R/R+ |
+| xAI | `xai` | Grok 2, Grok 3 |
+| Cerebras | `cerebras` | Llama (fast inference) |
+| SambaNova | `sambanova` | Fast open-model inference |
+| AI21 | `ai21` | Jamba |
+| Nvidia | `nvidia` | Nemotron |
+| Z.ai | `zai` | GLM-4.7, GLM-5 |
+| Moonshot | `moonshot` | Kimi |
+
+Plus decentralized operators on the Tangle network running [Blueprints](/developers/blueprints/introduction):
+
+| Blueprint | Models | How to route |
+| -------------------------------------------------------------------------- | ----------------------------------------------- | ------------------------------------- |
+| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto |
+| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` |
+| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug |
+
+Operators set their own pricing and the gateway [scores them](/gateway/smart-routing) on reputation, latency, and price. See [Operator Routing](/gateway/operator-routing) for details.
+
+## Model ID format
+
+Use `provider/model-name`:
+
+```
+anthropic/claude-sonnet-4-6
+openai/gpt-4o-mini
+groq/llama-3.1-70b-versatile
+```
+
+Or use bare names — the gateway resolves the provider by prefix:
+
+| Prefix | Resolves to |
+| --------------------------- | ----------- |
+| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI |
+| `claude-` | Anthropic |
+| `gemini-`, `gemma-` | Google |
+| `llama-`, `mixtral-` | Groq |
+| `deepseek-` | DeepSeek |
+| `mistral-`, `codestral-` | Mistral |
+| `grok-` | xAI |
+| `glm-` | Z.ai |
+| `command-` | Cohere |
+
+## Modalities
+
+| Modality | Endpoint | Examples |
+| ---------- | ---------------------------------------------- | -------------------------------------- |
+| Text | `/v1/chat/completions` | All chat models |
+| Images | `/v1/images/generations` | DALL-E, FLUX |
+| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS |
+| Embeddings | `/v1/embeddings` | text-embedding-3-small/large |
+| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) |
+
+## Dynamic discovery
+
+The model catalog is available at [`GET /api/models`](https://router.tangle.tools/api/models) with pricing, context length, and modality information for every model.
diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx
new file mode 100644
index 00000000..f6461c01
--- /dev/null
+++ b/pages/gateway/no-train.mdx
@@ -0,0 +1,43 @@
+---
+title: Disallow Prompt Training
+description: Route only through providers that don't use your data for model training.
+---
+
+# Disallow Prompt Training
+
+Ensure your prompts and responses are never used by providers to train their models.
+
+## Usage
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "disallowPromptTraining": true
+ }
+ }
+}
+```
+
+## Relationship to ZDR
+
+Disallow prompt training is a **weaker guarantee** implied by [Zero Data Retention](/gateway/zdr). All ZDR-compliant providers also disallow prompt training, but more providers disallow training than offer full ZDR.
+
+| Filter | Verified providers |
+| ----------------------- | ------------------ |
+| No-train only | 25 providers |
+| ZDR (includes no-train) | 13 providers |
+
+Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees.
+
+## No-train verified providers
+
+All ZDR providers plus: OpenAI, Google AI Studio, Cohere, Perplexity, xAI, Morph AI, Novita AI, Voyage AI, and others.
+
+See the full list at [`GET /api/gateway/compliance`](/gateway/api-compliance).
+
+## Routing behavior
+
+Same as ZDR: operators and LiteLLM are skipped. Only direct provider calls to verified no-train providers.
+
+Can be enabled team-wide via `noTrainEnabled: true` on the team record.
diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx
new file mode 100644
index 00000000..687d38d2
--- /dev/null
+++ b/pages/gateway/operator-routing.mdx
@@ -0,0 +1,89 @@
+---
+title: Operator Routing
+description: Route inference through decentralized operators on the Tangle network.
+---
+
+# Operator Routing
+
+Operators are independent inference providers registered on the Tangle network. They run models on their own hardware, set their own prices, and earn from every request routed through them.
+
+## Blueprints
+
+Operators run **Blueprints** — on-chain service definitions that specify what an operator does. The inference-related Blueprints the gateway routes through:
+
+| Blueprint | What it serves | Repo |
+| ----------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
+| **LLM Inference** | Chat completions, text generation (Llama, Qwen, Mistral, etc.) | [tangle-network/llm-inference-blueprint](https://github.com/tangle-network/llm-inference-blueprint) |
+| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed |
+| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) |
+
+The LLM Inference Blueprint uses [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core) — a shared Rust crate for EIP-712 signature verification, nonce management, and on-chain settlement. Operators compile it into a binary (`operator-lite`) that runs alongside their model server.
+
+To build and deploy your own inference Blueprint, see the [Blueprint SDK docs](/developers/blueprints/introduction) and the [Blueprint Runner](/developers/blueprint-runner/introduction).
+
+## How operators are discovered
+
+1. Operators register on-chain via the [Blueprint Service Manager (BSM)](/developers/blueprints/service-lifecycle) contract
+2. The gateway syncs operator data from the chain every 60 seconds
+3. Operators are stored in the database with their endpoint URL, pricing, and status
+4. The [scoring algorithm](/gateway/smart-routing) ranks operators per-request
+
+## Routing to operators
+
+### Automatic (default)
+
+In `auto` mode, the gateway checks for operators serving the requested model before trying centralized providers:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to a Blueprint
+
+Route only to operators registered under a specific Blueprint:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "X-Tangle-Blueprint: 42" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to an operator
+
+Route to a specific operator by slug or Ethereum address:
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "X-Tangle-Operator: tangle-core-1" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+### Pin to a service instance
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "X-Tangle-Service: 7" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+## What's verified on-chain
+
+| Data | Verified? |
+| ------------------------- | ---------------------------------- |
+| Operator Ethereum address | Yes (signed transaction) |
+| Active/inactive status | Yes (BSM contract state) |
+| Staked amount | Yes (on-chain balance) |
+| Pricing (per-token) | Yes (BSM contract) |
+| Endpoint URL | No (self-reported at registration) |
+| Backing provider | No (not tracked) |
+
+Because endpoint URL and backing provider are self-reported, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers.
+
+## Payment
+
+Operator requests can be paid two ways:
+
+1. **Platform credits** — deducted from your credit balance at the operator's listed price
+2. **SpendAuth (on-chain)** — direct EIP-712 signed payment to the operator. No credit card needed. See [SpendAuth](/gateway/spend-auth).
diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx
new file mode 100644
index 00000000..6634f841
--- /dev/null
+++ b/pages/gateway/pricing.mdx
@@ -0,0 +1,55 @@
+---
+title: Credits & Pricing
+description: How billing works on Tangle Gateway.
+---
+
+# Credits & Pricing
+
+## Pricing model
+
+| Credential type | Markup |
+| -------------------------------- | ------------------------------------------- |
+| Platform credentials | 20% above provider list price |
+| [BYOK](/gateway/byok) | **0%** — provider list price, no markup |
+| [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) |
+
+The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them.
+
+## Credits
+
+Credits are denominated in USD. Purchase via Stripe or receive as part of a subscription plan.
+
+Check your balance:
+
+```bash
+curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \
+ https://router.tangle.tools/v1/credits
+```
+
+```json
+{
+ "balance": "95.50",
+ "total_used": "4.50"
+}
+```
+
+## Cost per request
+
+Each request is charged based on tokens:
+
+```
+cost = (input_tokens × input_price) + (output_tokens × output_price)
+```
+
+Pricing varies by model. Check per-model pricing at [`GET /api/models`](https://router.tangle.tools/api/models) or in the `X-Tangle-Price-Input` / `X-Tangle-Price-Output` response headers.
+
+## Billing transparency
+
+Every response includes pricing headers so you know the cost before it hits your balance:
+
+```
+X-Tangle-Price-Input: 0.000003 # USD per input token
+X-Tangle-Price-Output: 0.000015 # USD per output token
+```
+
+Look up detailed billing for any request via [`GET /v1/generation`](/gateway/api-generation).
diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx
new file mode 100644
index 00000000..ccb49797
--- /dev/null
+++ b/pages/gateway/provider-options.mdx
@@ -0,0 +1,70 @@
+---
+title: providerOptions.gateway
+description: Complete reference for gateway-specific request options.
+---
+
+# providerOptions.gateway
+
+All gateway-specific options are passed inside `providerOptions.gateway` in the request body. These are stripped before forwarding to providers.
+
+## Full schema
+
+```typescript
+interface GatewayOptions {
+ // Bring Your Own Key
+ byok?: Record<string, Array<{ apiKey: string }>>
+
+ // Compliance routing
+ zeroDataRetention?: boolean
+ disallowPromptTraining?: boolean
+
+ // Caching
+ caching?: 'auto' | false
+ cache?: false // disable response caching
+
+ // Provider routing
+ order?: string[] // provider priority
+ only?: string[] // provider allowlist
+
+ // Model fallbacks
+ models?: string[] // tried in order after primary model
+
+ // Timeouts (1s-120s, clamped)
+ timeout?: number | Record<string, number>
+}
+```
+
+## Options reference
+
+| Option | Type | Default | Description |
+| ------------------------ | ---------------------------------- | ------- | -------------------------------------------------------------- |
+| `byok` | `Record<string, {apiKey: string}[]>` | — | Per-request provider credentials. [Details](/gateway/byok) |
+| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) |
+| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) |
+| `caching` | `'auto' \| false` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) |
+| `cache` | `false` | — | Set `false` to skip response cache for this request. |
+| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) |
+| `only` | `string[]` | — | Restrict to these providers only. |
+| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) |
+| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts) |
+
+## Example: everything at once
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Hello"}],
+ "providerOptions": {
+ "gateway": {
+ "byok": {
+ "anthropic": [{"apiKey": "sk-ant-..."}]
+ },
+ "zeroDataRetention": true,
+ "caching": "auto",
+ "models": ["openai/gpt-4o"],
+ "timeout": {"anthropic": 10000, "openai": 5000},
+ "order": ["anthropic", "openai"]
+ }
+ }
+}
+```
diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx
new file mode 100644
index 00000000..7b0962d2
--- /dev/null
+++ b/pages/gateway/rate-limiting.mdx
@@ -0,0 +1,44 @@
+---
+title: Rate Limiting
+description: Rate limits by authentication method.
+---
+
+# Rate Limiting
+
+The gateway enforces sliding-window rate limits per authentication method.
+
+## Limits
+
+| Auth method | Rate limit | Daily limit |
+| -------------------------- | ----------- | ------------------------ |
+| API Key | 60 req/min | Unlimited (with credits) |
+| Session | 30 req/min | Unlimited (with credits) |
+| SpendAuth | 120 req/min | Unlimited |
+| Anonymous | 10 req/min | 5 req/day |
+| Authenticated (no credits) | 30 req/min | 20 req/day |
+
+## Response headers
+
+Every response includes rate limit headers:
+
+```
+X-RateLimit-Limit: 60
+X-RateLimit-Remaining: 42
+X-RateLimit-Reset: 1712793600
+```
+
+## 429 responses
+
+When rate limited:
+
+```json
+{
+ "error": {
+ "message": "Rate limit exceeded for this API key.",
+ "type": "rate_limit_error",
+ "code": "rate_limit_exceeded"
+ }
+}
+```
+
+The `X-RateLimit-Reset` header indicates when the window resets (Unix timestamp in seconds).
diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx
new file mode 100644
index 00000000..40350b0c
--- /dev/null
+++ b/pages/gateway/response-headers.mdx
@@ -0,0 +1,41 @@
+---
+title: Response Headers
+description: Headers returned on every gateway response.
+---
+
+# Response Headers
+
+Every response from the gateway includes metadata headers.
+
+## Standard headers
+
+| Header | Description | Example |
+| ----------------------- | --------------------------- | ------------------- |
+| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` |
+| `X-Tangle-Price-Input` | USD per input token | `0.000003` |
+| `X-Tangle-Price-Output` | USD per output token | `0.000015` |
+| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` |
+| `X-RateLimit-Limit` | Requests allowed per window | `60` |
+| `X-RateLimit-Remaining` | Requests remaining | `42` |
+| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` |
+
+## Conditional headers
+
+| Header | When present | Description |
+| ------------------------ | --------------------------------- | ------------------------------- |
+| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path |
+| `X-Tangle-Operator` | When served by an operator | Operator slug |
+| `X-Tangle-BYOK` | When BYOK credentials used | `true` |
+| `X-Tangle-Caching` | When prompt caching applied | `auto` |
+| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` |
+| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` |
+| `X-Free-Tier-Remaining` | Free tier requests | `3` |
+| `X-Free-Tier-Limit` | Free tier daily cap | `5` |
+
+## Error response headers
+
+| Header | When present | Description |
+| -------------------- | ------------- | ------------------------- |
+| `X-Payment-Required` | 402 responses | Amount needed (micro-USD) |
+| `X-Payment-Currency` | 402 responses | `tsUSD` |
+| `X-Payment-Methods` | 402 responses | `credits,spend_auth` |
diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx
new file mode 100644
index 00000000..9e23653d
--- /dev/null
+++ b/pages/gateway/routing-trace.mdx
@@ -0,0 +1,36 @@
+---
+title: Routing Trace
+description: See exactly which providers were tried for every request.
+---
+
+# Routing Trace
+
+Every response includes an `X-Tangle-Routing-Trace` header showing the routing path — which providers were tried, whether they succeeded, and how long each took.
+
+## Header format
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[operator(err:5001ms)→litellm(200:340ms)]
+```
+
+Format: `model[provider(status:latency)→provider(status:latency)]`
+
+Multiple models (from [fallbacks](/gateway/fallbacks)):
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
+
+## Sanitization
+
+The trace header is sanitized for safety:
+
+- Operator names are shown as generic `operator` (slugs not exposed)
+- Error messages are not included (only status codes)
+- Internal URLs and hostnames are never leaked
+
+For the full unredacted trace including error messages, use the [generation lookup API](/gateway/generation-lookup) — the `routing_trace` field in the response contains the complete history.
+
+## Disabling
+
+Set `ENABLE_ROUTING_TRACE=false` to omit the header from all responses. See [Feature Flags](/gateway/feature-flags).
diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx
new file mode 100644
index 00000000..9521f60f
--- /dev/null
+++ b/pages/gateway/smart-routing.mdx
@@ -0,0 +1,58 @@
+---
+title: Smart Routing
+description: How the gateway scores and selects operators.
+---
+
+# Smart Routing
+
+When multiple operators running the same [Blueprint](/developers/blueprints/introduction) serve the same model, the gateway selects the best one using a weighted scoring algorithm.
+
+## Scoring formula
+
+```
+score = reputation(40%) + latency(30%) + price(30%)
+```
+
+| Factor | Weight | What it measures |
+| -------------- | ------ | --------------------------------------------------------- |
+| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history |
+| **Latency** | 30% | Inverse of average response time (lower = better) |
+| **Price** | 30% | Inverse of per-token price (cheaper = better) |
+
+## Operator selection
+
+1. Query all operators serving the requested model
+2. Filter: only `active` or `degraded` status, must be pipeline head
+3. Score each operator
+4. Sort by score descending
+5. Route to the highest-scoring operator
+
+If a preferred operator is specified (via `X-Tangle-Operator`), it's moved to the top of the ranked list regardless of score.
+
+## Health tracking
+
+The gateway tracks operator health via:
+
+- **Health checks** — periodic probes stored in `OperatorHealthCheck`
+- **Request outcomes** — success/failure recorded per request
+- **Latency tracking** — rolling average updated per request
+
+Operators that consistently fail are automatically deprioritized as their reputation and latency scores decline.
+
+## Provider ordering (non-operator)
+
+For direct provider routing, use `providerOptions.gateway.order` and `only`:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "order": ["bedrock", "anthropic"],
+ "only": ["bedrock", "anthropic"]
+ }
+ }
+}
+```
+
+- `order`: Try providers in this order. First with valid credentials wins.
+- `only`: Restrict to these providers. Others are excluded even if they have credentials.
diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx
new file mode 100644
index 00000000..d464d36d
--- /dev/null
+++ b/pages/gateway/spend-auth.mdx
@@ -0,0 +1,55 @@
+---
+title: SpendAuth (On-Chain Payments)
+description: Pay operators directly on-chain via EIP-712 signed authorizations.
+---
+
+# SpendAuth
+
+SpendAuth lets you pay operators directly on-chain without a credit card or account. Sign an EIP-712 typed data message with your wallet, attach it to the request, and the operator claims payment after serving inference.
+
+## How it works
+
+1. **Sign:** Create an EIP-712 SpendAuth payload with your wallet
+2. **Send:** Attach the signature as `X-Payment-Signature` header
+3. **Authorize:** The gateway verifies the signature and calls `authorizeSpend` on-chain
+4. **Serve:** The operator processes your inference request
+5. **Claim:** The operator calls `claimPayment` to receive funds
+
+## Request format
+
+```bash
+curl -X POST "https://router.tangle.tools/v1/chat/completions" \
+ -H "Content-Type: application/json" \
+ -H "X-Payment-Signature: {
+ \"commitment\": \"0xabc...\",
+ \"serviceId\": \"1\",
+ \"jobIndex\": 0,
+ \"amount\": \"1000000\",
+ \"operator\": \"0x70997970...\",
+ \"nonce\": \"42\",
+ \"expiry\": \"1712793600\",
+ \"signature\": \"0xff...\"
+ }" \
+ -d '{"model": "llama-3.1-70b", "messages": [...]}'
+```
+
+## Security
+
+- **EIP-712 signatures:** Cryptographically verified against the signing address
+- **Nonce replay protection:** Each nonce can only be used once per commitment (in-memory + Redis store)
+- **On-chain verification:** The `ShieldedCredits` contract validates authorization and deducts balance
+- **Expiry:** SpendAuth payloads have a timestamp-based expiry
+
+## Rate limits
+
+SpendAuth requests get a generous 120 req/min limit per commitment since every request is paid.
+
+## On-chain contracts
+
+SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. The contract handles:
+
+- Balance management (deposit, authorize, claim)
+- Authorization verification (EIP-712 signature recovery)
+- Payment settlement (operator claims after serving)
+
+The operator-side settlement logic is implemented in [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core), a shared Rust crate used by the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint) and other inference Blueprints.
diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx
new file mode 100644
index 00000000..6c49cc18
--- /dev/null
+++ b/pages/gateway/timeouts.mdx
@@ -0,0 +1,57 @@
+---
+title: Provider Timeouts
+description: Configure per-provider timeouts for fast failover.
+---
+
+# Provider Timeouts
+
+Set timeouts to trigger fast failover when a provider is slow. Values are clamped to 1-120 seconds.
+
+## Global timeout
+
+Apply the same timeout to all providers:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "timeout": 5000
+ }
+ }
+}
+```
+
+## Per-provider timeouts
+
+Different providers have different latency profiles. Set timeouts individually:
+
+```json
+{
+ "providerOptions": {
+ "gateway": {
+ "timeout": {
+ "openai": 5000,
+ "anthropic": 10000,
+ "groq": 3000
+ }
+ }
+ }
+}
+```
+
+## Default behavior
+
+Without explicit timeouts, the gateway uses a 30-second default for all providers and a 30-second idle timeout for streaming responses.
+
+## Bounds
+
+All timeout values are clamped:
+
+- **Minimum:** 1,000ms (1 second)
+- **Maximum:** 120,000ms (2 minutes)
+
+Values outside this range are silently clamped to the nearest bound.
+
+## Interaction with fallbacks
+
+When a provider times out, it counts as a failure in the [routing trace](/gateway/routing-trace) and the gateway moves to the next option — either a different provider for the same model, or the next [fallback model](/gateway/fallbacks).
diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx
new file mode 100644
index 00000000..e9f44b4b
--- /dev/null
+++ b/pages/gateway/zdr.mdx
@@ -0,0 +1,84 @@
+---
+title: Zero Data Retention
+description: Route requests only through providers with verified zero data retention agreements.
+---
+
+# Zero Data Retention (ZDR)
+
+When ZDR is enabled, the gateway routes requests **only** through providers that have verified agreements to delete all request data immediately after processing.
+
+## Enable per-request
+
+```json
+{
+ "model": "anthropic/claude-sonnet-4-6",
+ "messages": [{"role": "user", "content": "Analyze this sensitive data..."}],
+ "providerOptions": {
+ "gateway": {
+ "zeroDataRetention": true
+ }
+ }
+}
+```
+
+## Enable team-wide
+
+Set `zdrEnabled: true` on your team record. All requests from team members will enforce ZDR. Team-wide ZDR overrides per-request `zeroDataRetention: false`.
+
+## How it works
+
+When ZDR is enabled:
+
+1. **Operators are skipped.** Operators self-report their backing provider. The gateway cannot verify what provider an operator actually routes through, so operators are excluded from ZDR-compliant routing.
+
+2. **LiteLLM is skipped.** LiteLLM has its own internal fallback chain that may route to non-ZDR providers. Since we can't control LiteLLM's routing decisions, it's excluded.
+
+3. **Direct provider only.** The gateway calls the provider API directly, selecting only from verified ZDR providers.
+
+4. **BYOK fallback preserves ZDR.** If your [BYOK](/gateway/byok) credentials fail, the fallback to platform credentials still enforces ZDR filtering.
+
+## ZDR-verified providers
+
+| Provider | ZDR | No-Train | Policy |
+| -------------- | --- | -------- | ------------------------------------------------------------------------------------------------------ |
+| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) |
+| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) |
+| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) |
+| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) |
+| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) |
+| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) |
+| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) |
+| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) |
+| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) |
+| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) |
+| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) |
+| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) |
+| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) |
+
+Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys.
+
+## Trust model
+
+| Routing tier | ZDR behavior |
+| ----------------------- | ---------------------------------------------------------- |
+| **Operators** | Skipped. Self-reported backing provider is unverifiable. |
+| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. |
+| **Direct provider** | Routed only to verified ZDR providers. |
+| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. |
+| **Operator-only + ZDR** | 400 error. Conflicting requirements. |
+
+The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct.
+
+## Error responses
+
+If no ZDR-compliant provider is available for the requested model:
+
+```json
+{
+ "error": {
+ "message": "No ZDR providers available for model: deepseek/deepseek-chat. Providers considered: anthropic, groq, mistral, ...",
+ "type": "invalid_request_error",
+ "code": "no_providers_available"
+ }
+}
+```
diff --git a/pages/vision/architecture.mdx b/pages/vision/architecture.mdx
index d64cd5bc..6b68d079 100644
--- a/pages/vision/architecture.mdx
+++ b/pages/vision/architecture.mdx
@@ -20,6 +20,7 @@ Tangle ties together three layers most platforms separate: the workbench where w
| Layer | Runs here | Examples |
| --------------- | ----------------------------- | -------------------------------------------------------- |
| Workbench | Human and agent collaboration | Workflows, profiles, simulations, reviews |
+| Gateway | Inference routing and billing | Model access, BYOK, ZDR compliance, operator selection |
| Sandbox runtime | Executed tasks and tools | Agent sessions, tool calls, file edits |
| Protocol | Coordination and settlement | Service registry, operator payments, staking, incentives |
@@ -28,10 +29,13 @@ Tangle ties together three layers most platforms separate: the workbench where w
**1) Execution Layer**
Sandboxed runtimes with isolation, resource limits, and audit logs. This is where tasks actually run.
-**2) Protocol Layer**
+**2) Inference Layer**
+The [Gateway](/gateway) routes inference requests across centralized providers and decentralized operators. It handles model selection, compliance filtering ([ZDR](/gateway/zdr), [no-train](/gateway/no-train)), [BYOK](/gateway/byok) credential management, and billing.
+
+**3) Protocol Layer**
The coordination plane. It handles operator discovery, payment routing, and incentive enforcement.
-**3) Experience Layer**
+**4) Experience Layer**
The agentic workbench and [Blueprint SDK](https://github.com/tangle-network/blueprint/tree/main). This is where teams design workflows, run simulations, and ship services.
The SDK also includes optional gateways for integrating external events and payment-driven HTTP job execution (webhooks,