From c2d60418e89c67d4897a45f083d07433148a5933 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Fri, 10 Apr 2026 15:29:03 -0700 Subject: [PATCH 1/4] feat: add Gateway as top-level product section (31 pages) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New top-level "Gateway" section at /gateway — same level as Workbench and Runtime. Documents the Tangle inference gateway with full coverage: Introduction & Getting Started (2 pages): - Product overview, quickstart with curl/Python/TypeScript Models & Providers (5 pages): - Supported models, BYOK, fallbacks, timeouts, caching Routing (3 pages): - 3-tier architecture, operator routing, smart scoring Security & Compliance (5 pages): - Auth methods, ZDR, no-train, guardrails, rate limiting Billing (3 pages): - Credits/pricing, free tier model gating, SpendAuth on-chain payments Observability (2 pages): - Generation lookup API, routing trace API Reference (7 pages): - POST /v1/chat/completions, GET /v1/generation, GET /v1/credits, compliance API, providerOptions.gateway, response headers, feature flags Guides (3 pages): - Migrate from OpenAI, migrate from Vercel AI Gateway, enterprise ZDR Also updated: - Root _meta.ts: Gateway added between Workbench and Runtime - AI hub page: added Inference Gateway section with links - AI _meta.ts: added Gateway links in sidebar Build: 206 pages indexed, zero errors. 
--- pages/_meta.ts | 4 ++ pages/ai/_meta.ts | 20 ++++++ pages/ai/index.mdx | 11 ++++ pages/gateway/_meta.ts | 64 +++++++++++++++++++ pages/gateway/api-chat.mdx | 96 +++++++++++++++++++++++++++++ pages/gateway/api-compliance.mdx | 49 +++++++++++++++ pages/gateway/api-credits.mdx | 29 +++++++++ pages/gateway/api-generation.mdx | 55 +++++++++++++++++ pages/gateway/authentication.mdx | 57 +++++++++++++++++ pages/gateway/byok.mdx | 86 ++++++++++++++++++++++++++ pages/gateway/caching.mdx | 72 ++++++++++++++++++++++ pages/gateway/enterprise-zdr.mdx | 93 ++++++++++++++++++++++++++++ pages/gateway/fallbacks.mdx | 69 +++++++++++++++++++++ pages/gateway/feature-flags.mdx | 35 +++++++++++ pages/gateway/free-tier.mdx | 62 +++++++++++++++++++ pages/gateway/generation-lookup.mdx | 71 +++++++++++++++++++++ pages/gateway/getting-started.mdx | 94 ++++++++++++++++++++++++++++ pages/gateway/guardrails.mdx | 63 +++++++++++++++++++ pages/gateway/how-routing-works.mdx | 64 +++++++++++++++++++ pages/gateway/index.mdx | 58 +++++++++++++++++ pages/gateway/migrate-openai.mdx | 77 +++++++++++++++++++++++ pages/gateway/migrate-vercel.mdx | 80 ++++++++++++++++++++++++ pages/gateway/models.mdx | 69 +++++++++++++++++++++ pages/gateway/no-train.mdx | 43 +++++++++++++ pages/gateway/operator-routing.mdx | 75 ++++++++++++++++++++++ pages/gateway/pricing.mdx | 55 +++++++++++++++++ pages/gateway/provider-options.mdx | 70 +++++++++++++++++++++ pages/gateway/rate-limiting.mdx | 44 +++++++++++++ pages/gateway/response-headers.mdx | 41 ++++++++++++ pages/gateway/routing-trace.mdx | 35 +++++++++++ pages/gateway/smart-routing.mdx | 58 +++++++++++++++++ pages/gateway/spend-auth.mdx | 52 ++++++++++++++++ pages/gateway/timeouts.mdx | 56 +++++++++++++++++ pages/gateway/zdr.mdx | 84 +++++++++++++++++++++++++ 34 files changed, 1991 insertions(+) create mode 100644 pages/gateway/_meta.ts create mode 100644 pages/gateway/api-chat.mdx create mode 100644 pages/gateway/api-compliance.mdx create mode 100644 
pages/gateway/api-credits.mdx create mode 100644 pages/gateway/api-generation.mdx create mode 100644 pages/gateway/authentication.mdx create mode 100644 pages/gateway/byok.mdx create mode 100644 pages/gateway/caching.mdx create mode 100644 pages/gateway/enterprise-zdr.mdx create mode 100644 pages/gateway/fallbacks.mdx create mode 100644 pages/gateway/feature-flags.mdx create mode 100644 pages/gateway/free-tier.mdx create mode 100644 pages/gateway/generation-lookup.mdx create mode 100644 pages/gateway/getting-started.mdx create mode 100644 pages/gateway/guardrails.mdx create mode 100644 pages/gateway/how-routing-works.mdx create mode 100644 pages/gateway/index.mdx create mode 100644 pages/gateway/migrate-openai.mdx create mode 100644 pages/gateway/migrate-vercel.mdx create mode 100644 pages/gateway/models.mdx create mode 100644 pages/gateway/no-train.mdx create mode 100644 pages/gateway/operator-routing.mdx create mode 100644 pages/gateway/pricing.mdx create mode 100644 pages/gateway/provider-options.mdx create mode 100644 pages/gateway/rate-limiting.mdx create mode 100644 pages/gateway/response-headers.mdx create mode 100644 pages/gateway/routing-trace.mdx create mode 100644 pages/gateway/smart-routing.mdx create mode 100644 pages/gateway/spend-auth.mdx create mode 100644 pages/gateway/timeouts.mdx create mode 100644 pages/gateway/zdr.mdx diff --git a/pages/_meta.ts b/pages/_meta.ts index ef9a5cab..b700a1ca 100644 --- a/pages/_meta.ts +++ b/pages/_meta.ts @@ -18,6 +18,10 @@ const meta: Meta = { title: "Workbench", type: "page", }, + gateway: { + title: "Gateway", + type: "page", + }, infrastructure: { title: "Runtime", type: "page", diff --git a/pages/ai/_meta.ts b/pages/ai/_meta.ts index 7165fac5..88e4c860 100644 --- a/pages/ai/_meta.ts +++ b/pages/ai/_meta.ts @@ -2,6 +2,26 @@ import type { Meta } from "nextra"; const meta: Meta = { index: "AI Introduction", + "-- gateway": { + type: "separator", + title: "Inference Gateway", + }, + "gateway-intro": { + title: 
"Introduction", + href: "/gateway", + }, + "gateway-start": { + title: "Getting Started", + href: "/gateway/getting-started", + }, + "gateway-models": { + title: "Models & Providers", + href: "/gateway/models", + }, + "gateway-zdr": { + title: "Zero Data Retention", + href: "/gateway/zdr", + }, "-- workbench": { type: "separator", title: "Agentic Workbench", diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx index fdfbcaed..fe2cf0bb 100644 --- a/pages/ai/index.mdx +++ b/pages/ai/index.mdx @@ -44,8 +44,19 @@ Core capabilities: Each run produces task and agent evaluations. That data feeds back into the workbench to improve prompts, policies, and workflows over time. +## Inference Gateway + +The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement. + +Key capabilities: +- **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers. +- **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency. +- **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements. +- **On-chain payments.** [SpendAuth](/gateway/spend-auth) — pay operators directly without a credit card. 
+ ## Learn More +- [Gateway — Getting Started](/gateway/getting-started) - [Workbench details](/vibe/introduction) - [Runtime and sandboxing](/infrastructure/introduction) - [Operator onboarding](/operators/introduction) diff --git a/pages/gateway/_meta.ts b/pages/gateway/_meta.ts new file mode 100644 index 00000000..186342b5 --- /dev/null +++ b/pages/gateway/_meta.ts @@ -0,0 +1,64 @@ +import type { Meta } from "nextra"; + +const meta: Meta = { + index: "Introduction", + "getting-started": "Getting Started", + "-- models": { + type: "separator", + title: "Models & Providers", + }, + models: "Supported Models", + byok: "Bring Your Own Key", + fallbacks: "Model Fallbacks", + timeouts: "Provider Timeouts", + caching: "Automatic Caching", + "-- routing": { + type: "separator", + title: "Routing", + }, + "how-routing-works": "How Routing Works", + "operator-routing": "Operator Routing", + "smart-routing": "Smart Routing", + "-- security": { + type: "separator", + title: "Security & Compliance", + }, + authentication: "Authentication", + zdr: "Zero Data Retention", + "no-train": "Disallow Prompt Training", + guardrails: "Guardrails", + "rate-limiting": "Rate Limiting", + "-- billing": { + type: "separator", + title: "Billing", + }, + pricing: "Credits & Pricing", + "free-tier": "Free Tier", + "spend-auth": "SpendAuth (On-Chain)", + "-- observability": { + type: "separator", + title: "Observability", + }, + "generation-lookup": "Generation Lookup", + "routing-trace": "Routing Trace", + "-- reference": { + type: "separator", + title: "API Reference", + }, + "api-chat": "POST /v1/chat/completions", + "api-generation": "GET /v1/generation", + "api-credits": "GET /v1/credits", + "api-compliance": "Provider Compliance API", + "provider-options": "providerOptions.gateway", + "response-headers": "Response Headers", + "feature-flags": "Feature Flags", + "-- guides": { + type: "separator", + title: "Guides", + }, + "migrate-openai": "Migrate from OpenAI", + "migrate-vercel": 
"Migrate from Vercel AI Gateway", + "enterprise-zdr": "Enterprise ZDR Setup", +}; + +export default meta; diff --git a/pages/gateway/api-chat.mdx b/pages/gateway/api-chat.mdx new file mode 100644 index 00000000..2a8db365 --- /dev/null +++ b/pages/gateway/api-chat.mdx @@ -0,0 +1,96 @@ +--- +title: POST /v1/chat/completions +description: OpenAI-compatible chat completion endpoint with gateway extensions. +--- + +# POST /v1/chat/completions + +OpenAI-compatible chat completion endpoint. Supports streaming, tool use, and all standard parameters, plus gateway-specific extensions via `providerOptions.gateway`. + +## Request + +```bash +POST https://router.tangle.tools/v1/chat/completions +Authorization: Bearer sk-tan-YOUR_KEY +Content-Type: application/json +``` + +### Body + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"} + ], + "temperature": 0.7, + "max_tokens": 4096, + "stream": true, + "tools": [...], + "tool_choice": "auto", + "response_format": {"type": "json_object"}, + "top_p": 0.9, + "frequency_penalty": 0, + "presence_penalty": 0, + "stop": ["\n\n"], + "providerOptions": { + "gateway": { + "byok": {"anthropic": [{"apiKey": "sk-ant-..."}]}, + "zeroDataRetention": true, + "caching": "auto", + "models": ["openai/gpt-4o"], + "timeout": 5000 + } + } +} +``` + +All standard OpenAI parameters (`tools`, `tool_choice`, `response_format`, `top_p`, `frequency_penalty`, `presence_penalty`, `stop`, `logprobs`) are forwarded to the provider. 
+ +### Routing headers (optional) + +| Header | Effect | +|--------|--------| +| `X-Tangle-Routing` | `operator`, `provider`, or `auto` (default) | +| `X-Tangle-Blueprint` | Pin to operators under this Blueprint ID | +| `X-Tangle-Service` | Pin to a specific service instance | +| `X-Tangle-Operator` | Pin to a specific operator (slug or 0x address) | +| `X-Payment-Signature` | SpendAuth JSON payload for on-chain payment | + +### Validation + +| Field | Constraint | +|-------|-----------| +| `model` | Required. Alphanumeric + `/-.:\\_`, max 128 chars. | +| `messages` | Required. Non-empty array. Each must have `role`. | +| `max_tokens` | Optional. 1-128,000. Default: 4,096. | +| `temperature` | Optional. 0-2. Default: 1. | +| Body size | Max 1MB. | + +## Response (non-streaming) + +Standard OpenAI chat completion response: + +```json +{ + "id": "chatcmpl-...", + "choices": [{ + "message": {"role": "assistant", "content": "Hello! How can I help?"}, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 15, + "completion_tokens": 8, + "total_tokens": 23 + } +} +``` + +## Response (streaming) + +Server-sent events with `data: {...}` lines and `data: [DONE]` terminator. + +## Response headers + +See [Response Headers](/gateway/response-headers) for the full list. diff --git a/pages/gateway/api-compliance.mdx b/pages/gateway/api-compliance.mdx new file mode 100644 index 00000000..fd4e02d6 --- /dev/null +++ b/pages/gateway/api-compliance.mdx @@ -0,0 +1,49 @@ +--- +title: Provider Compliance API +description: Query and manage provider ZDR and no-train compliance data. +--- + +# Provider Compliance API + +## GET /api/gateway/compliance + +List compliance data for all providers. Public endpoint (rate-limited). 
+ +```bash +curl https://router.tangle.tools/api/gateway/compliance +``` + +```json +{ + "providers": [ + { + "provider": "anthropic", + "name": "Anthropic", + "zdr": true, + "no_train": true, + "supports_prompt_caching": true, + "caching_type": "explicit", + "policy_url": "https://platform.claude.com/docs/en/build-with-claude/zero-data-retention", + "verified_at": "2026-04-10T00:00:00.000Z" + } + ] +} +``` + +## PUT /api/admin/compliance + +Update compliance data for a provider. Admin-only (requires `ADMIN_EMAILS` session). + +```bash +curl -X PUT https://router.tangle.tools/api/admin/compliance \ + -H "Cookie: session_token=ADMIN_SESSION" \ + -H "Content-Type: application/json" \ + -d '{ + "providerId": "openai", + "zdr": true, + "noTrain": true, + "policyUrl": "https://openai.com/policies/api-data-usage-policies" + }' +``` + +Only fields included in the request body are updated. Omitted fields remain unchanged. `verifiedAt` is automatically set to the current timestamp. diff --git a/pages/gateway/api-credits.mdx b/pages/gateway/api-credits.mdx new file mode 100644 index 00000000..deae1201 --- /dev/null +++ b/pages/gateway/api-credits.mdx @@ -0,0 +1,29 @@ +--- +title: GET /v1/credits +description: Check your credit balance and total usage. +--- + +# GET /v1/credits + +Check your credit balance. Requires authentication. + +## Request + +``` +GET https://router.tangle.tools/v1/credits +Authorization: Bearer sk-tan-YOUR_KEY +``` + +## Response + +```json +{ + "balance": "95.50", + "total_used": "4.50" +} +``` + +| Field | Description | +|-------|-------------| +| `balance` | Remaining credit balance (USD) | +| `total_used` | Total credits consumed (USD) | diff --git a/pages/gateway/api-generation.mdx b/pages/gateway/api-generation.mdx new file mode 100644 index 00000000..f52f7566 --- /dev/null +++ b/pages/gateway/api-generation.mdx @@ -0,0 +1,55 @@ +--- +title: GET /v1/generation +description: Look up detailed information about a specific request. 
+--- + +# GET /v1/generation + +Retrieve detailed information about a specific generation by its ID. Requires authentication. + +## Request + +``` +GET https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV +Authorization: Bearer sk-tan-YOUR_KEY +``` + +## Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `id` | Yes | Generation ID (format: `gen_` prefix followed by a unique identifier, e.g. `gen_01ARZ3NDEKTSV4RRFFQ69G5FAV`) | + +## Response + +```json +{ + "data": { + "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV", + "total_cost": 0.00123, + "usage": 0.00123, + "created_at": "2026-04-10T12:00:00.000Z", + "model": "anthropic/claude-sonnet-4-6", + "is_byok": false, + "provider_name": "anthropic", + "streamed": true, + "latency": 200, + "generation_time": 1500, + "tokens_prompt": 100, + "tokens_completion": 50, + "native_tokens_cached": 80, + "native_tokens_reasoning": 0, + "status": "success", + "routing_trace": {...}, + "cache_hit": false + } +} +``` + +## Errors + +| Status | Code | Description | +|--------|------|-------------| +| 400 | — | Missing or invalid generation ID | +| 401 | — | Authentication required | +| 404 | `not_found` | Generation not found or belongs to another user | diff --git a/pages/gateway/authentication.mdx b/pages/gateway/authentication.mdx new file mode 100644 index 00000000..5ce04f4e --- /dev/null +++ b/pages/gateway/authentication.mdx @@ -0,0 +1,57 @@ +--- +title: Authentication +description: Authentication methods for Tangle Gateway. +--- + +# Authentication + +Four authentication methods, plus unauthenticated anonymous access to free tier models, each with different rate limits and capabilities. + +## API Key + +Create keys at the dashboard. Keys start with `sk-tan-` and are SHA256-hashed before storage. 
+ +```bash +curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + https://router.tangle.tools/v1/chat/completions +``` + +- **Rate limit:** 60 req/min +- **Credit check:** Yes (402 if balance is zero on non-free-tier models) +- **Key features:** Expiration dates, soft revocation, last-used tracking + +## Session (Cookie) + +Browser-based authentication via Better Auth. Supports email/password and OAuth (Google, GitHub). + +- **Rate limit:** 30 req/min +- **Credit check:** Yes + +## SIWE (Sign-In with Ethereum) + +Wallet-based authentication via EIP-191 signatures. Authenticate with your Ethereum wallet. + +``` +POST /api/siwe/verify +{ "address": "0x...", "signature": "0x...", "message": "..." } +``` + +## SpendAuth (On-Chain Payment) + +EIP-712 signed payment authorization. No account needed — pay operators directly on-chain. + +```bash +curl -H "X-Payment-Signature: {\"commitment\":\"0x...\",\"amount\":\"1000000\",...}" \ + https://router.tangle.tools/v1/chat/completions +``` + +- **Rate limit:** 120 req/min per commitment +- **Credit check:** No (payment is on-chain) +- See [SpendAuth](/gateway/spend-auth) for details. + +## Anonymous + +No authentication required for [free tier models](/gateway/free-tier). + +- **Rate limit:** 10 req/min, 5 req/day +- **Model access:** Free tier only (gpt-4o-mini, llama-3.1-8b, etc.) diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx new file mode 100644 index 00000000..7db449b0 --- /dev/null +++ b/pages/gateway/byok.mdx @@ -0,0 +1,86 @@ +--- +title: Bring Your Own Key (BYOK) +description: Use your own provider API keys with Tangle Gateway for zero-markup access. +--- + +# Bring Your Own Key (BYOK) + +Use your existing provider API keys with Tangle Gateway. BYOK requests have **zero platform markup** — you pay the provider's list price directly. 
+ +## Per-request BYOK + +Pass credentials in `providerOptions.gateway.byok`: + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Hello"}], + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [{"apiKey": "sk-ant-your-key"}] + } + } + } +} +``` + +### Multiple credentials + +Specify multiple credentials per provider. The gateway tries them in order: + +```json +{ + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [ + {"apiKey": "sk-ant-primary"}, + {"apiKey": "sk-ant-backup"} + ] + } + } + } +} +``` + +### Multiple providers + +```json +{ + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [{"apiKey": "sk-ant-..."}], + "openai": [{"apiKey": "sk-..."}] + } + } + } +} +``` + +## Automatic fallback + +If your BYOK credentials fail (401, 403, rate limit), the gateway automatically falls back to platform credentials. This fallback preserves all compliance filters — if you requested [ZDR](/gateway/zdr), the fallback will only use ZDR-compliant system credentials. + +The `X-Tangle-BYOK` response header indicates whether the request used your credentials: + +``` +X-Tangle-BYOK: true # Your key was used +``` + +If the header is absent, platform credentials were used (possibly via fallback). + +## Pricing + +| Credential type | Markup | +|----------------|--------| +| BYOK | **0%** — provider list price | +| Platform credentials | 20% markup (configurable) | + +## Security + +- BYOK credentials are never logged, stored, or persisted. +- Credentials exist only in memory for the duration of the request. +- The `providerOptions` field is stripped from the request body before forwarding to providers. +- Credentials are validated by structure (`apiKey` must be a string) and sanitized against prototype pollution. 
diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx new file mode 100644 index 00000000..976894c0 --- /dev/null +++ b/pages/gateway/caching.mdx @@ -0,0 +1,72 @@ +--- +title: Automatic Caching +description: Enable prompt caching across providers with a single flag. +--- + +# Automatic Caching + +Some providers require explicit cache markers to enable prompt caching, while others cache automatically. Use `caching: 'auto'` to let the gateway handle it. + +## Usage + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [ + {"role": "system", "content": "You are a helpful assistant with a large knowledge base..."}, + {"role": "user", "content": "What is Tangle?"} + ], + "providerOptions": { + "gateway": { + "caching": "auto" + } + } +} +``` + +## How it works + +| Provider | Caching Type | What `auto` does | +|----------|-------------|-----------------| +| OpenAI | Implicit | No change needed. Caching happens automatically. | +| Google | Implicit | No change needed. | +| DeepSeek | Implicit | No change needed. | +| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. | +| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. | + +For Anthropic, the gateway converts: + +```json +{"role": "system", "content": "You are helpful..."} +``` + +Into: + +```json +{"role": "system", "content": [{"type": "text", "text": "You are helpful...", "cache_control": {"type": "ephemeral"}}]} +``` + +This caches the system prompt so subsequent messages in the same conversation reuse it, reducing costs by up to 90%. + +## Response caching + +Separately from prompt caching, the gateway caches complete responses for **deterministic requests** (temperature ≤ 0.01, non-streaming). Cached responses are free. 
+ +``` +X-Tangle-Cache: HIT # Served from cache +X-Tangle-Cache: MISS # Fetched from provider +``` + +Disable per-request: + +```json +{ + "providerOptions": { + "gateway": { + "cache": false + } + } +} +``` + +The response cache key includes: model, messages, temperature, max_tokens, tools, response_format, and top_p. Different parameters always produce different cache entries. diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx new file mode 100644 index 00000000..f331bd6f --- /dev/null +++ b/pages/gateway/enterprise-zdr.mdx @@ -0,0 +1,93 @@ +--- +title: Enterprise ZDR Setup +description: Configure Zero Data Retention for your organization. +--- + +# Enterprise ZDR Setup + +This guide walks through configuring ZDR for an organization that needs to guarantee no prompts or responses are retained by AI providers. + +## Step 1: Understand the trust model + +Read the [ZDR trust model](/gateway/zdr#trust-model) first. Key points: + +- ZDR is enforced at the **direct provider** level only. +- **Operators are skipped** when ZDR is enabled (their backing provider is unverifiable). +- **LiteLLM is skipped** (its internal routing is uncontrollable). +- BYOK fallback to platform credentials preserves ZDR filtering. + +## Step 2: Choose your approach + +### Option A: Team-wide ZDR (recommended) + +Enable ZDR for all requests from your team. No code changes needed — every request is automatically filtered. + +Contact your admin to set `zdrEnabled: true` on your team record via the admin API: + +```bash +# Admin sets team-wide ZDR +curl -X PUT https://router.tangle.tools/api/admin/compliance \ + -H "Cookie: session_token=ADMIN_SESSION" \ + -d '{"providerId": "...", "zdr": true}' +``` + +### Option B: Per-request ZDR + +Add `zeroDataRetention: true` to individual requests. Useful for mixed workloads where only some requests handle sensitive data. 
+ +```python +response = client.chat.completions.create( + model="anthropic/claude-sonnet-4-6", + messages=[...], + extra_body={ + "providerOptions": { + "gateway": {"zeroDataRetention": True} + } + } +) +``` + +## Step 3: Verify provider coverage + +Check which providers are ZDR-verified for the models you need: + +```bash +curl https://router.tangle.tools/api/gateway/compliance | jq '.providers[] | select(.zdr == true)' +``` + +If your required model is only available from a non-ZDR provider, the request will return 400 with a clear error listing which providers were considered. + +## Step 4: Set up BYOK (optional) + +For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you: +- Zero platform markup +- Direct contractual relationship with the provider +- ZDR enforcement still applies on the fallback path + +## Step 5: Monitor compliance + +Use the [generation lookup API](/gateway/generation-lookup) to audit requests: + +```bash +# Check if a specific request used a ZDR provider +curl -H "Authorization: Bearer sk-tan-..." \ + "https://router.tangle.tools/v1/generation?id=gen_..." \ + | jq '.data.provider_name' +``` + +The `routing_trace` field shows exactly which providers were considered and filtered. + +## Combining ZDR + no-train + +Both flags work as an AND: when both are enabled, requests are routed only to providers that satisfy both criteria. This is the strictest compliance level. + +```json +{ + "providerOptions": { + "gateway": { + "zeroDataRetention": true, + "disallowPromptTraining": true + } + } +} +``` diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx new file mode 100644 index 00000000..bef37ec8 --- /dev/null +++ b/pages/gateway/fallbacks.mdx @@ -0,0 +1,69 @@ +--- +title: Model Fallbacks +description: Configure backup models that are tried when the primary model fails. +--- + +# Model Fallbacks + +Specify backup models that are tried in order if the primary model fails or is unavailable. 
+ +## Usage + +Pass a `models` array in `providerOptions.gateway`: + +```json +{ + "model": "openai/gpt-4o", + "messages": [{"role": "user", "content": "Hello"}], + "providerOptions": { + "gateway": { + "models": ["anthropic/claude-sonnet-4-6", "groq/llama-3.1-70b-versatile"] + } + } +} +``` + +The gateway tries: +1. `openai/gpt-4o` (primary model) +2. `anthropic/claude-sonnet-4-6` (first fallback) +3. `groq/llama-3.1-70b-versatile` (second fallback) + +The response comes from the first model that succeeds. + +## How fallback works + +For each model in the list, the gateway runs the full routing chain: + +1. **Operators** — try operators serving this model (if available) +2. **LiteLLM** — try the proxy with built-in retries +3. **Direct provider** — call the provider API directly + +If all tiers fail for a model, the gateway moves to the next model in the list. + +## Combining with provider ordering + +Use `models` with `order` to control both model fallback and provider preference: + +```json +{ + "model": "openai/gpt-4o", + "providerOptions": { + "gateway": { + "models": ["anthropic/claude-sonnet-4-6"], + "order": ["bedrock", "anthropic"] + } + } +} +``` + +This tries: +1. `openai/gpt-4o` via available providers +2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct + +## Observability + +When fallbacks occur, the [routing trace](/gateway/routing-trace) shows every model and provider attempted: + +``` +X-Tangle-Routing-Trace: openai/gpt-4o[openai(err:5001ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)] +``` diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx new file mode 100644 index 00000000..6fe29e23 --- /dev/null +++ b/pages/gateway/feature-flags.mdx @@ -0,0 +1,35 @@ +--- +title: Feature Flags +description: Disable gateway features without a code deploy. +--- + +# Feature Flags + +All gateway features are on by default. Set any flag to `false` to disable it without deploying new code. 
+ +## Available flags + +| Environment Variable | Default | Controls | +|---------------------|---------|----------| +| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning | +| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests | +| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) | +| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic | +| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header | + +## Usage + +Set in your environment: + +```bash +ENABLE_GUARDRAILS=false # Disable all guardrail scanning +ENABLE_RESPONSE_CACHE=false # Disable response cache reads/writes +``` + +## Notes + +- `ENABLE_COMPLIANCE_FILTER` only disables the early validation check that returns a 400 before routing. The actual ZDR/no-train enforcement in the routing tiers (skip operators, skip LiteLLM) stays active regardless. This flag is for suppressing the early error, not for bypassing compliance. + +- When `ENABLE_GUARDRAILS=false`, no PII or injection scanning occurs. The `X-Tangle-Guardrails` header is never set. GuardrailEvent records are not created. + +- When `ENABLE_RESPONSE_CACHE=false`, every request hits the provider. Cached entries are not read or written. Existing cache entries are not purged (they expire naturally via TTL). diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx new file mode 100644 index 00000000..2123547b --- /dev/null +++ b/pages/gateway/free-tier.mdx @@ -0,0 +1,62 @@ +--- +title: Free Tier +description: Free access to small models with daily limits. +--- + +# Free Tier + +Try the gateway without credits. Free tier restricts to cheap, fast models with daily request limits. 
+ +## Limits + +| Tier | Daily limit | Rate limit | +|------|------------|------------| +| Anonymous (no auth) | 5 req/day | 10 req/min | +| Authenticated (zero credits) | 20 req/day | 30 req/min | +| Paid (any credits) | Unlimited | 60 req/min | + +## Allowed models + +Free tier requests can use: + +| Model | Provider | Why it's free | +|-------|----------|---------------| +| `gpt-4o-mini` | OpenAI | Small, cheap | +| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap | +| `llama-3.1-8b-instant` | Groq | Free tier inference | +| `llama-3.2-1b-preview` | Groq | Tiny model | +| `llama-3.2-3b-preview` | Groq | Small model | +| `gemini-2.0-flash-lite` | Google | Free tier | +| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap | +| `deepseek-chat` | DeepSeek | Very cheap | + +## Blocked models + +These models require credits: + +- **OpenAI reasoning:** o1, o3, o4 (all variants) +- **OpenAI flagship:** gpt-4o, gpt-4, gpt-5 (gpt-4o-mini is allowed) +- **Anthropic flagship:** claude-opus, claude-sonnet (haiku is allowed) +- **Google flagship:** gemini-2.5-pro, gemini-2.5-ultra +- **xAI flagship:** grok-2, grok-3 + +Requesting a blocked model without credits returns 402: + +```json +{ + "error": { + "message": "Model \"gpt-4o\" requires credits. Free tier models: gpt-4o-mini, llama-3.1-8b-instant, gemini-2.0-flash-lite, deepseek-chat. Add credits or use a free tier model.", + "type": "insufficient_funds", + "code": "free_tier_limit" + } +} +``` + +## Response headers + +Free tier responses include remaining quota: + +``` +X-Free-Tier-Remaining: 3 +X-Free-Tier-Limit: 5 +``` diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx new file mode 100644 index 00000000..31c780ef --- /dev/null +++ b/pages/gateway/generation-lookup.mdx @@ -0,0 +1,71 @@ +--- +title: Generation Lookup +description: Retrieve detailed information about any request by its generation ID. 
+--- + +# Generation Lookup + +Every request returns a unique generation ID in the `X-Generation-Id` header. Use it to look up full request details. + +## Endpoint + +``` +GET /v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV +``` + +Requires authentication. Returns details only for requests made by the authenticated user. + +## Example + +```bash +curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + "https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV" +``` + +## Response + +```json +{ + "data": { + "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV", + "total_cost": 0.00123, + "usage": 0.00123, + "created_at": "2026-04-10T12:00:00.000Z", + "model": "anthropic/claude-sonnet-4-6", + "is_byok": false, + "provider_name": "anthropic", + "streamed": true, + "latency": 200, + "generation_time": 1500, + "tokens_prompt": 100, + "tokens_completion": 50, + "native_tokens_cached": 80, + "native_tokens_reasoning": 0, + "status": "success", + "routing_trace": { + "planningReasoning": "ZDR requested: filtering to 13 ZDR providers", + "modelAttempts": [...], + "totalLatencyMs": 1500 + }, + "cache_hit": false + } +} +``` + +## Fields + +| Field | Description | +|-------|-------------| +| `id` | Generation ID (`gen_` prefix followed by a unique identifier) | +| `total_cost` | Total cost in USD | +| `model` | Model that served the request | +| `is_byok` | Whether BYOK credentials were used | +| `provider_name` | Provider that served the request | +| `streamed` | Whether the request used streaming | +| `latency` | Time to first token (ms) | +| `generation_time` | Total generation time (ms) | +| `tokens_prompt` / `tokens_completion` | Token counts | +| `native_tokens_cached` | Tokens served from provider cache | +| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) | +| `routing_trace` | Full routing attempt history | +| `cache_hit` | Whether response was served from gateway cache | diff --git a/pages/gateway/getting-started.mdx b/pages/gateway/getting-started.mdx new file mode 100644 index 
00000000..30d1edd2 --- /dev/null +++ b/pages/gateway/getting-started.mdx @@ -0,0 +1,94 @@ +--- +title: Getting Started +description: Make your first inference request through Tangle Gateway in 2 minutes. +--- + +# Getting Started + +## 1. Get an API key + +Sign up at [router.tangle.tools](https://router.tangle.tools) and create an API key from the dashboard. Keys start with `sk-tan-`. + +## 2. Make a request + +### curl + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "What is Tangle?"}], + "stream": false + }' +``` + +### Python (OpenAI SDK) + +```python +from openai import OpenAI + +client = OpenAI( + api_key="sk-tan-YOUR_KEY", + base_url="https://router.tangle.tools/v1" +) + +response = client.chat.completions.create( + model="anthropic/claude-sonnet-4-6", + messages=[{"role": "user", "content": "What is Tangle?"}] +) +print(response.choices[0].message.content) +``` + +### TypeScript (AI SDK) + +```typescript +import { generateText } from 'ai' +import { createOpenAI } from '@ai-sdk/openai' + +const tangle = createOpenAI({ + apiKey: 'sk-tan-YOUR_KEY', + baseURL: 'https://router.tangle.tools/v1', +}) + +const { text } = await generateText({ + model: tangle('anthropic/claude-sonnet-4-6'), + prompt: 'What is Tangle?', +}) +``` + +## 3. Check the response headers + +Every response includes metadata headers: + +``` +X-Generation-Id: gen_01J5K7... # Unique request ID +X-Tangle-Price-Input: 0.000003 # USD per input token +X-Tangle-Price-Output: 0.000015 # USD per output token +X-Tangle-Cache: MISS # Response cache status +X-RateLimit-Remaining: 59 # Requests left in window +``` + +Use the generation ID to look up request details later via [`GET /v1/generation`](/gateway/api-generation). + +## 4. 
Try different models + +The model ID format is `provider/model-name`: + +``` +openai/gpt-4o-mini +anthropic/claude-sonnet-4-6 +google/gemini-2.0-flash-lite +groq/llama-3.1-8b-instant +deepseek/deepseek-chat +mistral/mistral-large-latest +``` + +You can also use bare model names (`gpt-4o-mini`, `claude-sonnet-4-6`) — the gateway resolves the provider automatically. + +## What's next + +- [Bring Your Own Key](/gateway/byok) — use your existing provider API keys for zero markup +- [Model Fallbacks](/gateway/fallbacks) — configure backup models +- [Zero Data Retention](/gateway/zdr) — compliance for sensitive workloads diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx new file mode 100644 index 00000000..af7cf3ca --- /dev/null +++ b/pages/gateway/guardrails.mdx @@ -0,0 +1,63 @@ +--- +title: Guardrails +description: Gateway-level PII detection and prompt injection scanning. +--- + +# Guardrails + +The gateway scans all requests for PII and prompt injection patterns before routing. Results are available in the `X-Tangle-Guardrails` response header. 
+ +## Detection categories + +### PII detection + +| Pattern | Severity | Example | +|---------|----------|---------| +| SSN | Critical | `123-45-6789` | +| Credit card (Visa/MC/Discover) | Critical | `4111 1111 1111 1111` | +| Credit card (Amex) | Critical | `3782 822463 10005` | +| Email | Low | `user@example.com` | +| US phone | Medium | `(555) 123-4567` | +| IP address | Low | `192.168.1.1` | + +### Prompt injection detection + +Applied to user messages only (not system or assistant): + +| Pattern | Severity | +|---------|----------| +| "Ignore all previous instructions" | High | +| "You are now a different AI" | High | +| "Pretend you have no restrictions" | High | +| "Reveal your system prompt" | Medium | +| DAN-mode jailbreaks | High | + +## Modes + +### Audit mode (default) + +Flags are logged and returned in the `X-Tangle-Guardrails` header but requests are not blocked: + +``` +X-Tangle-Guardrails: pii:low,prompt_injection:high +``` + +### Block mode + +Requests matching configured categories are rejected with 400: + +```json +{ + "error": { + "message": "Request blocked by guardrails: pii, prompt_injection", + "type": "invalid_request_error", + "code": "guardrail_blocked" + } +} +``` + +Block mode requires a `GuardrailPolicy` record configured for your team or user with specific categories to block. + +## Disabling + +Set `ENABLE_GUARDRAILS=false` to skip all scanning. See [Feature Flags](/gateway/feature-flags). diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx new file mode 100644 index 00000000..314d97da --- /dev/null +++ b/pages/gateway/how-routing-works.mdx @@ -0,0 +1,64 @@ +--- +title: How Routing Works +description: The three-tier routing architecture behind Tangle Gateway. +--- + +# How Routing Works + +Every request passes through up to three routing tiers. The gateway tries each tier in order and returns the first successful response. 
+ +## The three tiers + +``` +Request → Tier 1: Operators → Tier 2: LiteLLM → Tier 3: Direct Provider → Response +``` + +### Tier 1: Operator routing + +Operators are independent inference providers registered on the Tangle network. They stake tokens, serve models, and compete on price, latency, and reputation. + +- Selected by [scoring algorithm](/gateway/smart-routing): reputation (40%) + latency (30%) + price (30%) +- Discovered automatically from on-chain Blueprint Service Manager contracts +- Can be pinned by blueprint, service, or operator address + +**When it's used:** Default for `auto` routing mode, required for SpendAuth (on-chain payments). + +**When it's skipped:** When [ZDR](/gateway/zdr) or [no-train](/gateway/no-train) is requested (operators can't verify compliance). When `routing: "provider"` is set explicitly. + +### Tier 2: LiteLLM proxy + +An internal proxy that handles 100+ provider integrations with built-in retries and provider-level fallbacks. + +**When it's used:** Default for standard requests when no operator is available. + +**When it's skipped:** When ZDR or no-train is requested (LiteLLM's downstream routing is not compliance-controllable). When LiteLLM is not configured (`LITELLM_URL` unset). + +### Tier 3: Direct provider + +The gateway calls the provider API directly using platform credentials (or [BYOK](/gateway/byok) credentials). + +**When it's used:** Fallback when tiers 1 and 2 fail. Only tier used when compliance routing is active. + +**Always used for:** ZDR requests, no-train requests, BYOK with compliance flags. + +## Compliance mode + +When `zeroDataRetention` or `disallowPromptTraining` is set: + +``` +Request → Tier 3: Direct Provider (verified only) → Response +``` + +Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with verified compliance agreements. See [Zero Data Retention](/gateway/zdr) for the trust model. 
+ +## Routing control + +| Method | Effect | +|--------|--------| +| `routing: "auto"` | Try all three tiers (default) | +| `routing: "operator"` | Operators only. Fails if no operator available. | +| `routing: "provider"` | Skip operators, use LiteLLM + direct only. | +| `X-Tangle-Blueprint: <blueprint-id>` | Pin to operators under this Blueprint. | +| `X-Tangle-Operator: <slug-or-address>` | Pin to a specific operator. | +| `providerOptions.gateway.order` | Control which providers are tried and in what order. | +| `providerOptions.gateway.only` | Restrict to these providers only. | diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx new file mode 100644 index 00000000..84b2549d --- /dev/null +++ b/pages/gateway/index.mdx @@ -0,0 +1,58 @@ +--- +title: Tangle Gateway +description: Unified API for hundreds of AI models with built-in routing, compliance, and on-chain payments. +--- + +# Tangle Gateway + +Tangle Gateway is a unified inference API. One endpoint, hundreds of models, automatic routing across centralized providers and decentralized operators. + +## What it does + +- **One key, any model.** Access OpenAI, Anthropic, Google, Groq, Mistral, and 20+ providers through a single API key. +- **Operator network.** Route to decentralized operators on the Tangle network who compete on price, latency, and reputation. +- **Compliance routing.** Zero Data Retention and no-train filtering with verified provider agreements. +- **BYOK.** Bring your own provider keys for zero-markup access. +- **On-chain payments.** Pay operators directly via SpendAuth — no credit card required. + +## Quick example + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Authorization: Bearer $TANGLE_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Hello"}], + "stream": true + }' +``` + +Works with any OpenAI-compatible SDK. Change the base URL and you're done. 
+ +## Architecture + +The gateway routes through three tiers, in order: + +| Tier | What | When | +|------|------|------| +| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth | +| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests | +| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required | + +When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only. + +## How it fits + +``` +Workbench (agents) → Gateway (inference) → Operators (serving) → Protocol (settlement) +``` + +The gateway sits between the [Workbench](/vibe/introduction) where agents run and the [Protocol](/network/overview) where operators get paid. Agents in the workbench call the gateway for model access. The gateway selects the best provider or operator, routes the request, tracks usage, and settles payment. + +## Next steps + +- [Getting Started](/gateway/getting-started) — make your first request in 2 minutes +- [Supported Models](/gateway/models) — browse the model catalog +- [How Routing Works](/gateway/how-routing-works) — understand the 3-tier architecture +- [Zero Data Retention](/gateway/zdr) — compliance for regulated industries diff --git a/pages/gateway/migrate-openai.mdx b/pages/gateway/migrate-openai.mdx new file mode 100644 index 00000000..c27336f5 --- /dev/null +++ b/pages/gateway/migrate-openai.mdx @@ -0,0 +1,77 @@ +--- +title: Migrate from OpenAI +description: Switch from OpenAI's API to Tangle Gateway in under 5 minutes. +--- + +# Migrate from OpenAI + +Tangle Gateway is OpenAI-compatible. Change two lines and you're done. 
+ +## Python + +```diff + from openai import OpenAI + + client = OpenAI( +- api_key="sk-...", ++ api_key="sk-tan-YOUR_KEY", ++ base_url="https://router.tangle.tools/v1", + ) + + response = client.chat.completions.create( +- model="gpt-4o", ++ model="openai/gpt-4o", # or just "gpt-4o" — auto-resolved + messages=[{"role": "user", "content": "Hello"}] + ) +``` + +## TypeScript + +```diff + import OpenAI from 'openai' + + const client = new OpenAI({ +- apiKey: 'sk-...', ++ apiKey: 'sk-tan-YOUR_KEY', ++ baseURL: 'https://router.tangle.tools/v1', + }) +``` + +## curl + +```diff +- curl https://api.openai.com/v1/chat/completions \ +- -H "Authorization: Bearer sk-..." \ ++ curl https://router.tangle.tools/v1/chat/completions \ ++ -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-4o", "messages": [...]}' +``` + +## What you get + +By switching to Tangle Gateway, you get: + +- **Access to every provider** through the same client. Try `anthropic/claude-sonnet-4-6` or `groq/llama-3.1-70b` without changing SDKs. +- **Automatic fallbacks.** If OpenAI is down, configure backup models. +- **Cost visibility.** Every response tells you exactly what it cost via `X-Tangle-Price-*` headers. +- **Compliance routing.** One flag for ZDR, one flag for no-train. +- **BYOK.** Keep using your OpenAI key with zero markup. Add it to [`providerOptions.gateway.byok`](/gateway/byok). 
+ +## Keep your OpenAI key (zero markup) + +If you already have an OpenAI API key, use [BYOK](/gateway/byok) for zero platform markup: + +```python +response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[{"role": "user", "content": "Hello"}], + extra_body={ + "providerOptions": { + "gateway": { + "byok": {"openai": [{"apiKey": "sk-YOUR_OPENAI_KEY"}]} + } + } + } +) +``` diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx new file mode 100644 index 00000000..b18462a8 --- /dev/null +++ b/pages/gateway/migrate-vercel.mdx @@ -0,0 +1,80 @@ +--- +title: Migrate from Vercel AI Gateway +description: Switch from Vercel AI Gateway to Tangle Gateway. +--- + +# Migrate from Vercel AI Gateway + +Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI Gateway. Most code works unchanged. + +## What maps directly + +| Vercel Feature | Tangle Equivalent | Notes | +|---|---|---| +| `providerOptions.gateway.byok` | Same | Identical schema | +| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers | +| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers | +| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection | +| `providerOptions.gateway.order` | Same | Provider priority | +| `providerOptions.gateway.only` | Same | Provider allowlist | +| `models` fallback array | Same | Model-level failover | +| `GET /v1/credits` | Same | Balance check | +| `GET /v1/generation` | Same | Request detail lookup | + +## What's different + +| Feature | Vercel | Tangle | +|---------|--------|--------| +| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` | +| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) | +| **Pricing** | Zero markup | 20% markup (0% with BYOK) | +| **Operator network** | None | Decentralized operators compete on price/latency | +| **On-chain payments** | None | 
SpendAuth (EIP-712) — pay without a credit card | +| **Guardrails** | None | PII + injection detection built-in | +| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) | +| **OIDC auth** | Vercel-only | Not applicable | + +## Code change + +### AI SDK + +```diff + import { generateText } from 'ai' ++ import { createOpenAI } from '@ai-sdk/openai' + ++ const tangle = createOpenAI({ ++ apiKey: 'sk-tan-YOUR_KEY', ++ baseURL: 'https://router.tangle.tools/v1', ++ }) + + const { text } = await generateText({ +- model: 'anthropic/claude-sonnet-4-6', ++ model: tangle('anthropic/claude-sonnet-4-6'), + prompt: 'Hello', + providerOptions: { + gateway: { + zeroDataRetention: true, // works the same + caching: 'auto', // works the same + }, + }, + }) +``` + +### OpenAI SDK + +```diff + const client = new OpenAI({ +- apiKey: process.env.AI_GATEWAY_API_KEY, +- baseURL: 'https://ai-gateway.vercel.sh/v1', ++ apiKey: process.env.TANGLE_API_KEY, ++ baseURL: 'https://router.tangle.tools/v1', + }) +``` + +## What you gain + +- **Operator network.** Access decentralized inference providers who compete on price and latency. +- **On-chain payments.** Pay with crypto via SpendAuth — no Stripe/credit card required. +- **Wallet auth.** Sign in with Ethereum (SIWE) for web3-native access. +- **Guardrails.** Built-in PII and prompt injection detection on every request. +- **Self-hostable.** Deploy your own gateway instance — it's open source. diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx new file mode 100644 index 00000000..5c009c9b --- /dev/null +++ b/pages/gateway/models.mdx @@ -0,0 +1,69 @@ +--- +title: Supported Models +description: Browse models available through Tangle Gateway across 20+ providers. +--- + +# Supported Models + +Tangle Gateway provides access to models from 20+ providers through a single API. 
+ +## Providers + +| Provider | Slug | Models | +|----------|------|--------| +| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS | +| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku | +| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite | +| Groq | `groq` | Llama 3.1/3.2 (fast inference) | +| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) | +| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder | +| Mistral | `mistral` | Mistral Large, Codestral, Pixtral | +| Fireworks | `fireworks` | Phi, StarCoder, open models | +| Cohere | `cohere` | Command R/R+ | +| xAI | `xai` | Grok 2, Grok 3 | +| Cerebras | `cerebras` | Llama (fast inference) | +| SambaNova | `sambanova` | Fast open-model inference | +| AI21 | `ai21` | Jamba | +| Nvidia | `nvidia` | Nemotron | +| Z.ai | `zai` | GLM-4.7, GLM-5 | +| Moonshot | `moonshot` | Kimi | + +Plus operators on the Tangle network serving custom and open-source models. + +## Model ID format + +Use `provider/model-name`: + +``` +anthropic/claude-sonnet-4-6 +openai/gpt-4o-mini +groq/llama-3.1-70b-versatile +``` + +Or use bare names — the gateway resolves the provider by prefix: + +| Prefix | Resolves to | +|--------|-------------| +| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI | +| `claude-` | Anthropic | +| `gemini-`, `gemma-` | Google | +| `llama-`, `mixtral-` | Groq | +| `deepseek-` | DeepSeek | +| `mistral-`, `codestral-` | Mistral | +| `grok-` | xAI | +| `glm-` | Z.ai | +| `command-` | Cohere | + +## Modalities + +| Modality | Endpoint | Examples | +|----------|----------|---------| +| Text | `/v1/chat/completions` | All chat models | +| Images | `/v1/images/generations` | DALL-E, FLUX | +| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS | +| Embeddings | `/v1/embeddings` | text-embedding-3-small/large | +| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) | + +## Dynamic discovery + +The model catalog is available at [`GET 
/api/models`](https://router.tangle.tools/api/models) with pricing, context length, and modality information for every model. diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx new file mode 100644 index 00000000..da6154af --- /dev/null +++ b/pages/gateway/no-train.mdx @@ -0,0 +1,43 @@ +--- +title: Disallow Prompt Training +description: Route only through providers that don't use your data for model training. +--- + +# Disallow Prompt Training + +Ensure your prompts and responses are never used by providers to train their models. + +## Usage + +```json +{ + "providerOptions": { + "gateway": { + "disallowPromptTraining": true + } + } +} +``` + +## Relationship to ZDR + +Disallow prompt training is a **weaker guarantee** than [Zero Data Retention](/gateway/zdr): ZDR implies no-train, but no-train does not imply ZDR. All ZDR-compliant providers also disallow prompt training, but more providers disallow training than offer full ZDR, so the ZDR provider set is a subset of the no-train provider set. + +| Filter | Verified providers | +|--------|-------------------| +| No-train only | 25 providers | +| ZDR (includes no-train) | 13 providers | + +Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees. + +## No-train verified providers + +All ZDR providers plus: OpenAI, Google AI Studio, Cohere, Perplexity, xAI, Morph AI, Novita AI, Voyage AI, and others. + +See the full list at [`GET /api/gateway/compliance`](/gateway/api-compliance). + +## Routing behavior + +Same as ZDR: operators and LiteLLM are skipped. Only direct provider calls to verified no-train providers. + +Can be enabled team-wide via `noTrainEnabled: true` on the team record. diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx new file mode 100644 index 00000000..09d9c767 --- /dev/null +++ b/pages/gateway/operator-routing.mdx @@ -0,0 +1,75 @@ +--- +title: Operator Routing +description: Route inference through decentralized operators on the Tangle network. 
+--- + +# Operator Routing + +Operators are independent inference providers registered on the Tangle network. They run models on their own hardware, set their own prices, and earn from every request routed through them. + +## How operators are discovered + +1. Operators register on-chain via the Blueprint Service Manager (BSM) contract +2. The gateway syncs operator data from the chain every 60 seconds +3. Operators are stored in the database with their endpoint URL, pricing, and status +4. The [scoring algorithm](/gateway/smart-routing) ranks operators per-request + +## Routing to operators + +### Automatic (default) + +In `auto` mode, the gateway checks for operators serving the requested model before trying centralized providers: + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +### Pin to a Blueprint + +Route only to operators registered under a specific Blueprint: + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "X-Tangle-Blueprint: 42" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +### Pin to an operator + +Route to a specific operator by slug or Ethereum address: + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "X-Tangle-Operator: tangle-core-1" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +### Pin to a service instance + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "X-Tangle-Service: 7" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +## What's verified on-chain + +| Data | Verified? 
| +|------|-----------| +| Operator Ethereum address | Yes (signed transaction) | +| Active/inactive status | Yes (BSM contract state) | +| Staked amount | Yes (on-chain balance) | +| Pricing (per-token) | Yes (BSM contract) | +| Endpoint URL | No (self-reported at registration) | +| Backing provider | No (not tracked) | + +Because endpoint URL and backing provider are self-reported, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers. + +## Payment + +Operator requests can be paid two ways: + +1. **Platform credits** — deducted from your credit balance at the operator's listed price +2. **SpendAuth (on-chain)** — direct EIP-712 signed payment to the operator. No credit card needed. See [SpendAuth](/gateway/spend-auth). diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx new file mode 100644 index 00000000..59e55c65 --- /dev/null +++ b/pages/gateway/pricing.mdx @@ -0,0 +1,55 @@ +--- +title: Credits & Pricing +description: How billing works on Tangle Gateway. +--- + +# Credits & Pricing + +## Pricing model + +| Credential type | Markup | +|----------------|--------| +| Platform credentials | 20% above provider list price | +| [BYOK](/gateway/byok) | **0%** — provider list price, no markup | +| [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) | + +The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them. + +## Credits + +Credits are denominated in USD. Purchase via Stripe or receive as part of a subscription plan. 
+ +Check your balance: + +```bash +curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + https://router.tangle.tools/v1/credits +``` + +```json +{ + "balance": "95.50", + "total_used": "4.50" +} +``` + +## Cost per request + +Each request is charged based on tokens: + +``` +cost = (input_tokens × input_price) + (output_tokens × output_price) +``` + +Pricing varies by model. Check per-model pricing at [`GET /api/models`](https://router.tangle.tools/api/models) or in the `X-Tangle-Price-Input` / `X-Tangle-Price-Output` response headers. + +## Billing transparency + +Every response includes pricing headers so you know the cost before it hits your balance: + +``` +X-Tangle-Price-Input: 0.000003 # USD per input token +X-Tangle-Price-Output: 0.000015 # USD per output token +``` + +Look up detailed billing for any request via [`GET /v1/generation`](/gateway/api-generation). diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx new file mode 100644 index 00000000..eb0fe5b1 --- /dev/null +++ b/pages/gateway/provider-options.mdx @@ -0,0 +1,70 @@ +--- +title: providerOptions.gateway +description: Complete reference for gateway-specific request options. +--- + +# providerOptions.gateway + +All gateway-specific options are passed inside `providerOptions.gateway` in the request body. These are stripped before forwarding to providers. 
+ +## Full schema + +```typescript +interface GatewayOptions { + // Bring Your Own Key + byok?: Record<string, Array<{ apiKey: string }>> + + // Compliance routing + zeroDataRetention?: boolean + disallowPromptTraining?: boolean + + // Caching + caching?: 'auto' | false + cache?: false // disable response caching + + // Provider routing + order?: string[] // provider priority + only?: string[] // provider allowlist + + // Model fallbacks + models?: string[] // tried in order after primary model + + // Timeouts (1s-120s, clamped) + timeout?: number | Record<string, number> +} +``` + +## Options reference + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `byok` | `Record<string, Array<{ apiKey: string }>>` | — | Per-request provider credentials. [Details](/gateway/byok) | +| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) | +| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) | +| `caching` | `'auto'` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) | +| `cache` | `false` | — | Set `false` to skip response cache for this request. | +| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) | +| `only` | `string[]` | — | Restrict to these providers only. | +| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) | +| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. 
[Details](/gateway/timeouts) | + +## Example: everything at once + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Hello"}], + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [{"apiKey": "sk-ant-..."}] + }, + "zeroDataRetention": true, + "caching": "auto", + "models": ["openai/gpt-4o"], + "timeout": {"anthropic": 10000, "openai": 5000}, + "order": ["anthropic", "openai"] + } + } +} +``` diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx new file mode 100644 index 00000000..587ed74f --- /dev/null +++ b/pages/gateway/rate-limiting.mdx @@ -0,0 +1,44 @@ +--- +title: Rate Limiting +description: Rate limits by authentication method. +--- + +# Rate Limiting + +The gateway enforces sliding-window rate limits per authentication method. + +## Limits + +| Auth method | Rate limit | Daily limit | +|-------------|-----------|-------------| +| API Key | 60 req/min | Unlimited (with credits) | +| Session | 30 req/min | Unlimited (with credits) | +| SpendAuth | 120 req/min | Unlimited | +| Anonymous | 10 req/min | 5 req/day | +| Authenticated (no credits) | 30 req/min | 20 req/day | + +## Response headers + +Every response includes rate limit headers: + +``` +X-RateLimit-Limit: 60 +X-RateLimit-Remaining: 42 +X-RateLimit-Reset: 1712793600 +``` + +## 429 responses + +When rate limited: + +```json +{ + "error": { + "message": "Rate limit exceeded for this API key.", + "type": "rate_limit_error", + "code": "rate_limit_exceeded" + } +} +``` + +The `X-RateLimit-Reset` header indicates when the window resets (Unix timestamp in seconds). diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx new file mode 100644 index 00000000..f791ef95 --- /dev/null +++ b/pages/gateway/response-headers.mdx @@ -0,0 +1,41 @@ +--- +title: Response Headers +description: Headers returned on every gateway response. 
+--- + +# Response Headers + +Every response from the gateway includes metadata headers. + +## Standard headers + +| Header | Description | Example | +|--------|-------------|---------| +| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` | +| `X-Tangle-Price-Input` | USD per input token | `0.000003` | +| `X-Tangle-Price-Output` | USD per output token | `0.000015` | +| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` | +| `X-RateLimit-Limit` | Requests allowed per window | `60` | +| `X-RateLimit-Remaining` | Requests remaining | `42` | +| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` | + +## Conditional headers + +| Header | When present | Description | +|--------|-------------|-------------| +| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path | +| `X-Tangle-Operator` | When served by an operator | Operator slug | +| `X-Tangle-BYOK` | When BYOK credentials used | `true` | +| `X-Tangle-Caching` | When prompt caching applied | `auto` | +| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` | +| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` | +| `X-Free-Tier-Remaining` | Free tier requests | `3` | +| `X-Free-Tier-Limit` | Free tier daily cap | `5` | + +## Error response headers + +| Header | When present | Description | +|--------|-------------|-------------| +| `X-Payment-Required` | 402 responses | Amount needed (micro-USD) | +| `X-Payment-Currency` | 402 responses | `tsUSD` | +| `X-Payment-Methods` | 402 responses | `credits,spend_auth` | diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx new file mode 100644 index 00000000..23b643d5 --- /dev/null +++ b/pages/gateway/routing-trace.mdx @@ -0,0 +1,35 @@ +--- +title: Routing Trace +description: See exactly which providers were tried for every request. 
+--- + +# Routing Trace + +Every response includes an `X-Tangle-Routing-Trace` header showing the routing path — which providers were tried, whether they succeeded, and how long each took. + +## Header format + +``` +X-Tangle-Routing-Trace: openai/gpt-4o[operator(err:5001ms)→litellm(200:340ms)] +``` + +Format: `model[provider(status:latency)→provider(status:latency)]` + +Multiple models (from [fallbacks](/gateway/fallbacks)): + +``` +X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)] +``` + +## Sanitization + +The trace header is sanitized for safety: +- Operator names are shown as generic `operator` (slugs not exposed) +- Error messages are not included (only status codes) +- Internal URLs and hostnames are never leaked + +For the full unredacted trace including error messages, use the [generation lookup API](/gateway/generation-lookup) — the `routing_trace` field in the response contains the complete history. + +## Disabling + +Set `ENABLE_ROUTING_TRACE=false` to omit the header from all responses. See [Feature Flags](/gateway/feature-flags). diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx new file mode 100644 index 00000000..c39bcc2c --- /dev/null +++ b/pages/gateway/smart-routing.mdx @@ -0,0 +1,58 @@ +--- +title: Smart Routing +description: How the gateway scores and selects operators. +--- + +# Smart Routing + +When multiple operators serve the same model, the gateway selects the best one using a weighted scoring algorithm. + +## Scoring formula + +``` +score = reputation(40%) + latency(30%) + price(30%) +``` + +| Factor | Weight | What it measures | +|--------|--------|-----------------| +| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history | +| **Latency** | 30% | Inverse of average response time (lower = better) | +| **Price** | 30% | Inverse of per-token price (cheaper = better) | + +## Operator selection + +1. 
Query all operators serving the requested model +2. Filter: only `active` or `degraded` status, must be pipeline head +3. Score each operator +4. Sort by score descending +5. Route to the highest-scoring operator + +If a preferred operator is specified (via `X-Tangle-Operator`), it's moved to the top of the ranked list regardless of score. + +## Health tracking + +The gateway tracks operator health via: + +- **Health checks** — periodic probes stored in `OperatorHealthCheck` +- **Request outcomes** — success/failure recorded per request +- **Latency tracking** — rolling average updated per request + +Operators that consistently fail are automatically deprioritized by their dropping reputation and rising latency scores. + +## Provider ordering (non-operator) + +For direct provider routing, use `providerOptions.gateway.order` and `only`: + +```json +{ + "providerOptions": { + "gateway": { + "order": ["bedrock", "anthropic"], + "only": ["bedrock", "anthropic"] + } + } +} +``` + +- `order`: Try providers in this order. First with valid credentials wins. +- `only`: Restrict to these providers. Others are excluded even if they have credentials. diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx new file mode 100644 index 00000000..1f4977c2 --- /dev/null +++ b/pages/gateway/spend-auth.mdx @@ -0,0 +1,52 @@ +--- +title: SpendAuth (On-Chain Payments) +description: Pay operators directly on-chain via EIP-712 signed authorizations. +--- + +# SpendAuth + +SpendAuth lets you pay operators directly on-chain without a credit card or account. Sign an EIP-712 typed data message with your wallet, attach it to the request, and the operator claims payment after serving inference. + +## How it works + +1. **Sign:** Create an EIP-712 SpendAuth payload with your wallet +2. **Send:** Attach the signature as `X-Payment-Signature` header +3. **Authorize:** The gateway verifies the signature and calls `authorizeSpend` on-chain +4. 
**Serve:** The operator processes your inference request +5. **Claim:** The operator calls `claimPayment` to receive funds + +## Request format + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -H "X-Payment-Signature: { + \"commitment\": \"0xabc...\", + \"serviceId\": \"1\", + \"jobIndex\": 0, + \"amount\": \"1000000\", + \"operator\": \"0x70997970...\", + \"nonce\": \"42\", + \"expiry\": \"1712793600\", + \"signature\": \"0xff...\" + }" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +## Security + +- **EIP-712 signatures:** Cryptographically verified against the signing address +- **Nonce replay protection:** Each nonce can only be used once per commitment (in-memory + Redis store) +- **On-chain verification:** The `ShieldedCredits` contract validates authorization and deducts balance +- **Expiry:** SpendAuth payloads have a timestamp-based expiry + +## Rate limits + +SpendAuth requests get a generous 120 req/min limit per commitment since every request is paid. + +## On-chain contracts + +SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. The contract handles: +- Balance management (deposit, authorize, claim) +- Authorization verification (EIP-712 signature recovery) +- Payment settlement (operator claims after serving) diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx new file mode 100644 index 00000000..10fe9645 --- /dev/null +++ b/pages/gateway/timeouts.mdx @@ -0,0 +1,56 @@ +--- +title: Provider Timeouts +description: Configure per-provider timeouts for fast failover. +--- + +# Provider Timeouts + +Set timeouts to trigger fast failover when a provider is slow. Values are given in milliseconds and clamped to 1,000-120,000 ms (1-120 seconds). + +## Global timeout + +Apply the same timeout to all providers: + +```json +{ + "providerOptions": { + "gateway": { + "timeout": 5000 + } + } +} +``` + +## Per-provider timeouts + +Different providers have different latency profiles. 
Set timeouts individually: + +```json +{ + "providerOptions": { + "gateway": { + "timeout": { + "openai": 5000, + "anthropic": 10000, + "groq": 3000 + } + } + } +} +``` + +## Default behavior + +Without explicit timeouts, the gateway uses a 30-second default for all providers and a 30-second idle timeout for streaming responses. + +## Bounds + +All timeout values are clamped: +- **Minimum:** 1,000ms (1 second) +- **Maximum:** 120,000ms (2 minutes) + +Values outside this range are silently clamped to the nearest bound. + +## Interaction with fallbacks + +When a provider times out, it counts as a failure in the [routing trace](/gateway/routing-trace) and the gateway moves to the next option — either a different provider for the same model, or the next [fallback model](/gateway/fallbacks). diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx new file mode 100644 index 00000000..21340db9 --- /dev/null +++ b/pages/gateway/zdr.mdx @@ -0,0 +1,84 @@ +--- +title: Zero Data Retention +description: Route requests only through providers with verified zero data retention agreements. +--- + +# Zero Data Retention (ZDR) + +When ZDR is enabled, the gateway routes requests **only** through providers that have verified agreements to delete all request data immediately after processing. + +## Enable per-request + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Analyze this sensitive data..."}], + "providerOptions": { + "gateway": { + "zeroDataRetention": true + } + } +} +``` + +## Enable team-wide + +Set `zdrEnabled: true` on your team record. All requests from team members will enforce ZDR. Team-wide ZDR overrides per-request `zeroDataRetention: false`. + +## How it works + +When ZDR is enabled: + +1. **Operators are skipped.** Operators self-report their backing provider. The gateway cannot verify what provider an operator actually routes through, so operators are excluded from ZDR-compliant routing. + +2. 
**LiteLLM is skipped.** LiteLLM has its own internal fallback chain that may route to non-ZDR providers. Since we can't control LiteLLM's routing decisions, it's excluded. + +3. **Direct provider only.** The gateway calls the provider API directly, selecting only from verified ZDR providers. + +4. **BYOK fallback preserves ZDR.** If your [BYOK](/gateway/byok) credentials fail, the fallback to platform credentials still enforces ZDR filtering. + +## ZDR-verified providers + +| Provider | ZDR | No-Train | Policy | +|----------|-----|----------|--------| +| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) | +| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) | +| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) | +| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) | +| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) | +| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) | +| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) | +| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) | +| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) | +| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) | +| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) | +| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) | +| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) | + +Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys. 
+ +## Trust model + +| Routing tier | ZDR behavior | +|---|---| +| **Operators** | Skipped. Self-reported backing provider is unverifiable. | +| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. | +| **Direct provider** | Routed only to verified ZDR providers. | +| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. | +| **Operator-only + ZDR** | 400 error. Conflicting requirements. | + +The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct. + +## Error responses + +If no ZDR-compliant provider is available for the requested model: + +```json +{ + "error": { + "message": "No ZDR providers available for model: deepseek/deepseek-chat. Providers considered: anthropic, groq, mistral, ...", + "type": "invalid_request_error", + "code": "no_providers_available" + } +} +``` From 486910627a5664a47a47adb6783c4978bfc82d7b Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Fri, 10 Apr 2026 15:32:56 -0700 Subject: [PATCH 2/4] fix: add Gateway to architecture page as inference layer --- pages/vision/architecture.mdx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pages/vision/architecture.mdx b/pages/vision/architecture.mdx index d64cd5bc..6b68d079 100644 --- a/pages/vision/architecture.mdx +++ b/pages/vision/architecture.mdx @@ -20,6 +20,7 @@ Tangle ties together three layers most platforms separate: the workbench where w | Layer | Runs here | Examples | | --------------- | ----------------------------- | -------------------------------------------------------- | | Workbench | Human and agent collaboration | Workflows, profiles, simulations, reviews | +| Gateway | Inference routing and billing | Model access, BYOK, ZDR compliance, operator selection | | Sandbox runtime | Executed tasks and tools | Agent sessions, tool calls, file edits | | Protocol | Coordination and settlement | Service registry, operator payments, staking, incentives | @@ 
-28,10 +29,13 @@ Tangle ties together three layers most platforms separate: the workbench where w **1) Execution Layer** Sandboxed runtimes with isolation, resource limits, and audit logs. This is where tasks actually run. -**2) Protocol Layer** +**2) Inference Layer** +The [Gateway](/gateway) routes inference requests across centralized providers and decentralized operators. It handles model selection, compliance filtering ([ZDR](/gateway/zdr), [no-train](/gateway/no-train)), [BYOK](/gateway/byok) credential management, and billing. + +**3) Protocol Layer** The coordination plane. It handles operator discovery, payment routing, and incentive enforcement. -**3) Experience Layer** +**4) Experience Layer** The agentic workbench and [Blueprint SDK](https://github.com/tangle-network/blueprint/tree/main). This is where teams design workflows, run simulations, and ship services. The SDK also includes optional gateways for integrating external events and payment-driven HTTP job execution (webhooks, From ad0904a7491531298b4a6400418bbd5ec0eeba6a Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Fri, 10 Apr 2026 15:49:34 -0700 Subject: [PATCH 3/4] fix: add Blueprint references throughout gateway docs - operator-routing.mdx: added Blueprints section with LLM Inference, Vector Store, and Custom Blueprint table. Links to llm-inference-blueprint repo, tangle-inference-core, Blueprint SDK, and Blueprint Runner docs. - how-routing-works.mdx: operators now reference Blueprints and BSM. - models.mdx: added Blueprint-served models table. - smart-routing.mdx: references Blueprints in scoring intro. - spend-auth.mdx: references tangle-inference-core for operator settlement. - index.mdx: operator network links to Blueprint intro. - developers/blueprints/use-cases.mdx: added llm-inference-blueprint and tangle-inference-core to the AI section. 
--- pages/developers/blueprints/use-cases.mdx | 16 +++++++++++++++- pages/gateway/how-routing-works.mdx | 5 +++-- pages/gateway/index.mdx | 2 +- pages/gateway/models.mdx | 10 +++++++++- pages/gateway/operator-routing.mdx | 16 +++++++++++++++- pages/gateway/smart-routing.mdx | 2 +- pages/gateway/spend-auth.mdx | 2 ++ 7 files changed, 46 insertions(+), 7 deletions(-) diff --git a/pages/developers/blueprints/use-cases.mdx b/pages/developers/blueprints/use-cases.mdx index c329e097..39953f8e 100644 --- a/pages/developers/blueprints/use-cases.mdx +++ b/pages/developers/blueprints/use-cases.mdx @@ -21,7 +21,21 @@ Tangle Network enables developers to rapidly build and deploy secure multi-party displayStyle="row" /> -## AI +## AI & Inference + + + + Date: Fri, 10 Apr 2026 15:55:33 -0700 Subject: [PATCH 4/4] style: run prettier on all gateway docs --- pages/ai/index.mdx | 1 + pages/gateway/api-chat.mdx | 28 +++++----- pages/gateway/api-credits.mdx | 8 +-- pages/gateway/api-generation.mdx | 16 +++--- pages/gateway/byok.mdx | 8 +-- pages/gateway/caching.mdx | 14 ++--- pages/gateway/enterprise-zdr.mdx | 1 + pages/gateway/fallbacks.mdx | 2 + pages/gateway/feature-flags.mdx | 14 ++--- pages/gateway/free-tier.mdx | 30 +++++------ pages/gateway/generation-lookup.mdx | 30 +++++------ pages/gateway/guardrails.mdx | 28 +++++----- pages/gateway/how-routing-works.mdx | 16 +++--- pages/gateway/index.mdx | 10 ++-- pages/gateway/migrate-vercel.mdx | 42 +++++++-------- pages/gateway/models.mdx | 82 ++++++++++++++--------------- pages/gateway/no-train.mdx | 8 +-- pages/gateway/operator-routing.mdx | 24 ++++----- pages/gateway/pricing.mdx | 8 +-- pages/gateway/provider-options.mdx | 22 ++++---- pages/gateway/rate-limiting.mdx | 14 ++--- pages/gateway/response-headers.mdx | 46 ++++++++-------- pages/gateway/routing-trace.mdx | 1 + pages/gateway/smart-routing.mdx | 10 ++-- pages/gateway/spend-auth.mdx | 1 + pages/gateway/timeouts.mdx | 1 + pages/gateway/zdr.mdx | 44 ++++++++-------- 27 files 
changed, 258 insertions(+), 251 deletions(-) diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx index fe2cf0bb..629fda09 100644 --- a/pages/ai/index.mdx +++ b/pages/ai/index.mdx @@ -49,6 +49,7 @@ Each run produces task and agent evaluations. That data feeds back into the work The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement. Key capabilities: + - **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers. - **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency. - **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements. diff --git a/pages/gateway/api-chat.mdx b/pages/gateway/api-chat.mdx index 2a8db365..db872118 100644 --- a/pages/gateway/api-chat.mdx +++ b/pages/gateway/api-chat.mdx @@ -50,23 +50,23 @@ All standard OpenAI parameters (`tools`, `tool_choice`, `response_format`, `top_ ### Routing headers (optional) -| Header | Effect | -|--------|--------| -| `X-Tangle-Routing` | `operator`, `provider`, or `auto` (default) | -| `X-Tangle-Blueprint` | Pin to operators under this Blueprint ID | -| `X-Tangle-Service` | Pin to a specific service instance | -| `X-Tangle-Operator` | Pin to a specific operator (slug or 0x address) | -| `X-Payment-Signature` | SpendAuth JSON payload for on-chain payment | +| Header | Effect | +| --------------------- | ----------------------------------------------- | +| `X-Tangle-Routing` | `operator`, `provider`, or `auto` (default) | +| `X-Tangle-Blueprint` | Pin to operators under this Blueprint ID | +| `X-Tangle-Service` | Pin to a specific service instance | +| `X-Tangle-Operator` | Pin to a specific operator (slug or 0x address) | +| `X-Payment-Signature` | SpendAuth JSON 
payload for on-chain payment | ### Validation -| Field | Constraint | -|-------|-----------| -| `model` | Required. Alphanumeric + `/-.:\\_`, max 128 chars. | -| `messages` | Required. Non-empty array. Each must have `role`. | -| `max_tokens` | Optional. 1-128,000. Default: 4,096. | -| `temperature` | Optional. 0-2. Default: 1. | -| Body size | Max 1MB. | +| Field | Constraint | +| ------------- | -------------------------------------------------- | +| `model` | Required. Alphanumeric + `/-.:\\_`, max 128 chars. | +| `messages` | Required. Non-empty array. Each must have `role`. | +| `max_tokens` | Optional. 1-128,000. Default: 4,096. | +| `temperature` | Optional. 0-2. Default: 1. | +| Body size | Max 1MB. | ## Response (non-streaming) diff --git a/pages/gateway/api-credits.mdx b/pages/gateway/api-credits.mdx index deae1201..9bfdf339 100644 --- a/pages/gateway/api-credits.mdx +++ b/pages/gateway/api-credits.mdx @@ -23,7 +23,7 @@ Authorization: Bearer sk-tan-YOUR_KEY } ``` -| Field | Description | -|-------|-------------| -| `balance` | Remaining credit balance (USD) | -| `total_used` | Total credits consumed (USD) | +| Field | Description | +| ------------ | ------------------------------ | +| `balance` | Remaining credit balance (USD) | +| `total_used` | Total credits consumed (USD) | diff --git a/pages/gateway/api-generation.mdx b/pages/gateway/api-generation.mdx index f52f7566..b86cf608 100644 --- a/pages/gateway/api-generation.mdx +++ b/pages/gateway/api-generation.mdx @@ -16,9 +16,9 @@ Authorization: Bearer sk-tan-YOUR_KEY ## Parameters -| Parameter | Required | Description | -|-----------|----------|-------------| -| `id` | Yes | Generation ID (format: `gen_`) | +| Parameter | Required | Description | +| --------- | -------- | ------------------------------------ | +| `id` | Yes | Generation ID (format: `gen_`) | ## Response @@ -48,8 +48,8 @@ Authorization: Bearer sk-tan-YOUR_KEY ## Errors -| Status | Code | Description | -|--------|------|-------------| -| 
400 | — | Missing or invalid generation ID | -| 401 | — | Authentication required | -| 404 | `not_found` | Generation not found or belongs to another user | +| Status | Code | Description | +| ------ | ----------- | ----------------------------------------------- | +| 400 | — | Missing or invalid generation ID | +| 401 | — | Authentication required | +| 404 | `not_found` | Generation not found or belongs to another user | diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx index 7db449b0..120af118 100644 --- a/pages/gateway/byok.mdx +++ b/pages/gateway/byok.mdx @@ -73,10 +73,10 @@ If the header is absent, platform credentials were used (possibly via fallback). ## Pricing -| Credential type | Markup | -|----------------|--------| -| BYOK | **0%** — provider list price | -| Platform credentials | 20% markup (configurable) | +| Credential type | Markup | +| -------------------- | ---------------------------- | +| BYOK | **0%** — provider list price | +| Platform credentials | 20% markup (configurable) | ## Security diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx index 976894c0..6e2417d5 100644 --- a/pages/gateway/caching.mdx +++ b/pages/gateway/caching.mdx @@ -26,13 +26,13 @@ Some providers require explicit cache markers to enable prompt caching, while ot ## How it works -| Provider | Caching Type | What `auto` does | -|----------|-------------|-----------------| -| OpenAI | Implicit | No change needed. Caching happens automatically. | -| Google | Implicit | No change needed. | -| DeepSeek | Implicit | No change needed. | -| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. | -| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. | +| Provider | Caching Type | What `auto` does | +| ------------------------------ | ------------ | ----------------------------------------------------------------------- | +| OpenAI | Implicit | No change needed. Caching happens automatically. 
| +| Google | Implicit | No change needed. | +| DeepSeek | Implicit | No change needed. | +| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. | +| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. | For Anthropic, the gateway converts: diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx index f331bd6f..c1d157ae 100644 --- a/pages/gateway/enterprise-zdr.mdx +++ b/pages/gateway/enterprise-zdr.mdx @@ -60,6 +60,7 @@ If your required model is only available from a non-ZDR provider, the request wi ## Step 4: Set up BYOK (optional) For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you: + - Zero platform markup - Direct contractual relationship with the provider - ZDR enforcement still applies on the fallback path diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx index bef37ec8..bd01e96f 100644 --- a/pages/gateway/fallbacks.mdx +++ b/pages/gateway/fallbacks.mdx @@ -24,6 +24,7 @@ Pass a `models` array in `providerOptions.gateway`: ``` The gateway tries: + 1. `openai/gpt-4o` (primary model) 2. `anthropic/claude-sonnet-4-6` (first fallback) 3. `groq/llama-3.1-70b-versatile` (second fallback) @@ -57,6 +58,7 @@ Use `models` with `order` to control both model fallback and provider preference ``` This tries: + 1. `openai/gpt-4o` via available providers 2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx index 6fe29e23..128480e7 100644 --- a/pages/gateway/feature-flags.mdx +++ b/pages/gateway/feature-flags.mdx @@ -9,13 +9,13 @@ All gateway features are on by default. 
Set any flag to `false` to disable it wi ## Available flags -| Environment Variable | Default | Controls | -|---------------------|---------|----------| -| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning | -| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests | -| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) | -| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic | -| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header | +| Environment Variable | Default | Controls | +| -------------------------- | ------- | ------------------------------------------------------------ | +| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning | +| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests | +| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) | +| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic | +| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header | ## Usage diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx index 2123547b..ff008510 100644 --- a/pages/gateway/free-tier.mdx +++ b/pages/gateway/free-tier.mdx @@ -9,26 +9,26 @@ Try the gateway without credits. 
Free tier restricts to cheap, fast models with ## Limits -| Tier | Daily limit | Rate limit | -|------|------------|------------| -| Anonymous (no auth) | 5 req/day | 10 req/min | -| Authenticated (zero credits) | 20 req/day | 30 req/min | -| Paid (any credits) | Unlimited | 60 req/min | +| Tier | Daily limit | Rate limit | +| ---------------------------- | ----------- | ---------- | +| Anonymous (no auth) | 5 req/day | 10 req/min | +| Authenticated (zero credits) | 20 req/day | 30 req/min | +| Paid (any credits) | Unlimited | 60 req/min | ## Allowed models Free tier requests can use: -| Model | Provider | Why it's free | -|-------|----------|---------------| -| `gpt-4o-mini` | OpenAI | Small, cheap | -| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap | -| `llama-3.1-8b-instant` | Groq | Free tier inference | -| `llama-3.2-1b-preview` | Groq | Tiny model | -| `llama-3.2-3b-preview` | Groq | Small model | -| `gemini-2.0-flash-lite` | Google | Free tier | -| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap | -| `deepseek-chat` | DeepSeek | Very cheap | +| Model | Provider | Why it's free | +| --------------------------- | --------- | ------------------- | +| `gpt-4o-mini` | OpenAI | Small, cheap | +| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap | +| `llama-3.1-8b-instant` | Groq | Free tier inference | +| `llama-3.2-1b-preview` | Groq | Tiny model | +| `llama-3.2-3b-preview` | Groq | Small model | +| `gemini-2.0-flash-lite` | Google | Free tier | +| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap | +| `deepseek-chat` | DeepSeek | Very cheap | ## Blocked models diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx index 31c780ef..ccc988f8 100644 --- a/pages/gateway/generation-lookup.mdx +++ b/pages/gateway/generation-lookup.mdx @@ -54,18 +54,18 @@ curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ ## Fields -| Field | Description | -|-------|-------------| -| `id` | Generation ID (`gen_`) | -| `total_cost` | Total cost 
in USD | -| `model` | Model that served the request | -| `is_byok` | Whether BYOK credentials were used | -| `provider_name` | Provider that served the request | -| `streamed` | Whether the request used streaming | -| `latency` | Time to first token (ms) | -| `generation_time` | Total generation time (ms) | -| `tokens_prompt` / `tokens_completion` | Token counts | -| `native_tokens_cached` | Tokens served from provider cache | -| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) | -| `routing_trace` | Full routing attempt history | -| `cache_hit` | Whether response was served from gateway cache | +| Field | Description | +| ------------------------------------- | ---------------------------------------------- | +| `id` | Generation ID (`gen_`) | +| `total_cost` | Total cost in USD | +| `model` | Model that served the request | +| `is_byok` | Whether BYOK credentials were used | +| `provider_name` | Provider that served the request | +| `streamed` | Whether the request used streaming | +| `latency` | Time to first token (ms) | +| `generation_time` | Total generation time (ms) | +| `tokens_prompt` / `tokens_completion` | Token counts | +| `native_tokens_cached` | Tokens served from provider cache | +| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) | +| `routing_trace` | Full routing attempt history | +| `cache_hit` | Whether response was served from gateway cache | diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx index af7cf3ca..e343a214 100644 --- a/pages/gateway/guardrails.mdx +++ b/pages/gateway/guardrails.mdx @@ -11,26 +11,26 @@ The gateway scans all requests for PII and prompt injection patterns before rout ### PII detection -| Pattern | Severity | Example | -|---------|----------|---------| -| SSN | Critical | `123-45-6789` | +| Pattern | Severity | Example | +| ------------------------------ | -------- | --------------------- | +| SSN | Critical | `123-45-6789` | | Credit card (Visa/MC/Discover) | Critical | 
`4111 1111 1111 1111` | -| Credit card (Amex) | Critical | `3782 822463 10005` | -| Email | Low | `user@example.com` | -| US phone | Medium | `(555) 123-4567` | -| IP address | Low | `192.168.1.1` | +| Credit card (Amex) | Critical | `3782 822463 10005` | +| Email | Low | `user@example.com` | +| US phone | Medium | `(555) 123-4567` | +| IP address | Low | `192.168.1.1` | ### Prompt injection detection Applied to user messages only (not system or assistant): -| Pattern | Severity | -|---------|----------| -| "Ignore all previous instructions" | High | -| "You are now a different AI" | High | -| "Pretend you have no restrictions" | High | -| "Reveal your system prompt" | Medium | -| DAN-mode jailbreaks | High | +| Pattern | Severity | +| ---------------------------------- | -------- | +| "Ignore all previous instructions" | High | +| "You are now a different AI" | High | +| "Pretend you have no restrictions" | High | +| "Reveal your system prompt" | Medium | +| DAN-mode jailbreaks | High | ## Modes diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx index 7829a012..464211cc 100644 --- a/pages/gateway/how-routing-works.mdx +++ b/pages/gateway/how-routing-works.mdx @@ -54,12 +54,12 @@ Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with ## Routing control -| Method | Effect | -|--------|--------| -| `routing: "auto"` | Try all three tiers (default) | -| `routing: "operator"` | Operators only. Fails if no operator available. | -| `routing: "provider"` | Skip operators, use LiteLLM + direct only. | -| `X-Tangle-Blueprint: ` | Pin to operators under this Blueprint. | -| `X-Tangle-Operator: ` | Pin to a specific operator. | +| Method | Effect | +| ------------------------------- | ---------------------------------------------------- | +| `routing: "auto"` | Try all three tiers (default) | +| `routing: "operator"` | Operators only. Fails if no operator available. 
| +| `routing: "provider"` | Skip operators, use LiteLLM + direct only. | +| `X-Tangle-Blueprint: ` | Pin to operators under this Blueprint. | +| `X-Tangle-Operator: ` | Pin to a specific operator. | | `providerOptions.gateway.order` | Control which providers are tried and in what order. | -| `providerOptions.gateway.only` | Restrict to these providers only. | +| `providerOptions.gateway.only` | Restrict to these providers only. | diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx index 129b8b9b..57c7630a 100644 --- a/pages/gateway/index.mdx +++ b/pages/gateway/index.mdx @@ -34,11 +34,11 @@ Works with any OpenAI-compatible SDK. Change the base URL and you're done. The gateway routes through three tiers, in order: -| Tier | What | When | -|------|------|------| -| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth | -| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests | -| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required | +| Tier | What | When | +| ------------- | ---------------------------------------------------------- | -------------------------------------------------------------- | +| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth | +| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests | +| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required | When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only. 
diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx index b18462a8..abad708f 100644 --- a/pages/gateway/migrate-vercel.mdx +++ b/pages/gateway/migrate-vercel.mdx @@ -9,30 +9,30 @@ Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI G ## What maps directly -| Vercel Feature | Tangle Equivalent | Notes | -|---|---|---| -| `providerOptions.gateway.byok` | Same | Identical schema | -| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers | -| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers | -| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection | -| `providerOptions.gateway.order` | Same | Provider priority | -| `providerOptions.gateway.only` | Same | Provider allowlist | -| `models` fallback array | Same | Model-level failover | -| `GET /v1/credits` | Same | Balance check | -| `GET /v1/generation` | Same | Request detail lookup | +| Vercel Feature | Tangle Equivalent | Notes | +| ------------------------------------------------ | ----------------- | --------------------------------- | +| `providerOptions.gateway.byok` | Same | Identical schema | +| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers | +| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers | +| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection | +| `providerOptions.gateway.order` | Same | Provider priority | +| `providerOptions.gateway.only` | Same | Provider allowlist | +| `models` fallback array | Same | Model-level failover | +| `GET /v1/credits` | Same | Balance check | +| `GET /v1/generation` | Same | Request detail lookup | ## What's different -| Feature | Vercel | Tangle | -|---------|--------|--------| -| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` | -| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) | 
-| **Pricing** | Zero markup | 20% markup (0% with BYOK) | -| **Operator network** | None | Decentralized operators compete on price/latency | -| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card | -| **Guardrails** | None | PII + injection detection built-in | -| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) | -| **OIDC auth** | Vercel-only | Not applicable | +| Feature | Vercel | Tangle | +| --------------------- | ------------------------------------- | -------------------------------------------------------- | +| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` | +| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) | +| **Pricing** | Zero markup | 20% markup (0% with BYOK) | +| **Operator network** | None | Decentralized operators compete on price/latency | +| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card | +| **Guardrails** | None | PII + injection detection built-in | +| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) | +| **OIDC auth** | Vercel-only | Not applicable | ## Code change diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx index cf635ac4..4c058018 100644 --- a/pages/gateway/models.mdx +++ b/pages/gateway/models.mdx @@ -9,32 +9,32 @@ Tangle Gateway provides access to models from 20+ providers through a single API ## Providers -| Provider | Slug | Models | -|----------|------|--------| -| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS | -| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku | -| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite | -| Groq | `groq` | Llama 3.1/3.2 (fast inference) | -| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) | -| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder | -| Mistral | `mistral` | Mistral Large, Codestral, Pixtral | -| Fireworks | `fireworks` | 
Phi, StarCoder, open models | -| Cohere | `cohere` | Command R/R+ | -| xAI | `xai` | Grok 2, Grok 3 | -| Cerebras | `cerebras` | Llama (fast inference) | -| SambaNova | `sambanova` | Fast open-model inference | -| AI21 | `ai21` | Jamba | -| Nvidia | `nvidia` | Nemotron | -| Z.ai | `zai` | GLM-4.7, GLM-5 | -| Moonshot | `moonshot` | Kimi | +| Provider | Slug | Models | +| ----------- | ----------- | ----------------------------------------------------- | +| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS | +| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku | +| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite | +| Groq | `groq` | Llama 3.1/3.2 (fast inference) | +| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) | +| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder | +| Mistral | `mistral` | Mistral Large, Codestral, Pixtral | +| Fireworks | `fireworks` | Phi, StarCoder, open models | +| Cohere | `cohere` | Command R/R+ | +| xAI | `xai` | Grok 2, Grok 3 | +| Cerebras | `cerebras` | Llama (fast inference) | +| SambaNova | `sambanova` | Fast open-model inference | +| AI21 | `ai21` | Jamba | +| Nvidia | `nvidia` | Nemotron | +| Z.ai | `zai` | GLM-4.7, GLM-5 | +| Moonshot | `moonshot` | Kimi | Plus decentralized operators on the Tangle network running [Blueprints](/developers/blueprints/introduction): -| Blueprint | Models | How to route | -|-----------|--------|-------------| -| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto | -| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` | -| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug | +| Blueprint | Models | How to route | +| -------------------------------------------------------------------------- | ----------------------------------------------- | 
------------------------------------- | +| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto | +| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` | +| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug | Operators set their own pricing and the gateway [scores them](/gateway/smart-routing) on reputation, latency, and price. See [Operator Routing](/gateway/operator-routing) for details. @@ -50,27 +50,27 @@ groq/llama-3.1-70b-versatile Or use bare names — the gateway resolves the provider by prefix: -| Prefix | Resolves to | -|--------|-------------| -| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI | -| `claude-` | Anthropic | -| `gemini-`, `gemma-` | Google | -| `llama-`, `mixtral-` | Groq | -| `deepseek-` | DeepSeek | -| `mistral-`, `codestral-` | Mistral | -| `grok-` | xAI | -| `glm-` | Z.ai | -| `command-` | Cohere | +| Prefix | Resolves to | +| --------------------------- | ----------- | +| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI | +| `claude-` | Anthropic | +| `gemini-`, `gemma-` | Google | +| `llama-`, `mixtral-` | Groq | +| `deepseek-` | DeepSeek | +| `mistral-`, `codestral-` | Mistral | +| `grok-` | xAI | +| `glm-` | Z.ai | +| `command-` | Cohere | ## Modalities -| Modality | Endpoint | Examples | -|----------|----------|---------| -| Text | `/v1/chat/completions` | All chat models | -| Images | `/v1/images/generations` | DALL-E, FLUX | -| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS | -| Embeddings | `/v1/embeddings` | text-embedding-3-small/large | -| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) | +| Modality | Endpoint | Examples | +| ---------- | ---------------------------------------------- | -------------------------------------- | +| Text | `/v1/chat/completions` | All chat models | +| Images | `/v1/images/generations` | DALL-E, FLUX | +| 
Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS | +| Embeddings | `/v1/embeddings` | text-embedding-3-small/large | +| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) | ## Dynamic discovery diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx index da6154af..f6461c01 100644 --- a/pages/gateway/no-train.mdx +++ b/pages/gateway/no-train.mdx @@ -23,10 +23,10 @@ Ensure your prompts and responses are never used by providers to train their mod Disallow prompt training is a **subset** of [Zero Data Retention](/gateway/zdr). All ZDR-compliant providers also disallow prompt training, but more providers disallow training than offer full ZDR. -| Filter | Verified providers | -|--------|-------------------| -| No-train only | 25 providers | -| ZDR (includes no-train) | 13 providers | +| Filter | Verified providers | +| ----------------------- | ------------------ | +| No-train only | 25 providers | +| ZDR (includes no-train) | 13 providers | Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees. diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx index 6f4d3e34..687d38d2 100644 --- a/pages/gateway/operator-routing.mdx +++ b/pages/gateway/operator-routing.mdx @@ -11,11 +11,11 @@ Operators are independent inference providers registered on the Tangle network. Operators run **Blueprints** — on-chain service definitions that specify what an operator does. The inference-related Blueprints the gateway routes through: -| Blueprint | What it serves | Repo | -|-----------|---------------|------| +| Blueprint | What it serves | Repo | +| ----------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | | **LLM Inference** | Chat completions, text generation (Llama, Qwen, Mistral, etc.) 
| [tangle-network/llm-inference-blueprint](https://github.com/tangle-network/llm-inference-blueprint) | -| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed | -| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) | +| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed | +| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) | The LLM Inference Blueprint uses [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core) — a shared Rust crate for EIP-712 signature verification, nonce management, and on-chain settlement. Operators compile it into a binary (`operator-lite`) that runs alongside their model server. @@ -70,14 +70,14 @@ curl -X POST "https://router.tangle.tools/v1/chat/completions" \ ## What's verified on-chain -| Data | Verified? | -|------|-----------| -| Operator Ethereum address | Yes (signed transaction) | -| Active/inactive status | Yes (BSM contract state) | -| Staked amount | Yes (on-chain balance) | -| Pricing (per-token) | Yes (BSM contract) | -| Endpoint URL | No (self-reported at registration) | -| Backing provider | No (not tracked) | +| Data | Verified? | +| ------------------------- | ---------------------------------- | +| Operator Ethereum address | Yes (signed transaction) | +| Active/inactive status | Yes (BSM contract state) | +| Staked amount | Yes (on-chain balance) | +| Pricing (per-token) | Yes (BSM contract) | +| Endpoint URL | No (self-reported at registration) | +| Backing provider | No (not tracked) | Because endpoint URL and backing provider are self-reported, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers. 
diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx index 59e55c65..6634f841 100644 --- a/pages/gateway/pricing.mdx +++ b/pages/gateway/pricing.mdx @@ -7,10 +7,10 @@ description: How billing works on Tangle Gateway. ## Pricing model -| Credential type | Markup | -|----------------|--------| -| Platform credentials | 20% above provider list price | -| [BYOK](/gateway/byok) | **0%** — provider list price, no markup | +| Credential type | Markup | +| -------------------------------- | ------------------------------------------- | +| Platform credentials | 20% above provider list price | +| [BYOK](/gateway/byok) | **0%** — provider list price, no markup | | [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) | The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them. diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx index eb0fe5b1..ccb49797 100644 --- a/pages/gateway/provider-options.mdx +++ b/pages/gateway/provider-options.mdx @@ -36,17 +36,17 @@ interface GatewayOptions { ## Options reference -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `byok` | `Record>` | — | Per-request provider credentials. [Details](/gateway/byok) | -| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) | -| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) | -| `caching` | `'auto'` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) | -| `cache` | `false` | — | Set `false` to skip response cache for this request. | -| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) | -| `only` | `string[]` | — | Restrict to these providers only. | -| `models` | `string[]` | — | Fallback model list. 
[Details](/gateway/fallbacks) | -| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts) | +| Option | Type | Default | Description | +| ------------------------ | ---------------------------------- | ------- | -------------------------------------------------------------- | +| `byok` | `Record>` | — | Per-request provider credentials. [Details](/gateway/byok) | +| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) | +| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) | +| `caching` | `'auto'` | — | Auto-inject prompt cache markers. [Details](/gateway/caching) | +| `cache` | `false` | — | Set `false` to skip response cache for this request. | +| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) | +| `only` | `string[]` | — | Restrict to these providers only. | +| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) | +| `timeout` | `number \| Record<string, number>` | `30000` | Timeout in ms. [Details](/gateway/timeouts) | ## Example: everything at once diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx index 587ed74f..7b0962d2 100644 --- a/pages/gateway/rate-limiting.mdx +++ b/pages/gateway/rate-limiting.mdx @@ -9,13 +9,13 @@ The gateway enforces sliding-window rate limits per authentication method.
## Limits -| Auth method | Rate limit | Daily limit | -|-------------|-----------|-------------| -| API Key | 60 req/min | Unlimited (with credits) | -| Session | 30 req/min | Unlimited (with credits) | -| SpendAuth | 120 req/min | Unlimited | -| Anonymous | 10 req/min | 5 req/day | -| Authenticated (no credits) | 30 req/min | 20 req/day | +| Auth method | Rate limit | Daily limit | +| -------------------------- | ----------- | ------------------------ | +| API Key | 60 req/min | Unlimited (with credits) | +| Session | 30 req/min | Unlimited (with credits) | +| SpendAuth | 120 req/min | Unlimited | +| Anonymous | 10 req/min | 5 req/day | +| Authenticated (no credits) | 30 req/min | 20 req/day | ## Response headers diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx index f791ef95..40350b0c 100644 --- a/pages/gateway/response-headers.mdx +++ b/pages/gateway/response-headers.mdx @@ -9,33 +9,33 @@ Every response from the gateway includes metadata headers. ## Standard headers -| Header | Description | Example | -|--------|-------------|---------| -| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` | -| `X-Tangle-Price-Input` | USD per input token | `0.000003` | -| `X-Tangle-Price-Output` | USD per output token | `0.000015` | -| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` | -| `X-RateLimit-Limit` | Requests allowed per window | `60` | -| `X-RateLimit-Remaining` | Requests remaining | `42` | -| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` | +| Header | Description | Example | +| ----------------------- | --------------------------- | ------------------- | +| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` | +| `X-Tangle-Price-Input` | USD per input token | `0.000003` | +| `X-Tangle-Price-Output` | USD per output token | `0.000015` | +| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` | +| `X-RateLimit-Limit` | Requests allowed per window | `60` | +| 
`X-RateLimit-Remaining` | Requests remaining | `42` | +| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` | ## Conditional headers -| Header | When present | Description | -|--------|-------------|-------------| -| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path | -| `X-Tangle-Operator` | When served by an operator | Operator slug | -| `X-Tangle-BYOK` | When BYOK credentials used | `true` | -| `X-Tangle-Caching` | When prompt caching applied | `auto` | -| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` | -| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` | -| `X-Free-Tier-Remaining` | Free tier requests | `3` | -| `X-Free-Tier-Limit` | Free tier daily cap | `5` | +| Header | When present | Description | +| ------------------------ | --------------------------------- | ------------------------------- | +| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path | +| `X-Tangle-Operator` | When served by an operator | Operator slug | +| `X-Tangle-BYOK` | When BYOK credentials used | `true` | +| `X-Tangle-Caching` | When prompt caching applied | `auto` | +| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` | +| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` | +| `X-Free-Tier-Remaining` | Free tier requests | `3` | +| `X-Free-Tier-Limit` | Free tier daily cap | `5` | ## Error response headers -| Header | When present | Description | -|--------|-------------|-------------| +| Header | When present | Description | +| -------------------- | ------------- | ------------------------- | | `X-Payment-Required` | 402 responses | Amount needed (micro-USD) | -| `X-Payment-Currency` | 402 responses | `tsUSD` | -| `X-Payment-Methods` | 402 responses | `credits,spend_auth` | +| `X-Payment-Currency` | 402 responses | `tsUSD` | +| `X-Payment-Methods` | 402 responses | `credits,spend_auth` | 
diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx index 23b643d5..9e23653d 100644 --- a/pages/gateway/routing-trace.mdx +++ b/pages/gateway/routing-trace.mdx @@ -24,6 +24,7 @@ X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonn ## Sanitization The trace header is sanitized for safety: + - Operator names are shown as generic `operator` (slugs not exposed) - Error messages are not included (only status codes) - Internal URLs and hostnames are never leaked diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx index 54cf002b..9521f60f 100644 --- a/pages/gateway/smart-routing.mdx +++ b/pages/gateway/smart-routing.mdx @@ -13,11 +13,11 @@ When multiple operators running the same [Blueprint](/developers/blueprints/intr score = reputation(40%) + latency(30%) + price(30%) ``` -| Factor | Weight | What it measures | -|--------|--------|-----------------| -| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history | -| **Latency** | 30% | Inverse of average response time (lower = better) | -| **Price** | 30% | Inverse of per-token price (cheaper = better) | +| Factor | Weight | What it measures | +| -------------- | ------ | --------------------------------------------------------- | +| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history | +| **Latency** | 30% | Inverse of average response time (lower = better) | +| **Price** | 30% | Inverse of per-token price (cheaper = better) | ## Operator selection diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx index e6da980a..d464d36d 100644 --- a/pages/gateway/spend-auth.mdx +++ b/pages/gateway/spend-auth.mdx @@ -47,6 +47,7 @@ SpendAuth requests get a generous 120 req/min limit per commitment since every r ## On-chain contracts SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. 
The contract handles: + - Balance management (deposit, authorize, claim) - Authorization verification (EIP-712 signature recovery) - Payment settlement (operator claims after serving) diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx index 10fe9645..6c49cc18 100644 --- a/pages/gateway/timeouts.mdx +++ b/pages/gateway/timeouts.mdx @@ -46,6 +46,7 @@ Without explicit timeouts, the gateway uses a 30-second default for all provider ## Bounds All timeout values are clamped: + - **Minimum:** 1,000ms (1 second) - **Maximum:** 120,000ms (2 minutes) diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx index 21340db9..e9f44b4b 100644 --- a/pages/gateway/zdr.mdx +++ b/pages/gateway/zdr.mdx @@ -39,33 +39,33 @@ When ZDR is enabled: ## ZDR-verified providers -| Provider | ZDR | No-Train | Policy | -|----------|-----|----------|--------| -| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) | -| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) | -| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) | -| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) | -| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) | -| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) | -| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) | -| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) | -| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) | -| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) | -| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) | -| Baseten | 
Yes | Yes | [Security](https://docs.baseten.co/observability/security) | -| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) | +| Provider | ZDR | No-Train | Policy | +| -------------- | --- | -------- | ------------------------------------------------------------------------------------------------------ | +| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) | +| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) | +| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) | +| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) | +| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) | +| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) | +| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) | +| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) | +| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) | +| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) | +| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) | +| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) | +| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) | Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys. ## Trust model -| Routing tier | ZDR behavior | -|---|---| -| **Operators** | Skipped. Self-reported backing provider is unverifiable. | -| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. 
| -| **Direct provider** | Routed only to verified ZDR providers. | -| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. | -| **Operator-only + ZDR** | 400 error. Conflicting requirements. | +| Routing tier | ZDR behavior | +| ----------------------- | ---------------------------------------------------------- | +| **Operators** | Skipped. Self-reported backing provider is unverifiable. | +| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. | +| **Direct provider** | Routed only to verified ZDR providers. | +| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. | +| **Operator-only + ZDR** | 400 error. Conflicting requirements. | The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct.