diff --git a/pages/_meta.ts b/pages/_meta.ts index ef9a5cab..b700a1ca 100644 --- a/pages/_meta.ts +++ b/pages/_meta.ts @@ -18,6 +18,10 @@ const meta: Meta = { title: "Workbench", type: "page", }, + gateway: { + title: "Gateway", + type: "page", + }, infrastructure: { title: "Runtime", type: "page", diff --git a/pages/ai/_meta.ts b/pages/ai/_meta.ts index 7165fac5..88e4c860 100644 --- a/pages/ai/_meta.ts +++ b/pages/ai/_meta.ts @@ -2,6 +2,26 @@ import type { Meta } from "nextra"; const meta: Meta = { index: "AI Introduction", + "-- gateway": { + type: "separator", + title: "Inference Gateway", + }, + "gateway-intro": { + title: "Introduction", + href: "/gateway", + }, + "gateway-start": { + title: "Getting Started", + href: "/gateway/getting-started", + }, + "gateway-models": { + title: "Models & Providers", + href: "/gateway/models", + }, + "gateway-zdr": { + title: "Zero Data Retention", + href: "/gateway/zdr", + }, "-- workbench": { type: "separator", title: "Agentic Workbench", diff --git a/pages/ai/index.mdx b/pages/ai/index.mdx index fdfbcaed..629fda09 100644 --- a/pages/ai/index.mdx +++ b/pages/ai/index.mdx @@ -44,8 +44,20 @@ Core capabilities: Each run produces task and agent evaluations. That data feeds back into the workbench to improve prompts, policies, and workflows over time. +## Inference Gateway + +The [Tangle Gateway](/gateway) is the inference routing layer. Agents and applications call a single API to access hundreds of models across centralized providers and decentralized operators. The gateway handles model selection, compliance routing, billing, and payment settlement. + +Key capabilities: + +- **One API, any model.** OpenAI, Anthropic, Google, Groq, and 20+ providers. +- **Decentralized operators.** Route to operators on the Tangle network who compete on price and latency. +- **Compliance.** [Zero Data Retention](/gateway/zdr) and [no-train](/gateway/no-train) routing with verified provider agreements. 
+- **On-chain payments.** [SpendAuth](/gateway/spend-auth) — pay operators directly without a credit card. + ## Learn More +- [Gateway — Getting Started](/gateway/getting-started) - [Workbench details](/vibe/introduction) - [Runtime and sandboxing](/infrastructure/introduction) - [Operator onboarding](/operators/introduction) diff --git a/pages/developers/blueprints/use-cases.mdx b/pages/developers/blueprints/use-cases.mdx index c329e097..39953f8e 100644 --- a/pages/developers/blueprints/use-cases.mdx +++ b/pages/developers/blueprints/use-cases.mdx @@ -21,7 +21,21 @@ Tangle Network enables developers to rapidly build and deploy secure multi-party displayStyle="row" /> -## AI +## AI & Inference + + + + `) | + +## Response + +```json +{ + "data": { + "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV", + "total_cost": 0.00123, + "usage": 0.00123, + "created_at": "2026-04-10T12:00:00.000Z", + "model": "anthropic/claude-sonnet-4-6", + "is_byok": false, + "provider_name": "anthropic", + "streamed": true, + "latency": 200, + "generation_time": 1500, + "tokens_prompt": 100, + "tokens_completion": 50, + "native_tokens_cached": 80, + "native_tokens_reasoning": 0, + "status": "success", + "routing_trace": {...}, + "cache_hit": false + } +} +``` + +## Errors + +| Status | Code | Description | +| ------ | ----------- | ----------------------------------------------- | +| 400 | — | Missing or invalid generation ID | +| 401 | — | Authentication required | +| 404 | `not_found` | Generation not found or belongs to another user | diff --git a/pages/gateway/authentication.mdx b/pages/gateway/authentication.mdx new file mode 100644 index 00000000..5ce04f4e --- /dev/null +++ b/pages/gateway/authentication.mdx @@ -0,0 +1,57 @@ +--- +title: Authentication +description: Authentication methods for Tangle Gateway. +--- + +# Authentication + +Four authentication methods, each with different rate limits and capabilities. + +## API Key + +Create keys at the dashboard. 
Keys start with `sk-tan-` and are SHA256-hashed before storage. + +```bash +curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + https://router.tangle.tools/v1/chat/completions +``` + +- **Rate limit:** 60 req/min +- **Credit check:** Yes (402 if balance is zero on non-free-tier models) +- **Key features:** Expiration dates, soft revocation, last-used tracking + +## Session (Cookie) + +Browser-based authentication via Better Auth. Supports email/password and OAuth (Google, GitHub). + +- **Rate limit:** 30 req/min +- **Credit check:** Yes + +## SIWE (Sign-In with Ethereum) + +Wallet-based authentication via EIP-191 signatures. Authenticate with your Ethereum wallet. + +``` +POST /api/siwe/verify +{ "address": "0x...", "signature": "0x...", "message": "..." } +``` + +## SpendAuth (On-Chain Payment) + +EIP-712 signed payment authorization. No account needed — pay operators directly on-chain. + +```bash +curl -H "X-Payment-Signature: {\"commitment\":\"0x...\",\"amount\":\"1000000\",...}" \ + https://router.tangle.tools/v1/chat/completions +``` + +- **Rate limit:** 120 req/min per commitment +- **Credit check:** No (payment is on-chain) +- See [SpendAuth](/gateway/spend-auth) for details. + +## Anonymous + +No authentication required for [free tier models](/gateway/free-tier). + +- **Rate limit:** 10 req/min, 5 req/day +- **Model access:** Free tier only (gpt-4o-mini, llama-3.1-8b, etc.) diff --git a/pages/gateway/byok.mdx b/pages/gateway/byok.mdx new file mode 100644 index 00000000..120af118 --- /dev/null +++ b/pages/gateway/byok.mdx @@ -0,0 +1,86 @@ +--- +title: Bring Your Own Key (BYOK) +description: Use your own provider API keys with Tangle Gateway for zero-markup access. +--- + +# Bring Your Own Key (BYOK) + +Use your existing provider API keys with Tangle Gateway. BYOK requests have **zero platform markup** — you pay the provider's list price directly. 
+ +## Per-request BYOK + +Pass credentials in `providerOptions.gateway.byok`: + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Hello"}], + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [{"apiKey": "sk-ant-your-key"}] + } + } + } +} +``` + +### Multiple credentials + +Specify multiple credentials per provider. The gateway tries them in order: + +```json +{ + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [ + {"apiKey": "sk-ant-primary"}, + {"apiKey": "sk-ant-backup"} + ] + } + } + } +} +``` + +### Multiple providers + +```json +{ + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [{"apiKey": "sk-ant-..."}], + "openai": [{"apiKey": "sk-..."}] + } + } + } +} +``` + +## Automatic fallback + +If your BYOK credentials fail (401, 403, rate limit), the gateway automatically falls back to platform credentials. This fallback preserves all compliance filters — if you requested [ZDR](/gateway/zdr), the fallback will only use ZDR-compliant system credentials. + +The `X-Tangle-BYOK` response header indicates whether the request used your credentials: + +``` +X-Tangle-BYOK: true # Your key was used +``` + +If the header is absent, platform credentials were used (possibly via fallback). + +## Pricing + +| Credential type | Markup | +| -------------------- | ---------------------------- | +| BYOK | **0%** — provider list price | +| Platform credentials | 20% markup (configurable) | + +## Security + +- BYOK credentials are never logged, stored, or persisted. +- Credentials exist only in memory for the duration of the request. +- The `providerOptions` field is stripped from the request body before forwarding to providers. +- Credentials are validated by structure (`apiKey` must be a string) and sanitized against prototype pollution. 
diff --git a/pages/gateway/caching.mdx b/pages/gateway/caching.mdx new file mode 100644 index 00000000..6e2417d5 --- /dev/null +++ b/pages/gateway/caching.mdx @@ -0,0 +1,72 @@ +--- +title: Automatic Caching +description: Enable prompt caching across providers with a single flag. +--- + +# Automatic Caching + +Some providers require explicit cache markers to enable prompt caching, while others cache automatically. Use `caching: 'auto'` to let the gateway handle it. + +## Usage + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [ + {"role": "system", "content": "You are a helpful assistant with a large knowledge base..."}, + {"role": "user", "content": "What is Tangle?"} + ], + "providerOptions": { + "gateway": { + "caching": "auto" + } + } +} +``` + +## How it works + +| Provider | Caching Type | What `auto` does | +| ------------------------------ | ------------ | ----------------------------------------------------------------------- | +| OpenAI | Implicit | No change needed. Caching happens automatically. | +| Google | Implicit | No change needed. | +| DeepSeek | Implicit | No change needed. | +| Anthropic | Explicit | Adds `cache_control: { type: 'ephemeral' }` to the last system message. | +| Anthropic (via Bedrock/Vertex) | Explicit | Same as Anthropic direct. | + +For Anthropic, the gateway converts: + +```json +{"role": "system", "content": "You are helpful..."} +``` + +Into: + +```json +{"role": "system", "content": [{"type": "text", "text": "You are helpful...", "cache_control": {"type": "ephemeral"}}]} +``` + +This caches the system prompt so subsequent messages in the same conversation reuse it, reducing costs by up to 90%. + +## Response caching + +Separately from prompt caching, the gateway caches complete responses for **deterministic requests** (temperature ≤ 0.01, non-streaming). Cached responses are free. 
+ +``` +X-Tangle-Cache: HIT # Served from cache +X-Tangle-Cache: MISS # Fetched from provider +``` + +Disable per-request: + +```json +{ + "providerOptions": { + "gateway": { + "cache": false + } + } +} +``` + +The response cache key includes: model, messages, temperature, max_tokens, tools, response_format, and top_p. Different parameters always produce different cache entries. diff --git a/pages/gateway/enterprise-zdr.mdx b/pages/gateway/enterprise-zdr.mdx new file mode 100644 index 00000000..c1d157ae --- /dev/null +++ b/pages/gateway/enterprise-zdr.mdx @@ -0,0 +1,94 @@ +--- +title: Enterprise ZDR Setup +description: Configure Zero Data Retention for your organization. +--- + +# Enterprise ZDR Setup + +This guide walks through configuring ZDR for an organization that needs to guarantee no prompts or responses are retained by AI providers. + +## Step 1: Understand the trust model + +Read the [ZDR trust model](/gateway/zdr#trust-model) first. Key points: + +- ZDR is enforced at the **direct provider** level only. +- **Operators are skipped** when ZDR is enabled (their backing provider is unverifiable). +- **LiteLLM is skipped** (its internal routing is uncontrollable). +- BYOK fallback to platform credentials preserves ZDR filtering. + +## Step 2: Choose your approach + +### Option A: Team-wide ZDR (recommended) + +Enable ZDR for all requests from your team. No code changes needed — every request is automatically filtered. + +Contact your admin to set `zdrEnabled: true` on your team record via the admin API: + +```bash +# Admin sets team-wide ZDR +curl -X PUT https://router.tangle.tools/api/admin/compliance \ + -H "Cookie: session_token=ADMIN_SESSION" \ + -d '{"providerId": "...", "zdr": true}' +``` + +### Option B: Per-request ZDR + +Add `zeroDataRetention: true` to individual requests. Useful for mixed workloads where only some requests handle sensitive data. 
+ +```python +response = client.chat.completions.create( + model="anthropic/claude-sonnet-4-6", + messages=[...], + extra_body={ + "providerOptions": { + "gateway": {"zeroDataRetention": True} + } + } +) +``` + +## Step 3: Verify provider coverage + +Check which providers are ZDR-verified for the models you need: + +```bash +curl https://router.tangle.tools/api/gateway/compliance | jq '.providers[] | select(.zdr == true)' +``` + +If your required model is only available from a non-ZDR provider, the request will return 400 with a clear error listing which providers were considered. + +## Step 4: Set up BYOK (optional) + +For maximum control, use [BYOK](/gateway/byok) with your own provider keys. This gives you: + +- Zero platform markup +- Direct contractual relationship with the provider +- ZDR enforcement still applies on the fallback path + +## Step 5: Monitor compliance + +Use the [generation lookup API](/gateway/generation-lookup) to audit requests: + +```bash +# Check if a specific request used a ZDR provider +curl -H "Authorization: Bearer sk-tan-..." \ + "https://router.tangle.tools/v1/generation?id=gen_..." \ + | jq '.data.provider_name' +``` + +The `routing_trace` field shows exactly which providers were considered and filtered. + +## Combining ZDR + no-train + +Both flags work as an AND: when both are enabled, requests are routed only to providers that satisfy both criteria. This is the strictest compliance level. + +```json +{ + "providerOptions": { + "gateway": { + "zeroDataRetention": true, + "disallowPromptTraining": true + } + } +} +``` diff --git a/pages/gateway/fallbacks.mdx b/pages/gateway/fallbacks.mdx new file mode 100644 index 00000000..bd01e96f --- /dev/null +++ b/pages/gateway/fallbacks.mdx @@ -0,0 +1,71 @@ +--- +title: Model Fallbacks +description: Configure backup models that are tried when the primary model fails. +--- + +# Model Fallbacks + +Specify backup models that are tried in order if the primary model fails or is unavailable. 
+ +## Usage + +Pass a `models` array in `providerOptions.gateway`: + +```json +{ + "model": "openai/gpt-4o", + "messages": [{"role": "user", "content": "Hello"}], + "providerOptions": { + "gateway": { + "models": ["anthropic/claude-sonnet-4-6", "groq/llama-3.1-70b-versatile"] + } + } +} +``` + +The gateway tries: + +1. `openai/gpt-4o` (primary model) +2. `anthropic/claude-sonnet-4-6` (first fallback) +3. `groq/llama-3.1-70b-versatile` (second fallback) + +The response comes from the first model that succeeds. + +## How fallback works + +For each model in the list, the gateway runs the full routing chain: + +1. **Operators** — try operators serving this model (if available) +2. **LiteLLM** — try the proxy with built-in retries +3. **Direct provider** — call the provider API directly + +If all tiers fail for a model, the gateway moves to the next model in the list. + +## Combining with provider ordering + +Use `models` with `order` to control both model fallback and provider preference: + +```json +{ + "model": "openai/gpt-4o", + "providerOptions": { + "gateway": { + "models": ["anthropic/claude-sonnet-4-6"], + "order": ["bedrock", "anthropic"] + } + } +} +``` + +This tries: + +1. `openai/gpt-4o` via available providers +2. `anthropic/claude-sonnet-4-6` via Bedrock first, then Anthropic direct + +## Observability + +When fallbacks occur, the [routing trace](/gateway/routing-trace) shows every model and provider attempted: + +``` +X-Tangle-Routing-Trace: openai/gpt-4o[openai(err:5001ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)] +``` diff --git a/pages/gateway/feature-flags.mdx b/pages/gateway/feature-flags.mdx new file mode 100644 index 00000000..128480e7 --- /dev/null +++ b/pages/gateway/feature-flags.mdx @@ -0,0 +1,35 @@ +--- +title: Feature Flags +description: Disable gateway features without a code deploy. +--- + +# Feature Flags + +All gateway features are on by default. Set any flag to `false` to disable it without deploying new code. 
+ +## Available flags + +| Environment Variable | Default | Controls | +| -------------------------- | ------- | ------------------------------------------------------------ | +| `ENABLE_GUARDRAILS` | `true` | PII detection, prompt injection scanning | +| `ENABLE_RESPONSE_CACHE` | `true` | Response caching for deterministic requests | +| `ENABLE_COMPLIANCE_FILTER` | `true` | Early ZDR/no-train validation (routing enforcement stays on) | +| `ENABLE_PROMPT_CACHING` | `true` | Auto `cache_control` injection for Anthropic | +| `ENABLE_ROUTING_TRACE` | `true` | `X-Tangle-Routing-Trace` response header | + +## Usage + +Set in your environment: + +```bash +ENABLE_GUARDRAILS=false # Disable all guardrail scanning +ENABLE_RESPONSE_CACHE=false # Disable response cache reads/writes +``` + +## Notes + +- `ENABLE_COMPLIANCE_FILTER` only disables the early validation check that returns a 400 before routing. The actual ZDR/no-train enforcement in the routing tiers (skip operators, skip LiteLLM) stays active regardless. This flag is for suppressing the early error, not for bypassing compliance. + +- When `ENABLE_GUARDRAILS=false`, no PII or injection scanning occurs. The `X-Tangle-Guardrails` header is never set. GuardrailEvent records are not created. + +- When `ENABLE_RESPONSE_CACHE=false`, every request hits the provider. Cached entries are not read or written. Existing cache entries are not purged (they expire naturally via TTL). diff --git a/pages/gateway/free-tier.mdx b/pages/gateway/free-tier.mdx new file mode 100644 index 00000000..ff008510 --- /dev/null +++ b/pages/gateway/free-tier.mdx @@ -0,0 +1,62 @@ +--- +title: Free Tier +description: Free access to small models with daily limits. +--- + +# Free Tier + +Try the gateway without credits. Free tier restricts to cheap, fast models with daily request limits. 
+ +## Limits + +| Tier | Daily limit | Rate limit | +| ---------------------------- | ----------- | ---------- | +| Anonymous (no auth) | 5 req/day | 10 req/min | +| Authenticated (zero credits) | 20 req/day | 30 req/min | +| Paid (any credits) | Unlimited | 60 req/min | + +## Allowed models + +Free tier requests can use: + +| Model | Provider | Why it's free | +| --------------------------- | --------- | ------------------- | +| `gpt-4o-mini` | OpenAI | Small, cheap | +| `claude-3-5-haiku-20241022` | Anthropic | Fast, cheap | +| `llama-3.1-8b-instant` | Groq | Free tier inference | +| `llama-3.2-1b-preview` | Groq | Tiny model | +| `llama-3.2-3b-preview` | Groq | Small model | +| `gemini-2.0-flash-lite` | Google | Free tier | +| `cerebras/llama-3.1-8b` | Cerebras | Fast, cheap | +| `deepseek-chat` | DeepSeek | Very cheap | + +## Blocked models + +These models require credits: + +- **OpenAI reasoning:** o1, o3, o4 (all variants) +- **OpenAI flagship:** gpt-4o, gpt-4, gpt-5 (gpt-4o-mini is allowed) +- **Anthropic flagship:** claude-opus, claude-sonnet (haiku is allowed) +- **Google flagship:** gemini-2.5-pro, gemini-2.5-ultra +- **xAI flagship:** grok-2, grok-3 + +Requesting a blocked model without credits returns 402: + +```json +{ + "error": { + "message": "Model \"gpt-4o\" requires credits. Free tier models: gpt-4o-mini, llama-3.1-8b-instant, gemini-2.0-flash-lite, deepseek-chat. Add credits or use a free tier model.", + "type": "insufficient_funds", + "code": "free_tier_limit" + } +} +``` + +## Response headers + +Free tier responses include remaining quota: + +``` +X-Free-Tier-Remaining: 3 +X-Free-Tier-Limit: 5 +``` diff --git a/pages/gateway/generation-lookup.mdx b/pages/gateway/generation-lookup.mdx new file mode 100644 index 00000000..ccc988f8 --- /dev/null +++ b/pages/gateway/generation-lookup.mdx @@ -0,0 +1,71 @@ +--- +title: Generation Lookup +description: Retrieve detailed information about any request by its generation ID. 
+--- + +# Generation Lookup + +Every request returns a unique generation ID in the `X-Generation-Id` header. Use it to look up full request details. + +## Endpoint + +``` +GET /v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV +``` + +Requires authentication. Returns details only for requests made by the authenticated user. + +## Example + +```bash +curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + "https://router.tangle.tools/v1/generation?id=gen_01ARZ3NDEKTSV4RRFFQ69G5FAV" +``` + +## Response + +```json +{ + "data": { + "id": "gen_01ARZ3NDEKTSV4RRFFQ69G5FAV", + "total_cost": 0.00123, + "usage": 0.00123, + "created_at": "2026-04-10T12:00:00.000Z", + "model": "anthropic/claude-sonnet-4-6", + "is_byok": false, + "provider_name": "anthropic", + "streamed": true, + "latency": 200, + "generation_time": 1500, + "tokens_prompt": 100, + "tokens_completion": 50, + "native_tokens_cached": 80, + "native_tokens_reasoning": 0, + "status": "success", + "routing_trace": { + "planningReasoning": "ZDR requested: filtering to 13 ZDR providers", + "modelAttempts": [...], + "totalLatencyMs": 1500 + }, + "cache_hit": false + } +} +``` + +## Fields + +| Field | Description | +| ------------------------------------- | ---------------------------------------------- | +| `id` | Generation ID (`gen_`) | +| `total_cost` | Total cost in USD | +| `model` | Model that served the request | +| `is_byok` | Whether BYOK credentials were used | +| `provider_name` | Provider that served the request | +| `streamed` | Whether the request used streaming | +| `latency` | Time to first token (ms) | +| `generation_time` | Total generation time (ms) | +| `tokens_prompt` / `tokens_completion` | Token counts | +| `native_tokens_cached` | Tokens served from provider cache | +| `native_tokens_reasoning` | Reasoning tokens (o1/o3/o4 models) | +| `routing_trace` | Full routing attempt history | +| `cache_hit` | Whether response was served from gateway cache | diff --git a/pages/gateway/getting-started.mdx 
b/pages/gateway/getting-started.mdx new file mode 100644 index 00000000..30d1edd2 --- /dev/null +++ b/pages/gateway/getting-started.mdx @@ -0,0 +1,94 @@ +--- +title: Getting Started +description: Make your first inference request through Tangle Gateway in 2 minutes. +--- + +# Getting Started + +## 1. Get an API key + +Sign up at [router.tangle.tools](https://router.tangle.tools) and create an API key from the dashboard. Keys start with `sk-tan-`. + +## 2. Make a request + +### curl + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "What is Tangle?"}], + "stream": false + }' +``` + +### Python (OpenAI SDK) + +```python +from openai import OpenAI + +client = OpenAI( + api_key="sk-tan-YOUR_KEY", + base_url="https://router.tangle.tools/v1" +) + +response = client.chat.completions.create( + model="anthropic/claude-sonnet-4-6", + messages=[{"role": "user", "content": "What is Tangle?"}] +) +print(response.choices[0].message.content) +``` + +### TypeScript (AI SDK) + +```typescript +import { generateText } from 'ai' +import { createOpenAI } from '@ai-sdk/openai' + +const tangle = createOpenAI({ + apiKey: 'sk-tan-YOUR_KEY', + baseURL: 'https://router.tangle.tools/v1', +}) + +const { text } = await generateText({ + model: tangle('anthropic/claude-sonnet-4-6'), + prompt: 'What is Tangle?', +}) +``` + +## 3. Check the response headers + +Every response includes metadata headers: + +``` +X-Generation-Id: gen_01J5K7... # Unique request ID +X-Tangle-Price-Input: 0.000003 # USD per input token +X-Tangle-Price-Output: 0.000015 # USD per output token +X-Tangle-Cache: MISS # Response cache status +X-RateLimit-Remaining: 59 # Requests left in window +``` + +Use the generation ID to look up request details later via [`GET /v1/generation`](/gateway/api-generation). + +## 4. 
Try different models + +The model ID format is `provider/model-name`: + +``` +openai/gpt-4o-mini +anthropic/claude-sonnet-4-6 +google/gemini-2.0-flash-lite +groq/llama-3.1-8b-instant +deepseek/deepseek-chat +mistral/mistral-large-latest +``` + +You can also use bare model names (`gpt-4o-mini`, `claude-sonnet-4-6`) — the gateway resolves the provider automatically. + +## What's next + +- [Bring Your Own Key](/gateway/byok) — use your existing provider API keys for zero markup +- [Model Fallbacks](/gateway/fallbacks) — configure backup models +- [Zero Data Retention](/gateway/zdr) — compliance for sensitive workloads diff --git a/pages/gateway/guardrails.mdx b/pages/gateway/guardrails.mdx new file mode 100644 index 00000000..e343a214 --- /dev/null +++ b/pages/gateway/guardrails.mdx @@ -0,0 +1,63 @@ +--- +title: Guardrails +description: Gateway-level PII detection and prompt injection scanning. +--- + +# Guardrails + +The gateway scans all requests for PII and prompt injection patterns before routing. Results are available in the `X-Tangle-Guardrails` response header. 
+ +## Detection categories + +### PII detection + +| Pattern | Severity | Example | +| ------------------------------ | -------- | --------------------- | +| SSN | Critical | `123-45-6789` | +| Credit card (Visa/MC/Discover) | Critical | `4111 1111 1111 1111` | +| Credit card (Amex) | Critical | `3782 822463 10005` | +| Email | Low | `user@example.com` | +| US phone | Medium | `(555) 123-4567` | +| IP address | Low | `192.168.1.1` | + +### Prompt injection detection + +Applied to user messages only (not system or assistant): + +| Pattern | Severity | +| ---------------------------------- | -------- | +| "Ignore all previous instructions" | High | +| "You are now a different AI" | High | +| "Pretend you have no restrictions" | High | +| "Reveal your system prompt" | Medium | +| DAN-mode jailbreaks | High | + +## Modes + +### Audit mode (default) + +Flags are logged and returned in the `X-Tangle-Guardrails` header but requests are not blocked: + +``` +X-Tangle-Guardrails: pii:low,prompt_injection:high +``` + +### Block mode + +Requests matching configured categories are rejected with 400: + +```json +{ + "error": { + "message": "Request blocked by guardrails: pii, prompt_injection", + "type": "invalid_request_error", + "code": "guardrail_blocked" + } +} +``` + +Block mode requires a `GuardrailPolicy` record configured for your team or user with specific categories to block. + +## Disabling + +Set `ENABLE_GUARDRAILS=false` to skip all scanning. See [Feature Flags](/gateway/feature-flags). diff --git a/pages/gateway/how-routing-works.mdx b/pages/gateway/how-routing-works.mdx new file mode 100644 index 00000000..464211cc --- /dev/null +++ b/pages/gateway/how-routing-works.mdx @@ -0,0 +1,65 @@ +--- +title: How Routing Works +description: The three-tier routing architecture behind Tangle Gateway. +--- + +# How Routing Works + +Every request passes through up to three routing tiers. The gateway tries each tier in order and returns the first successful response. 
+ +## The three tiers + +``` +Request → Tier 1: Operators → Tier 2: LiteLLM → Tier 3: Direct Provider → Response +``` + +### Tier 1: Operator routing + +Operators run [Blueprints](/developers/blueprints/introduction) — on-chain service definitions like the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint). They stake tokens, serve models, and compete on price, latency, and reputation. + +- Selected by [scoring algorithm](/gateway/smart-routing): reputation (40%) + latency (30%) + price (30%) +- Discovered automatically from on-chain [Blueprint Service Manager](/developers/blueprints/service-lifecycle) contracts +- Can be pinned by blueprint, service, or operator address +- See [Operator Routing](/gateway/operator-routing) for the full Blueprint catalog + +**When it's used:** Default for `auto` routing mode, required for SpendAuth (on-chain payments). + +**When it's skipped:** When [ZDR](/gateway/zdr) or [no-train](/gateway/no-train) is requested (operators can't verify compliance). When `routing: "provider"` is set explicitly. + +### Tier 2: LiteLLM proxy + +An internal proxy that handles 100+ provider integrations with built-in retries and provider-level fallbacks. + +**When it's used:** Default for standard requests when no operator is available. + +**When it's skipped:** When ZDR or no-train is requested (LiteLLM's downstream routing is not compliance-controllable). When LiteLLM is not configured (`LITELLM_URL` unset). + +### Tier 3: Direct provider + +The gateway calls the provider API directly using platform credentials (or [BYOK](/gateway/byok) credentials). + +**When it's used:** Fallback when tiers 1 and 2 fail. Only tier used when compliance routing is active. + +**Always used for:** ZDR requests, no-train requests, BYOK with compliance flags. 
+ +## Compliance mode + +When `zeroDataRetention` or `disallowPromptTraining` is set: + +``` +Request → Tier 3: Direct Provider (verified only) → Response +``` + +Tiers 1 and 2 are completely bypassed. The gateway routes only to providers with verified compliance agreements. See [Zero Data Retention](/gateway/zdr) for the trust model. + +## Routing control + +| Method | Effect | +| ------------------------------- | ---------------------------------------------------- | +| `routing: "auto"` | Try all three tiers (default) | +| `routing: "operator"` | Operators only. Fails if no operator available. | +| `routing: "provider"` | Skip operators, use LiteLLM + direct only. | +| `X-Tangle-Blueprint: ` | Pin to operators under this Blueprint. | +| `X-Tangle-Operator: ` | Pin to a specific operator. | +| `providerOptions.gateway.order` | Control which providers are tried and in what order. | +| `providerOptions.gateway.only` | Restrict to these providers only. | diff --git a/pages/gateway/index.mdx b/pages/gateway/index.mdx new file mode 100644 index 00000000..57c7630a --- /dev/null +++ b/pages/gateway/index.mdx @@ -0,0 +1,58 @@ +--- +title: Tangle Gateway +description: Unified API for hundreds of AI models with built-in routing, compliance, and on-chain payments. +--- + +# Tangle Gateway + +Tangle Gateway is a unified inference API. One endpoint, hundreds of models, automatic routing across centralized providers and decentralized operators. + +## What it does + +- **One key, any model.** Access OpenAI, Anthropic, Google, Groq, Mistral, and 20+ providers through a single API key. +- **Operator network.** Route to decentralized operators running [Blueprints](/developers/blueprints/introduction) on the Tangle network who compete on price, latency, and reputation. +- **Compliance routing.** Zero Data Retention and no-train filtering with verified provider agreements. +- **BYOK.** Bring your own provider keys for zero-markup access. 
+- **On-chain payments.** Pay operators directly via SpendAuth — no credit card required. + +## Quick example + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Authorization: Bearer $TANGLE_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Hello"}], + "stream": true + }' +``` + +Works with any OpenAI-compatible SDK. Change the base URL and you're done. + +## Architecture + +The gateway routes through three tiers, in order: + +| Tier | What | When | +| ------------- | ---------------------------------------------------------- | -------------------------------------------------------------- | +| **Operators** | Decentralized inference providers on Tangle | Default for operator-pinned requests and SpendAuth | +| **LiteLLM** | Proxy with 100+ provider integrations and built-in retries | Default for standard requests | +| **Direct** | Straight to provider API (OpenAI, Anthropic, etc.) | Fallback when LiteLLM unavailable, or when compliance required | + +When [Zero Data Retention](/gateway/zdr) or [no-train](/gateway/no-train) is requested, operators and LiteLLM are skipped — the gateway routes directly to verified providers only. + +## How it fits + +``` +Workbench (agents) → Gateway (inference) → Operators (serving) → Protocol (settlement) +``` + +The gateway sits between the [Workbench](/vibe/introduction) where agents run and the [Protocol](/network/overview) where operators get paid. Agents in the workbench call the gateway for model access. The gateway selects the best provider or operator, routes the request, tracks usage, and settles payment. 
+ +## Next steps + +- [Getting Started](/gateway/getting-started) — make your first request in 2 minutes +- [Supported Models](/gateway/models) — browse the model catalog +- [How Routing Works](/gateway/how-routing-works) — understand the 3-tier architecture +- [Zero Data Retention](/gateway/zdr) — compliance for regulated industries diff --git a/pages/gateway/migrate-openai.mdx b/pages/gateway/migrate-openai.mdx new file mode 100644 index 00000000..c27336f5 --- /dev/null +++ b/pages/gateway/migrate-openai.mdx @@ -0,0 +1,77 @@ +--- +title: Migrate from OpenAI +description: Switch from OpenAI's API to Tangle Gateway in under 5 minutes. +--- + +# Migrate from OpenAI + +Tangle Gateway is OpenAI-compatible. Change two lines and you're done. + +## Python + +```diff + from openai import OpenAI + + client = OpenAI( +- api_key="sk-...", ++ api_key="sk-tan-YOUR_KEY", ++ base_url="https://router.tangle.tools/v1", + ) + + response = client.chat.completions.create( +- model="gpt-4o", ++ model="openai/gpt-4o", # or just "gpt-4o" — auto-resolved + messages=[{"role": "user", "content": "Hello"}] + ) +``` + +## TypeScript + +```diff + import OpenAI from 'openai' + + const client = new OpenAI({ +- apiKey: 'sk-...', ++ apiKey: 'sk-tan-YOUR_KEY', ++ baseURL: 'https://router.tangle.tools/v1', + }) +``` + +## curl + +```diff +- curl https://api.openai.com/v1/chat/completions \ +- -H "Authorization: Bearer sk-..." \ ++ curl https://router.tangle.tools/v1/chat/completions \ ++ -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-4o", "messages": [...]}' +``` + +## What you get + +By switching to Tangle Gateway, you get: + +- **Access to every provider** through the same client. Try `anthropic/claude-sonnet-4-6` or `groq/llama-3.1-70b` without changing SDKs. +- **Automatic fallbacks.** If OpenAI is down, configure backup models. +- **Cost visibility.** Every response tells you exactly what it cost via `X-Tangle-Price-*` headers. 
+- **Compliance routing.** One flag for ZDR, one flag for no-train. +- **BYOK.** Keep using your OpenAI key with zero markup. Add it to [`providerOptions.gateway.byok`](/gateway/byok). + +## Keep your OpenAI key (zero markup) + +If you already have an OpenAI API key, use [BYOK](/gateway/byok) for zero platform markup: + +```python +response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[{"role": "user", "content": "Hello"}], + extra_body={ + "providerOptions": { + "gateway": { + "byok": {"openai": [{"apiKey": "sk-YOUR_OPENAI_KEY"}]} + } + } + } +) +``` diff --git a/pages/gateway/migrate-vercel.mdx b/pages/gateway/migrate-vercel.mdx new file mode 100644 index 00000000..abad708f --- /dev/null +++ b/pages/gateway/migrate-vercel.mdx @@ -0,0 +1,80 @@ +--- +title: Migrate from Vercel AI Gateway +description: Switch from Vercel AI Gateway to Tangle Gateway. +--- + +# Migrate from Vercel AI Gateway + +Tangle Gateway supports the same `providerOptions.gateway` schema as Vercel AI Gateway. Most code works unchanged. 
+ +## What maps directly + +| Vercel Feature | Tangle Equivalent | Notes | +| ------------------------------------------------ | ----------------- | --------------------------------- | +| `providerOptions.gateway.byok` | Same | Identical schema | +| `providerOptions.gateway.zeroDataRetention` | Same | 13 verified providers | +| `providerOptions.gateway.disallowPromptTraining` | Same | 25 verified providers | +| `providerOptions.gateway.caching: 'auto'` | Same | Anthropic cache_control injection | +| `providerOptions.gateway.order` | Same | Provider priority | +| `providerOptions.gateway.only` | Same | Provider allowlist | +| `models` fallback array | Same | Model-level failover | +| `GET /v1/credits` | Same | Balance check | +| `GET /v1/generation` | Same | Request detail lookup | + +## What's different + +| Feature | Vercel | Tangle | +| --------------------- | ------------------------------------- | -------------------------------------------------------- | +| **Base URL** | `ai-gateway.vercel.sh/v1` | `router.tangle.tools/v1` | +| **Auth** | API key or OIDC token | API key, session, SIWE (wallet), or SpendAuth (on-chain) | +| **Pricing** | Zero markup | 20% markup (0% with BYOK) | +| **Operator network** | None | Decentralized operators compete on price/latency | +| **On-chain payments** | None | SpendAuth (EIP-712) — pay without a credit card | +| **Guardrails** | None | PII + injection detection built-in | +| **Web search tools** | Perplexity, Parallel, provider-native | Not yet (planned) | +| **OIDC auth** | Vercel-only | Not applicable | + +## Code change + +### AI SDK + +```diff + import { generateText } from 'ai' ++ import { createOpenAI } from '@ai-sdk/openai' + ++ const tangle = createOpenAI({ ++ apiKey: 'sk-tan-YOUR_KEY', ++ baseURL: 'https://router.tangle.tools/v1', ++ }) + + const { text } = await generateText({ +- model: 'anthropic/claude-sonnet-4-6', ++ model: tangle('anthropic/claude-sonnet-4-6'), + prompt: 'Hello', + providerOptions: { + gateway: 
{ + zeroDataRetention: true, // works the same + caching: 'auto', // works the same + }, + }, + }) +``` + +### OpenAI SDK + +```diff + const client = new OpenAI({ +- apiKey: process.env.AI_GATEWAY_API_KEY, +- baseURL: 'https://ai-gateway.vercel.sh/v1', ++ apiKey: process.env.TANGLE_API_KEY, ++ baseURL: 'https://router.tangle.tools/v1', + }) +``` + +## What you gain + +- **Operator network.** Access decentralized inference providers who compete on price and latency. +- **On-chain payments.** Pay with crypto via SpendAuth — no Stripe/credit card required. +- **Wallet auth.** Sign in with Ethereum (SIWE) for web3-native access. +- **Guardrails.** Built-in PII and prompt injection detection on every request. +- **Self-hostable.** Deploy your own gateway instance — it's open source. diff --git a/pages/gateway/models.mdx b/pages/gateway/models.mdx new file mode 100644 index 00000000..4c058018 --- /dev/null +++ b/pages/gateway/models.mdx @@ -0,0 +1,77 @@ +--- +title: Supported Models +description: Browse models available through Tangle Gateway across 20+ providers. +--- + +# Supported Models + +Tangle Gateway provides access to models from 20+ providers through a single API. 
+ +## Providers + +| Provider | Slug | Models | +| ----------- | ----------- | ----------------------------------------------------- | +| OpenAI | `openai` | GPT-4o, GPT-4o-mini, o1, o3, o4, DALL-E, Whisper, TTS | +| Anthropic | `anthropic` | Claude Opus, Sonnet, Haiku | +| Google | `google` | Gemini 2.5 Pro, Flash, Flash-Lite | +| Groq | `groq` | Llama 3.1/3.2 (fast inference) | +| Together AI | `together` | Open-source models (Llama, Qwen, Mixtral) | +| DeepSeek | `deepseek` | DeepSeek Chat, DeepSeek Coder | +| Mistral | `mistral` | Mistral Large, Codestral, Pixtral | +| Fireworks | `fireworks` | Phi, StarCoder, open models | +| Cohere | `cohere` | Command R/R+ | +| xAI | `xai` | Grok 2, Grok 3 | +| Cerebras | `cerebras` | Llama (fast inference) | +| SambaNova | `sambanova` | Fast open-model inference | +| AI21 | `ai21` | Jamba | +| Nvidia | `nvidia` | Nemotron | +| Z.ai | `zai` | GLM-4.7, GLM-5 | +| Moonshot | `moonshot` | Kimi | + +Plus decentralized operators on the Tangle network running [Blueprints](/developers/blueprints/introduction): + +| Blueprint | Models | How to route | +| -------------------------------------------------------------------------- | ----------------------------------------------- | ------------------------------------- | +| [LLM Inference](https://github.com/tangle-network/llm-inference-blueprint) | Llama, Qwen, Mistral, any vLLM-compatible model | `X-Tangle-Routing: operator` or auto | +| Vector Store | Embedding models for RAG | `/v1/collections` and `/v1/rag/query` | +| Custom Blueprints | Any model the operator deploys | Pin by Blueprint ID or operator slug | + +Operators set their own pricing and the gateway [scores them](/gateway/smart-routing) on reputation, latency, and price. See [Operator Routing](/gateway/operator-routing) for details. 
+ +## Model ID format + +Use `provider/model-name`: + +``` +anthropic/claude-sonnet-4-6 +openai/gpt-4o-mini +groq/llama-3.1-70b-versatile +``` + +Or use bare names — the gateway resolves the provider by prefix: + +| Prefix | Resolves to | +| --------------------------- | ----------- | +| `gpt-`, `o1-`, `o3-`, `o4-` | OpenAI | +| `claude-` | Anthropic | +| `gemini-`, `gemma-` | Google | +| `llama-`, `mixtral-` | Groq | +| `deepseek-` | DeepSeek | +| `mistral-`, `codestral-` | Mistral | +| `grok-` | xAI | +| `glm-` | Z.ai | +| `command-` | Cohere | + +## Modalities + +| Modality | Endpoint | Examples | +| ---------- | ---------------------------------------------- | -------------------------------------- | +| Text | `/v1/chat/completions` | All chat models | +| Images | `/v1/images/generations` | DALL-E, FLUX | +| Audio | `/v1/audio/transcriptions`, `/v1/audio/speech` | Whisper, TTS | +| Embeddings | `/v1/embeddings` | text-embedding-3-small/large | +| Video | `/v1/video/*` | Avatar generation, dubbing (via ph0ny) | + +## Dynamic discovery + +The model catalog is available at [`GET /api/models`](https://router.tangle.tools/api/models) with pricing, context length, and modality information for every model. diff --git a/pages/gateway/no-train.mdx b/pages/gateway/no-train.mdx new file mode 100644 index 00000000..f6461c01 --- /dev/null +++ b/pages/gateway/no-train.mdx @@ -0,0 +1,43 @@ +--- +title: Disallow Prompt Training +description: Route only through providers that don't use your data for model training. +--- + +# Disallow Prompt Training + +Ensure your prompts and responses are never used by providers to train their models. + +## Usage + +```json +{ + "providerOptions": { + "gateway": { + "disallowPromptTraining": true + } + } +} +``` + +## Relationship to ZDR + +Disallow prompt training is a **subset** of [Zero Data Retention](/gateway/zdr). All ZDR-compliant providers also disallow prompt training, but more providers disallow training than offer full ZDR. 
+ +| Filter | Verified providers | +| ----------------------- | ------------------ | +| No-train only | 25 providers | +| ZDR (includes no-train) | 13 providers | + +Use `disallowPromptTraining` when you care about IP protection but don't need full data deletion guarantees. + +## No-train verified providers + +All ZDR providers plus: OpenAI, Google AI Studio, Cohere, Perplexity, xAI, Morph AI, Novita AI, Voyage AI, and others. + +See the full list at [`GET /api/gateway/compliance`](/gateway/api-compliance). + +## Routing behavior + +Same as ZDR: operators and LiteLLM are skipped. Only direct provider calls to verified no-train providers. + +Can be enabled team-wide via `noTrainEnabled: true` on the team record. diff --git a/pages/gateway/operator-routing.mdx b/pages/gateway/operator-routing.mdx new file mode 100644 index 00000000..687d38d2 --- /dev/null +++ b/pages/gateway/operator-routing.mdx @@ -0,0 +1,89 @@ +--- +title: Operator Routing +description: Route inference through decentralized operators on the Tangle network. +--- + +# Operator Routing + +Operators are independent inference providers registered on the Tangle network. They run models on their own hardware, set their own prices, and earn from every request routed through them. + +## Blueprints + +Operators run **Blueprints** — on-chain service definitions that specify what an operator does. The inference-related Blueprints the gateway routes through: + +| Blueprint | What it serves | Repo | +| ----------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| **LLM Inference** | Chat completions, text generation (Llama, Qwen, Mistral, etc.) 
| [tangle-network/llm-inference-blueprint](https://github.com/tangle-network/llm-inference-blueprint) | +| **Vector Store** | Embedding storage and retrieval for RAG | Operator-deployed | +| **Custom** | Any model/pipeline an operator chooses to serve | [Build your own](/developers/blueprints/introduction) | + +The LLM Inference Blueprint uses [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core) — a shared Rust crate for EIP-712 signature verification, nonce management, and on-chain settlement. Operators compile it into a binary (`operator-lite`) that runs alongside their model server. + +To build and deploy your own inference Blueprint, see the [Blueprint SDK docs](/developers/blueprints/introduction) and the [Blueprint Runner](/developers/blueprint-runner/introduction). + +## How operators are discovered + +1. Operators register on-chain via the [Blueprint Service Manager (BSM)](/developers/blueprints/service-lifecycle) contract +2. The gateway syncs operator data from the chain every 60 seconds +3. Operators are stored in the database with their endpoint URL, pricing, and status +4. 
The [scoring algorithm](/gateway/smart-routing) ranks operators per-request + +## Routing to operators + +### Automatic (default) + +In `auto` mode, the gateway checks for operators serving the requested model before trying centralized providers: + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +### Pin to a Blueprint + +Route only to operators registered under a specific Blueprint: + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "X-Tangle-Blueprint: 42" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +### Pin to an operator + +Route to a specific operator by slug or Ethereum address: + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "X-Tangle-Operator: tangle-core-1" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +### Pin to a service instance + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "X-Tangle-Service: 7" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +## What's verified on-chain + +| Data | Verified? | +| ------------------------- | ---------------------------------- | +| Operator Ethereum address | Yes (signed transaction) | +| Active/inactive status | Yes (BSM contract state) | +| Staked amount | Yes (on-chain balance) | +| Pricing (per-token) | Yes (BSM contract) | +| Endpoint URL | No (self-reported at registration) | +| Backing provider | No (not tracked) | + +Because endpoint URL and backing provider are self-reported, operator routing is **not compatible with [ZDR](/gateway/zdr) or [no-train](/gateway/no-train)** compliance requirements. When compliance is required, operators are skipped and the gateway routes directly to verified providers. + +## Payment + +Operator requests can be paid two ways: + +1. 
**Platform credits** — deducted from your credit balance at the operator's listed price +2. **SpendAuth (on-chain)** — direct EIP-712 signed payment to the operator. No credit card needed. See [SpendAuth](/gateway/spend-auth). diff --git a/pages/gateway/pricing.mdx b/pages/gateway/pricing.mdx new file mode 100644 index 00000000..6634f841 --- /dev/null +++ b/pages/gateway/pricing.mdx @@ -0,0 +1,55 @@ +--- +title: Credits & Pricing +description: How billing works on Tangle Gateway. +--- + +# Credits & Pricing + +## Pricing model + +| Credential type | Markup | +| -------------------------------- | ------------------------------------------- | +| Platform credentials | 20% above provider list price | +| [BYOK](/gateway/byok) | **0%** — provider list price, no markup | +| [SpendAuth](/gateway/spend-auth) | Operator-set prices (typically competitive) | + +The 20% platform markup on non-BYOK requests funds operator payouts and platform infrastructure. Operators earn a share of every request routed through them. + +## Credits + +Credits are denominated in USD. Purchase via Stripe or receive as part of a subscription plan. + +Check your balance: + +```bash +curl -H "Authorization: Bearer sk-tan-YOUR_KEY" \ + https://router.tangle.tools/v1/credits +``` + +```json +{ + "balance": "95.50", + "total_used": "4.50" +} +``` + +## Cost per request + +Each request is charged based on tokens: + +``` +cost = (input_tokens × input_price) + (output_tokens × output_price) +``` + +Pricing varies by model. Check per-model pricing at [`GET /api/models`](https://router.tangle.tools/api/models) or in the `X-Tangle-Price-Input` / `X-Tangle-Price-Output` response headers. 
+ +## Billing transparency + +Every response includes pricing headers so you know the cost before it hits your balance: + +``` +X-Tangle-Price-Input: 0.000003 # USD per input token +X-Tangle-Price-Output: 0.000015 # USD per output token +``` + +Look up detailed billing for any request via [`GET /v1/generation`](/gateway/api-generation). diff --git a/pages/gateway/provider-options.mdx b/pages/gateway/provider-options.mdx new file mode 100644 index 00000000..ccb49797 --- /dev/null +++ b/pages/gateway/provider-options.mdx @@ -0,0 +1,70 @@ +--- +title: providerOptions.gateway +description: Complete reference for gateway-specific request options. +--- + +# providerOptions.gateway + +All gateway-specific options are passed inside `providerOptions.gateway` in the request body. These are stripped before forwarding to providers. + +## Full schema + +```typescript +interface GatewayOptions { + // Bring Your Own Key + byok?: Record> + + // Compliance routing + zeroDataRetention?: boolean + disallowPromptTraining?: boolean + + // Caching + caching?: 'auto' | false + cache?: false // disable response caching + + // Provider routing + order?: string[] // provider priority + only?: string[] // provider allowlist + + // Model fallbacks + models?: string[] // tried in order after primary model + + // Timeouts (1s-120s, clamped) + timeout?: number | Record +} +``` + +## Options reference + +| Option | Type | Default | Description | +| ------------------------ | ---------------------------------- | ------- | -------------------------------------------------------------- | +| `byok` | `Record>` | — | Per-request provider credentials. [Details](/gateway/byok) | +| `zeroDataRetention` | `boolean` | `false` | Route only to ZDR-verified providers. [Details](/gateway/zdr) | +| `disallowPromptTraining` | `boolean` | `false` | Route only to no-train providers. [Details](/gateway/no-train) | +| `caching` | `'auto'` | — | Auto-inject prompt cache markers. 
[Details](/gateway/caching) | +| `cache` | `false` | — | Set `false` to skip response cache for this request. | +| `order` | `string[]` | — | Provider priority order. [Details](/gateway/smart-routing) | +| `only` | `string[]` | — | Restrict to these providers only. | +| `models` | `string[]` | — | Fallback model list. [Details](/gateway/fallbacks) | +| `timeout` | `number \| Record` | `30000` | Timeout in ms. [Details](/gateway/timeouts) | + +## Example: everything at once + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Hello"}], + "providerOptions": { + "gateway": { + "byok": { + "anthropic": [{"apiKey": "sk-ant-..."}] + }, + "zeroDataRetention": true, + "caching": "auto", + "models": ["openai/gpt-4o"], + "timeout": {"anthropic": 10000, "openai": 5000}, + "order": ["anthropic", "openai"] + } + } +} +``` diff --git a/pages/gateway/rate-limiting.mdx b/pages/gateway/rate-limiting.mdx new file mode 100644 index 00000000..7b0962d2 --- /dev/null +++ b/pages/gateway/rate-limiting.mdx @@ -0,0 +1,44 @@ +--- +title: Rate Limiting +description: Rate limits by authentication method. +--- + +# Rate Limiting + +The gateway enforces sliding-window rate limits per authentication method. 
+ +## Limits + +| Auth method | Rate limit | Daily limit | +| -------------------------- | ----------- | ------------------------ | +| API Key | 60 req/min | Unlimited (with credits) | +| Session | 30 req/min | Unlimited (with credits) | +| SpendAuth | 120 req/min | Unlimited | +| Anonymous | 10 req/min | 5 req/day | +| Authenticated (no credits) | 30 req/min | 20 req/day | + +## Response headers + +Every response includes rate limit headers: + +``` +X-RateLimit-Limit: 60 +X-RateLimit-Remaining: 42 +X-RateLimit-Reset: 1712793600 +``` + +## 429 responses + +When rate limited: + +```json +{ + "error": { + "message": "Rate limit exceeded for this API key.", + "type": "rate_limit_error", + "code": "rate_limit_exceeded" + } +} +``` + +The `X-RateLimit-Reset` header indicates when the window resets (Unix timestamp in seconds). diff --git a/pages/gateway/response-headers.mdx b/pages/gateway/response-headers.mdx new file mode 100644 index 00000000..40350b0c --- /dev/null +++ b/pages/gateway/response-headers.mdx @@ -0,0 +1,41 @@ +--- +title: Response Headers +description: Headers returned on every gateway response. +--- + +# Response Headers + +Every response from the gateway includes metadata headers. 
+ +## Standard headers + +| Header | Description | Example | +| ----------------------- | --------------------------- | ------------------- | +| `X-Generation-Id` | Unique request ID | `gen_01J5K7ABCD...` | +| `X-Tangle-Price-Input` | USD per input token | `0.000003` | +| `X-Tangle-Price-Output` | USD per output token | `0.000015` | +| `X-Tangle-Cache` | Response cache status | `HIT` or `MISS` | +| `X-RateLimit-Limit` | Requests allowed per window | `60` | +| `X-RateLimit-Remaining` | Requests remaining | `42` | +| `X-RateLimit-Reset` | Window reset (Unix seconds) | `1712793600` | + +## Conditional headers + +| Header | When present | Description | +| ------------------------ | --------------------------------- | ------------------------------- | +| `X-Tangle-Routing-Trace` | When `ENABLE_ROUTING_TRACE` is on | Compact routing path | +| `X-Tangle-Operator` | When served by an operator | Operator slug | +| `X-Tangle-BYOK` | When BYOK credentials used | `true` | +| `X-Tangle-Caching` | When prompt caching applied | `auto` | +| `X-Tangle-Guardrails` | When guardrails flagged content | `pii:low,prompt_injection:high` | +| `X-Payment-Settled` | When SpendAuth payment succeeded | `true` | +| `X-Free-Tier-Remaining` | Free tier requests | `3` | +| `X-Free-Tier-Limit` | Free tier daily cap | `5` | + +## Error response headers + +| Header | When present | Description | +| -------------------- | ------------- | ------------------------- | +| `X-Payment-Required` | 402 responses | Amount needed (micro-USD) | +| `X-Payment-Currency` | 402 responses | `tsUSD` | +| `X-Payment-Methods` | 402 responses | `credits,spend_auth` | diff --git a/pages/gateway/routing-trace.mdx b/pages/gateway/routing-trace.mdx new file mode 100644 index 00000000..9e23653d --- /dev/null +++ b/pages/gateway/routing-trace.mdx @@ -0,0 +1,36 @@ +--- +title: Routing Trace +description: See exactly which providers were tried for every request. 
+---
+
+# Routing Trace
+
+When routing tracing is enabled (the default), every response includes an `X-Tangle-Routing-Trace` header showing the routing path — which providers were tried, whether they succeeded, and how long each took.
+
+## Header format
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[operator(err:5001ms)→litellm(200:340ms)]
+```
+
+Format: `model[provider(status:latency)→provider(status:latency)]`
+
+Multiple models (from [fallbacks](/gateway/fallbacks)):
+
+```
+X-Tangle-Routing-Trace: openai/gpt-4o[openai(500:2100ms)], anthropic/claude-sonnet-4-6[anthropic(200:1847ms)]
+```
+
+## Sanitization
+
+The trace header is sanitized for safety:
+
+- Operator names are shown as generic `operator` (slugs not exposed)
+- Error messages are not included (only status codes)
+- Internal URLs and hostnames are never leaked
+
+For the full unredacted trace including error messages, use the [generation lookup API](/gateway/api-generation) — the `routing_trace` field in the response contains the complete history.
+
+## Disabling
+
+Set `ENABLE_ROUTING_TRACE=false` to omit the header from all responses. See [Feature Flags](/gateway/feature-flags). diff --git a/pages/gateway/smart-routing.mdx b/pages/gateway/smart-routing.mdx new file mode 100644 index 00000000..9521f60f --- /dev/null +++ b/pages/gateway/smart-routing.mdx @@ -0,0 +1,58 @@ +--- +title: Smart Routing +description: How the gateway scores and selects operators. +--- + +# Smart Routing + +When multiple operators running the same [Blueprint](/developers/blueprints/introduction) serve the same model, the gateway selects the best one using a weighted scoring algorithm. 
+ +## Scoring formula + +``` +score = reputation(40%) + latency(30%) + price(30%) +``` + +| Factor | Weight | What it measures | +| -------------- | ------ | --------------------------------------------------------- | +| **Reputation** | 40% | Normalized reputation score (0-100) from on-chain history | +| **Latency** | 30% | Inverse of average response time (lower = better) | +| **Price** | 30% | Inverse of per-token price (cheaper = better) | + +## Operator selection + +1. Query all operators serving the requested model +2. Filter: only `active` or `degraded` status, must be pipeline head +3. Score each operator +4. Sort by score descending +5. Route to the highest-scoring operator + +If a preferred operator is specified (via `X-Tangle-Operator`), it's moved to the top of the ranked list regardless of score. + +## Health tracking + +The gateway tracks operator health via: + +- **Health checks** — periodic probes stored in `OperatorHealthCheck` +- **Request outcomes** — success/failure recorded per request +- **Latency tracking** — rolling average updated per request + +Operators that consistently fail are automatically deprioritized by their dropping reputation and rising latency scores. + +## Provider ordering (non-operator) + +For direct provider routing, use `providerOptions.gateway.order` and `only`: + +```json +{ + "providerOptions": { + "gateway": { + "order": ["bedrock", "anthropic"], + "only": ["bedrock", "anthropic"] + } + } +} +``` + +- `order`: Try providers in this order. First with valid credentials wins. +- `only`: Restrict to these providers. Others are excluded even if they have credentials. diff --git a/pages/gateway/spend-auth.mdx b/pages/gateway/spend-auth.mdx new file mode 100644 index 00000000..d464d36d --- /dev/null +++ b/pages/gateway/spend-auth.mdx @@ -0,0 +1,55 @@ +--- +title: SpendAuth (On-Chain Payments) +description: Pay operators directly on-chain via EIP-712 signed authorizations. 
+--- + +# SpendAuth + +SpendAuth lets you pay operators directly on-chain without a credit card or account. Sign an EIP-712 typed data message with your wallet, attach it to the request, and the operator claims payment after serving inference. + +## How it works + +1. **Sign:** Create an EIP-712 SpendAuth payload with your wallet +2. **Send:** Attach the signature as `X-Payment-Signature` header +3. **Authorize:** The gateway verifies the signature and calls `authorizeSpend` on-chain +4. **Serve:** The operator processes your inference request +5. **Claim:** The operator calls `claimPayment` to receive funds + +## Request format + +```bash +curl -X POST "https://router.tangle.tools/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -H "X-Payment-Signature: { + \"commitment\": \"0xabc...\", + \"serviceId\": \"1\", + \"jobIndex\": 0, + \"amount\": \"1000000\", + \"operator\": \"0x70997970...\", + \"nonce\": \"42\", + \"expiry\": \"1712793600\", + \"signature\": \"0xff...\" + }" \ + -d '{"model": "llama-3.1-70b", "messages": [...]}' +``` + +## Security + +- **EIP-712 signatures:** Cryptographically verified against the signing address +- **Nonce replay protection:** Each nonce can only be used once per commitment (in-memory + Redis store) +- **On-chain verification:** The `ShieldedCredits` contract validates authorization and deducts balance +- **Expiry:** SpendAuth payloads have a timestamp-based expiry + +## Rate limits + +SpendAuth requests get a generous 120 req/min limit per commitment since every request is paid. + +## On-chain contracts + +SpendAuth uses the `ShieldedCredits` contract deployed on the Tangle network. 
The contract handles: + +- Balance management (deposit, authorize, claim) +- Authorization verification (EIP-712 signature recovery) +- Payment settlement (operator claims after serving) + +The operator-side settlement logic is implemented in [tangle-inference-core](https://github.com/tangle-network/tangle-inference-core), a shared Rust crate used by the [LLM Inference Blueprint](https://github.com/tangle-network/llm-inference-blueprint) and other inference Blueprints. diff --git a/pages/gateway/timeouts.mdx b/pages/gateway/timeouts.mdx new file mode 100644 index 00000000..6c49cc18 --- /dev/null +++ b/pages/gateway/timeouts.mdx @@ -0,0 +1,57 @@ +--- +title: Provider Timeouts +description: Configure per-provider timeouts for fast failover. +--- + +# Provider Timeouts + +Set timeouts to trigger fast failover when a provider is slow. Values are clamped to 1-120 seconds. + +## Global timeout + +Apply the same timeout to all providers: + +```json +{ + "providerOptions": { + "gateway": { + "timeout": 5000 + } + } +} +``` + +## Per-provider timeouts + +Different providers have different latency profiles. Set timeouts individually: + +```json +{ + "providerOptions": { + "gateway": { + "timeout": { + "openai": 5000, + "anthropic": 10000, + "groq": 3000 + } + } + } +} +``` + +## Default behavior + +Without explicit timeouts, the gateway uses a 30-second default for all providers and a 30-second idle timeout for streaming responses. + +## Bounds + +All timeout values are clamped: + +- **Minimum:** 1,000ms (1 second) +- **Maximum:** 120,000ms (2 minutes) + +Values outside this range are silently clamped to the nearest bound. + +## Interaction with fallbacks + +When a provider times out, it counts as a failure in the [routing trace](/gateway/routing-trace) and the gateway moves to the next option — either a different provider for the same model, or the next [fallback model](/gateway/fallbacks). 
diff --git a/pages/gateway/zdr.mdx b/pages/gateway/zdr.mdx new file mode 100644 index 00000000..e9f44b4b --- /dev/null +++ b/pages/gateway/zdr.mdx @@ -0,0 +1,84 @@ +--- +title: Zero Data Retention +description: Route requests only through providers with verified zero data retention agreements. +--- + +# Zero Data Retention (ZDR) + +When ZDR is enabled, the gateway routes requests **only** through providers that have verified agreements to delete all request data immediately after processing. + +## Enable per-request + +```json +{ + "model": "anthropic/claude-sonnet-4-6", + "messages": [{"role": "user", "content": "Analyze this sensitive data..."}], + "providerOptions": { + "gateway": { + "zeroDataRetention": true + } + } +} +``` + +## Enable team-wide + +Set `zdrEnabled: true` on your team record. All requests from team members will enforce ZDR. Team-wide ZDR overrides per-request `zeroDataRetention: false`. + +## How it works + +When ZDR is enabled: + +1. **Operators are skipped.** Operators self-report their backing provider. The gateway cannot verify what provider an operator actually routes through, so operators are excluded from ZDR-compliant routing. + +2. **LiteLLM is skipped.** LiteLLM has its own internal fallback chain that may route to non-ZDR providers. Since we can't control LiteLLM's routing decisions, it's excluded. + +3. **Direct provider only.** The gateway calls the provider API directly, selecting only from verified ZDR providers. + +4. **BYOK fallback preserves ZDR.** If your [BYOK](/gateway/byok) credentials fail, the fallback to platform credentials still enforces ZDR filtering. 
+ +## ZDR-verified providers + +| Provider | ZDR | No-Train | Policy | +| -------------- | --- | -------- | ------------------------------------------------------------------------------------------------------ | +| Anthropic | Yes | Yes | [ZDR policy](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention) | +| Amazon Bedrock | Yes | Yes | [Data protection](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html) | +| Azure OpenAI | Yes | Yes | [Data privacy](https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/openai/data-privacy) | +| Groq | Yes | Yes | [ZDR policy](https://console.groq.com/docs/your-data#zero-data-retention) | +| Mistral | Yes | Yes | [Terms](https://legal.mistral.ai/terms) | +| Fireworks | Yes | Yes | [Data handling](https://docs.fireworks.ai/guides/security_compliance/data_handling) | +| Together | Yes | Yes | [Terms](https://www.together.ai/terms-of-service) | +| Cerebras | Yes | Yes | [Privacy](https://www.cerebras.ai/privacy-policy) | +| Google Vertex | Yes | Yes | [ZDR policy](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/vertex-ai-zero-data-retention) | +| Nebius | Yes | Yes | [Legal guide](https://docs.tokenfactory.nebius.com/legal/legal-quick-guide) | +| Parasail | Yes | Yes | [Terms](https://parasail.io/legal/terms-of-service) | +| Baseten | Yes | Yes | [Security](https://docs.baseten.co/observability/security) | +| DeepInfra | Yes | Yes | [Data handling](https://deepinfra.com/docs/data) | + +Compliance data is managed via the admin API (`PUT /api/admin/compliance`) and can be updated without code deploys. + +## Trust model + +| Routing tier | ZDR behavior | +| ----------------------- | ---------------------------------------------------------- | +| **Operators** | Skipped. Self-reported backing provider is unverifiable. | +| **LiteLLM** | Skipped. Internal fallback chain is uncontrollable. | +| **Direct provider** | Routed only to verified ZDR providers. 
| +| **BYOK fallback** | ZDR filters preserved on fallback to platform credentials. | +| **Operator-only + ZDR** | 400 error. Conflicting requirements. | + +The Tangle chain verifies operator **identity and stake**, not **behavior**. When compliance matters, the gateway routes direct. + +## Error responses + +If no ZDR-compliant provider is available for the requested model: + +```json +{ + "error": { + "message": "No ZDR providers available for model: deepseek/deepseek-chat. Providers considered: anthropic, groq, mistral, ...", + "type": "invalid_request_error", + "code": "no_providers_available" + } +} +``` diff --git a/pages/vision/architecture.mdx b/pages/vision/architecture.mdx index d64cd5bc..6b68d079 100644 --- a/pages/vision/architecture.mdx +++ b/pages/vision/architecture.mdx @@ -20,6 +20,7 @@ Tangle ties together three layers most platforms separate: the workbench where w | Layer | Runs here | Examples | | --------------- | ----------------------------- | -------------------------------------------------------- | | Workbench | Human and agent collaboration | Workflows, profiles, simulations, reviews | +| Gateway | Inference routing and billing | Model access, BYOK, ZDR compliance, operator selection | | Sandbox runtime | Executed tasks and tools | Agent sessions, tool calls, file edits | | Protocol | Coordination and settlement | Service registry, operator payments, staking, incentives | @@ -28,10 +29,13 @@ Tangle ties together three layers most platforms separate: the workbench where w **1) Execution Layer** Sandboxed runtimes with isolation, resource limits, and audit logs. This is where tasks actually run. -**2) Protocol Layer** +**2) Inference Layer** +The [Gateway](/gateway) routes inference requests across centralized providers and decentralized operators. It handles model selection, compliance filtering ([ZDR](/gateway/zdr), [no-train](/gateway/no-train)), [BYOK](/gateway/byok) credential management, and billing. 
+ +**3) Protocol Layer** The coordination plane. It handles operator discovery, payment routing, and incentive enforcement. -**3) Experience Layer** +**4) Experience Layer** The agentic workbench and [Blueprint SDK](https://github.com/tangle-network/blueprint/tree/main). This is where teams design workflows, run simulations, and ship services. The SDK also includes optional gateways for integrating external events and payment-driven HTTP job execution (webhooks,