diff --git a/.changeset/intent-skills.md b/.changeset/intent-skills.md new file mode 100644 index 000000000..e42f0fa94 --- /dev/null +++ b/.changeset/intent-skills.md @@ -0,0 +1,11 @@ +--- +'@tanstack/ai': patch +'@tanstack/ai-code-mode': patch +--- + +Add @tanstack/intent agent skills for AI coding assistants + +Adds 9 skill files covering chat-experience, tool-calling, media-generation, +code-mode, structured-outputs, adapter-configuration, ag-ui-protocol, +middleware, and custom-backend-integration. Skills guide AI agents to generate +correct TanStack AI code patterns and avoid common mistakes. diff --git a/_artifacts/domain_map.yaml b/_artifacts/domain_map.yaml new file mode 100644 index 000000000..e75db5747 --- /dev/null +++ b/_artifacts/domain_map.yaml @@ -0,0 +1,845 @@ +# domain_map.yaml +# Generated by skill-domain-discovery +# Library: @tanstack/ai +# Version: 0.10.0 +# Date: 2026-04-08 +# Status: reviewed + +library: + name: '@tanstack/ai' + version: '0.10.0' + repository: 'https://github.com/TanStack/ai' + description: 'Type-safe, provider-agnostic AI SDK for building chat, tool calling, media generation, and code execution features across React, Solid, Vue, Svelte, and Preact.' + primary_framework: 'framework-agnostic' + +domains: + - name: 'Building chat experiences' + slug: 'chat-experiences' + description: 'End-to-end chat implementation — server endpoints, streaming, client hooks, message rendering, thinking content, and multimodal messages.' + + - name: 'Implementing tool calling' + slug: 'tool-system' + description: 'Isomorphic tool definitions, server/client execution, approval flows, lazy discovery, and rendering tool results in UI.' + + - name: 'Generating media content' + slug: 'media-generation' + description: 'Image, video, text-to-speech, transcription, and summarization using activity-specific adapters and generation hooks.' 
+ + - name: 'Executing LLM-generated code' + slug: 'code-execution' + description: 'Code Mode sandbox setup, isolate drivers, skills system, and client-side execution progress display.' + + - name: 'Configuring adapters and models' + slug: 'adapter-management' + description: 'Provider adapter selection, per-model type safety, model options, runtime switching, extending adapters with custom models.' + + - name: 'Transport and protocol' + slug: 'transport-protocol' + description: 'AG-UI streaming protocol implementation, SSE/HTTP stream formats, and custom backend connections.' + + - name: 'Extending behavior' + slug: 'extensibility' + description: 'Middleware hooks for analytics, caching, observability, and cross-cutting concerns on the chat lifecycle.' + +skills: + - name: 'Chat Experience' + slug: 'chat-experience' + domain: 'chat-experiences' + description: 'Implement an end-to-end chatbot: server endpoint with streaming response, client-side UI with useChat, message rendering, multimodal content, and thinking/reasoning display.' 
+ type: 'core' + packages: + - '@tanstack/ai' + - '@tanstack/ai-client' + - '@tanstack/ai-react' + - '@tanstack/ai-solid' + - '@tanstack/ai-vue' + - '@tanstack/ai-svelte' + - '@tanstack/ai-preact' + covers: + - 'chat()' + - 'toServerSentEventsResponse()' + - 'toServerSentEventsStream()' + - 'toHttpResponse()' + - 'toHttpStream()' + - 'useChat()' + - 'ChatClient' + - 'fetchServerSentEvents()' + - 'fetchHttpStream()' + - 'UIMessage' + - 'ModelMessage' + - 'ContentPart (text, image, audio, video, document)' + - 'ThinkingPart' + - 'StreamChunk / AG-UI events' + - 'convertMessagesToModelMessages()' + - 'uiMessageToModelMessages()' + - 'modelMessagesToUIMessages()' + tasks: + - 'Set up a server-side chat endpoint that streams responses' + - 'Build a React/Solid/Vue/Svelte chat UI with useChat' + - 'Render assistant messages with thinking/reasoning content' + - 'Send multimodal content (images, audio, video, documents) in messages' + - 'Handle streaming states (loading, error, ready) in the UI' + - 'Convert between UIMessage and ModelMessage formats' + failure_modes: + - mistake: 'Using monolithic openai() instead of openaiText()' + mechanism: 'The legacy monolithic adapter (openai(), anthropic()) is deprecated. The new tree-shakeable adapters (openaiText, anthropicText) take the model name as an argument and are the correct API.' 
+ wrong_pattern: | + import { openai } from '@tanstack/ai-openai' + const stream = chat({ adapter: openai(), model: 'gpt-5.2', messages }) + correct_pattern: | + import { openaiText } from '@tanstack/ai-openai' + const stream = chat({ adapter: openaiText('gpt-5.2'), messages }) + source: 'docs/migration/migration.md' + priority: 'CRITICAL' + status: 'active' + version_context: 'Changed in v0.5.0; agents trained on older code will produce the monolithic pattern' + + - mistake: 'Using toResponseStream instead of toServerSentEventsResponse' + mechanism: 'toResponseStream was renamed to toServerSentEventsResponse (returns a Response) or toServerSentEventsStream (returns a ReadableStream). The old name no longer exists.' + wrong_pattern: | + import { toResponseStream } from '@tanstack/ai' + return toResponseStream(stream, { abortController }) + correct_pattern: | + import { toServerSentEventsResponse } from '@tanstack/ai' + return toServerSentEventsResponse(stream) + source: 'docs/migration/migration.md' + priority: 'HIGH' + status: 'active' + version_context: 'Renamed in v0.5.0' + + - mistake: 'Passing model as separate parameter to chat()' + mechanism: 'Model name is now passed to the adapter factory function, not as a separate parameter to chat(). Passing model to chat() is silently ignored.' + wrong_pattern: | + const stream = chat({ + adapter: openaiText(), + model: 'gpt-5.2', + messages, + }) + correct_pattern: | + const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + }) + source: 'docs/migration/migration.md' + priority: 'HIGH' + status: 'active' + version_context: 'Changed in v0.5.0' + + - mistake: 'Nesting temperature/maxTokens in options object' + mechanism: 'Common options (temperature, topP, maxTokens) are now flattened at the top level of the chat config, not nested under an options key.' 
+ wrong_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + options: { temperature: 0.7, maxTokens: 1000 }, + }) + correct_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + temperature: 0.7, + maxTokens: 1000, + }) + source: 'docs/migration/migration.md' + priority: 'HIGH' + status: 'active' + version_context: 'Changed in v0.5.0' + + - mistake: 'Using providerOptions instead of modelOptions' + mechanism: 'The providerOptions parameter was renamed to modelOptions. Using the old name is silently ignored — provider-specific options will not be applied.' + wrong_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + providerOptions: { responseFormat: { type: 'json_object' } }, + }) + correct_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + modelOptions: { responseFormat: { type: 'json_object' } }, + }) + source: 'docs/migration/migration.md' + priority: 'HIGH' + status: 'active' + version_context: 'Renamed in v0.5.0' + + - mistake: 'Not handling RUN_ERROR events in streaming context' + mechanism: 'Streaming errors do not throw — they arrive as RUN_ERROR events. If the client does not explicitly subscribe to and handle these events, errors are silently lost and the UI appears to hang.' + source: 'docs/chat/streaming.md' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Using Vercel AI SDK patterns (streamText, generateText)' + mechanism: 'Agents frequently generate Vercel AI SDK code instead of TanStack AI code. streamText() and generateText() do not exist — the correct function is chat(). This is the most common AI-agent confusion pattern.' 
+ wrong_pattern: | + import { streamText } from 'ai' + import { openai } from '@ai-sdk/openai' + const result = streamText({ model: openai('gpt-4o'), messages }) + correct_pattern: | + import { chat } from '@tanstack/ai' + import { openaiText } from '@tanstack/ai-openai' + const stream = chat({ adapter: openaiText('gpt-5.2'), messages }) + source: 'maintainer interview' + priority: 'CRITICAL' + status: 'active' + version_context: 'Vercel AI SDK patterns dominate agent training data; agents default to these' + + - mistake: 'Using Vercel createOpenAI() provider pattern' + mechanism: 'Agents generate createOpenAI() or similar Vercel provider factory patterns. TanStack AI uses openaiText(model) adapter factories instead.' + wrong_pattern: | + import { createOpenAI } from '@ai-sdk/openai' + const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY }) + streamText({ model: openai('gpt-4o'), messages }) + correct_pattern: | + import { openaiText } from '@tanstack/ai-openai' + import { chat } from '@tanstack/ai' + chat({ adapter: openaiText('gpt-5.2'), messages }) + source: 'maintainer interview' + priority: 'CRITICAL' + status: 'active' + version_context: 'createOpenAI/createAnthropic patterns from Vercel AI SDK are widespread in training data' + + - mistake: 'Implementing custom SSE stream instead of using toServerSentEventsResponse' + mechanism: 'Agents often write manual SSE formatting (writing "data: " prefixes, handling encoding, setting headers) instead of using the built-in toServerSentEventsResponse() or toServerSentEventsStream() utilities. The manual implementation is error-prone and misses edge cases.' 
+ wrong_pattern: | + const encoder = new TextEncoder() + const stream = new ReadableStream({ + async start(controller) { + for await (const chunk of chatStream) { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)) + } + controller.close() + } + }) + return new Response(stream, { headers: { 'Content-Type': 'text/event-stream' } }) + correct_pattern: | + import { chat, toServerSentEventsResponse } from '@tanstack/ai' + const stream = chat({ adapter: openaiText('gpt-5.2'), messages }) + return toServerSentEventsResponse(stream) + source: 'maintainer interview' + priority: 'HIGH' + status: 'active' + + - mistake: 'Implementing custom onEnd/onFinish callbacks instead of middleware' + mechanism: 'Agents try to add onEnd, onFinish, or onComplete callback options to chat() or useChat(). These do not exist. The correct pattern is to use middleware hooks (onFinish, onUsage, onError) for post-stream lifecycle events.' + wrong_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + onEnd: (result) => { trackAnalytics(result) }, + onFinish: (result) => { saveToDatabase(result) }, + }) + correct_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + middleware: [{ + onFinish: (ctx) => { trackAnalytics(ctx) }, + onUsage: (ctx) => { saveToDatabase(ctx) }, + }], + }) + source: 'maintainer interview' + priority: 'HIGH' + status: 'active' + skills: ['chat-experience', 'middleware'] + + - mistake: 'Importing from @tanstack/ai-client instead of framework package' + mechanism: 'The core @tanstack/ai-client package is headless and re-exported by framework packages (@tanstack/ai-react, ai-solid, etc.). Agents should import from the framework package so developers dont need to install both. Only import from @tanstack/ai-client for vanilla JS.' 
+ wrong_pattern: | + import { fetchServerSentEvents } from '@tanstack/ai-client' + import { useChat } from '@tanstack/ai-react' + correct_pattern: | + import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' + source: 'maintainer interview' + priority: 'HIGH' + status: 'active' + + - name: 'Tool Calling' + slug: 'tool-calling' + domain: 'tool-system' + description: 'Define isomorphic tools with toolDefinition(), implement server and client execution, set up approval flows, use lazy tool discovery, and render tool results in the UI.' + type: 'core' + packages: + - '@tanstack/ai' + - '@tanstack/ai-client' + - '@tanstack/ai-react' + covers: + - 'toolDefinition()' + - '.server()' + - '.client()' + - 'clientTools()' + - 'needsApproval' + - 'addToolApprovalResponse()' + - 'lazy: true' + - 'ToolCallManager' + - 'ToolCallPart / ToolResultPart' + - 'createChatClientOptions()' + - 'InferChatMessages' + tasks: + - 'Define a tool with toolDefinition() and Zod schemas' + - 'Add server-side tool execution with .server()' + - 'Add client-side tool execution with .client()' + - 'Build tools that execute on both server and client' + - 'Implement tool approval UI for sensitive operations' + - 'Use lazy tool discovery to reduce token usage' + - 'Render tool call progress and results in chat UI' + - 'Pass tool definitions to both server chat() and client useChat()' + failure_modes: + - mistake: 'Missing @standard-schema/spec causes tool types to be unknown' + mechanism: 'Without the @standard-schema/spec package installed, Zod schema inference in toolDefinition() breaks silently — input types become unknown in .server() and .client() callbacks. This is an undocumented peer dependency.' 
+ wrong_pattern: | + // package.json missing @standard-schema/spec + const tool = toolDefinition({ + name: 'getTodos', + inputSchema: z.object({ userId: z.string() }), + }) + // .server() callback input is typed as unknown + const serverTool = tool.server(async (input) => { /* input: unknown */ }) + correct_pattern: | + // pnpm add @standard-schema/spec + const tool = toolDefinition({ + name: 'getTodos', + inputSchema: z.object({ userId: z.string() }), + }) + // .server() callback input is correctly typed + const serverTool = tool.server(async ({ userId }) => { /* userId: string */ }) + source: 'https://github.com/TanStack/ai/issues/235' + priority: 'CRITICAL' + status: 'active' + version_context: 'Undocumented peer dependency; multiple users hit this independently' + + - mistake: 'Not passing tool definitions to both server and client' + mechanism: 'Server tools need the definition on the server (in chat()), and client tools need the definition on the client (in useChat/clientTools). Forgetting either side causes the tool to not execute or not render.' + wrong_pattern: | + // Server only — client can't render tool results + chat({ adapter, messages, tools: [getProducts] }) + // Client has no tool definitions + useChat({ connection: fetchServerSentEvents('/api/chat') }) + correct_pattern: | + // Server + chat({ adapter, messages, tools: [getProducts] }) + // Client also receives tool definitions for rendering + useChat({ + connection: fetchServerSentEvents('/api/chat'), + clientTools: clientTools([getProductsDef.client()]), + }) + source: 'docs/tools/tools.md, maintainer interview' + priority: 'HIGH' + status: 'active' + + - mistake: 'Multiple client tools stall in same round' + mechanism: 'Race condition in drainPostStreamActions() where nested drain calls steal queued actions, permanently stalling conversations when the LLM calls multiple client tools in a single round.' 
+ source: 'https://github.com/TanStack/ai/issues/302' + priority: 'HIGH' + status: 'active' + version_context: 'Known bug; community patch exists but no official fix' + + - mistake: 'Server tool output missing from UIMessage parts' + mechanism: 'Server-executed tools show state "input-complete" with no output field in the UIMessage ToolCallPart, while client tools show state "complete" with output. Code that checks part.output for server tools will get undefined.' + source: 'https://github.com/TanStack/ai/issues/176' + priority: 'HIGH' + status: 'active' + + - mistake: 'Anthropic adapter passes null tool input, stalling agent loops' + mechanism: 'When Anthropic produces an empty tool_use block, JSON.parse("null") returns null instead of {}, failing Zod validation and silently killing the conversation.' + source: 'https://github.com/TanStack/ai/issues/265' + priority: 'HIGH' + status: 'active' + skills: ['tool-calling', 'adapter-configuration'] + + - mistake: 'Tool results always stringified, blocking multimodal responses' + mechanism: 'JSON.stringify() is applied at three locations in the tool result pipeline, forcing all tool results to strings. This blocks image-returning tools with providers that support multimodal tool responses (e.g., OpenAI Responses API).' + source: 'https://github.com/TanStack/ai/issues/363' + priority: 'MEDIUM' + status: 'active' + + - name: 'Media Generation' + slug: 'media-generation' + domain: 'media-generation' + description: 'Generate images, video, speech (TTS), and transcriptions using activity-specific adapters and React/framework generation hooks.' 
+ type: 'core' + packages: + - '@tanstack/ai' + - '@tanstack/ai-openai' + - '@tanstack/ai-gemini' + - '@tanstack/ai-fal' + - '@tanstack/ai-elevenlabs' + - '@tanstack/ai-react' + covers: + - 'generateImage()' + - 'generateVideo()' + - 'getVideoJobStatus()' + - 'generateSpeech()' + - 'generateTranscription()' + - 'summarize()' + - 'useGenerateImage()' + - 'useGenerateSpeech()' + - 'useTranscription()' + - 'useSummarize()' + - 'useGenerateVideo()' + - 'useGeneration()' + - 'openaiImage / geminiImage / openaiSpeech / openaiTranscription / openaiVideo' + tasks: + - 'Generate images with OpenAI DALL-E/GPT-Image or Gemini Imagen' + - 'Generate video with OpenAI Sora (async polling pattern)' + - 'Implement text-to-speech with OpenAI TTS or ElevenLabs' + - 'Transcribe audio with OpenAI Whisper' + - 'Summarize text' + - 'Use React generation hooks for streaming progress' + - 'Connect generation hooks to TanStack Start server functions' + subsystems: + - name: 'Image generation' + package: '@tanstack/ai-openai, @tanstack/ai-gemini' + config_surface: 'Size formats differ (OpenAI WIDTHxHEIGHT vs Gemini aspectRatio), numberOfImages, quality levels' + - name: 'Video generation' + package: '@tanstack/ai-openai' + config_surface: 'Async job/polling pattern, pollingInterval, maxDuration, size, duration (4/8/12s)' + - name: 'Text-to-speech' + package: '@tanstack/ai-openai, @tanstack/ai-gemini, @tanstack/ai-elevenlabs' + config_surface: 'Voice selection, output format (mp3/opus/aac/flac/wav/pcm), streaming audio' + - name: 'Transcription' + package: '@tanstack/ai-openai' + config_surface: 'Input File/base64/data URL, language code, response format (json/text/srt/vtt), word-level timestamps' + failure_modes: + - mistake: 'Using removed embedding() function' + mechanism: 'Embeddings support was removed from TanStack AI. Use your vector database built-in embedding support or call the provider SDK directly.' 
+ wrong_pattern: | + import { embedding } from '@tanstack/ai' + import { openaiEmbed } from '@tanstack/ai-openai' + const result = await embedding({ adapter: openaiEmbed(), model: 'text-embedding-3-small', input: 'Hello' }) + correct_pattern: | + import OpenAI from 'openai' + const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }) + const result = await openai.embeddings.create({ model: 'text-embedding-3-small', input: 'Hello' }) + source: 'docs/migration/migration.md' + priority: 'HIGH' + status: 'removed' + version_context: 'Removed in v0.5.0; agents trained on pre-removal code will still generate this' + + - mistake: 'Not downloading OpenAI image URLs before they expire' + mechanism: 'OpenAI image generation URLs expire after 1 hour. If the URL is stored or displayed without downloading the image data, it will break silently after expiration.' + source: 'docs/media/image-generation.md' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Using stream:true for activities that do not support streaming' + mechanism: 'Not all generation activities support streaming (e.g., some models require polling for video). Using stream:true on unsupported activities may hang or return no progress events.' + source: 'docs/media/generations.md' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Forgetting toServerSentEventsResponse when using TanStack Start server functions' + mechanism: 'When using generation hooks with TanStack Start server functions, the stream must be wrapped with toServerSentEventsResponse(). Without it, the server function returns raw data instead of a streamable SSE response. This is the key integration step that cannot be skipped.' + source: 'maintainer interview' + priority: 'HIGH' + status: 'active' + + - name: 'Code Mode' + slug: 'code-mode' + domain: 'code-execution' + description: 'Set up LLM-generated TypeScript execution in sandboxed environments with isolate drivers, skills system, and client-side progress display.' 
+ type: 'core' + packages: + - '@tanstack/ai-code-mode' + - '@tanstack/ai-code-mode-skills' + - '@tanstack/ai-isolate-node' + - '@tanstack/ai-isolate-quickjs' + - '@tanstack/ai-isolate-cloudflare' + covers: + - 'createCodeModeTool()' + - 'createCodeModeSystemPrompt()' + - 'codeModeWithSkills()' + - 'createNodeIsolateDriver()' + - 'createQuickJSIsolateDriver()' + - 'createCloudflareIsolateDriver()' + - 'Trust strategies (ask, auto, none)' + - 'SkillStorage (FileSystem, LocalStorage, InMemory, Mongo)' + - 'Custom events: code_mode:execution_started, code_mode:console, code_mode:external_call' + tasks: + - 'Set up Code Mode with an isolate driver' + - 'Add tools that are callable from within generated code' + - 'Implement persistent skill storage for reusable code patterns' + - 'Display code execution progress in the client UI' + - 'Choose between Node, QuickJS, and Cloudflare isolate drivers' + subsystems: + - name: 'Node.js isolated-vm driver' + package: '@tanstack/ai-isolate-node' + config_surface: 'memoryLimit (128MB default), timeout (30s), skipProbe, requires native module compilation' + - name: 'QuickJS WASM driver' + package: '@tanstack/ai-isolate-quickjs' + config_surface: 'memoryLimit (128MB), timeout (30s), maxStackSize (512KB), universal but limited stdlib' + - name: 'Cloudflare Workers driver' + package: '@tanstack/ai-isolate-cloudflare' + config_surface: 'workerUrl (required), authorization, timeout (30s), maxToolRounds (10), client-server execution model' + failure_modes: + - mistake: 'Passing API keys or secrets to the sandbox environment' + mechanism: 'Code Mode executes LLM-generated code. Any secrets passed into the sandbox context are accessible to generated code, which could exfiltrate them via tool calls or network access.' + source: 'docs/code-mode/code-mode.md' + priority: 'CRITICAL' + status: 'active' + + - mistake: 'Not setting timeout for code execution' + mechanism: 'LLM-generated code may contain infinite loops. 
Without a timeout, execution hangs indefinitely. The default timeout is 30s, but developers may override it to 0 or remove it.' + source: 'packages/typescript/ai-code-mode/src/' + priority: 'HIGH' + status: 'active' + + - mistake: 'Using Node isolated-vm driver without checking platform compatibility' + mechanism: 'isolated-vm requires native module compilation. On incompatible platforms (wrong Node version, missing build tools), it causes segfaults. The driver has a probe mechanism (probeIsolatedVm()) but skipProbe:true bypasses it.' + source: 'packages/typescript/ai-isolate-node/src/' + priority: 'HIGH' + status: 'active' + + - mistake: 'Expecting identical behavior across isolate drivers' + mechanism: 'Node driver has full V8 support, QuickJS has limited stdlib (no File I/O, limited async), Cloudflare has network latency per tool call and a maxToolRounds limit (default 10). Same code may work in Node but fail in QuickJS or hit round limits on Cloudflare.' + source: 'docs/code-mode/code-mode-isolates.md' + priority: 'MEDIUM' + status: 'active' + + - name: 'Structured Outputs' + slug: 'structured-outputs' + domain: 'chat-experiences' + description: 'Get type-safe JSON responses from the LLM using schema constraints with Zod, ArkType, or Valibot. Just pass outputSchema to chat() — the adapter handles provider differences transparently.' + type: 'core' + packages: + - '@tanstack/ai' + covers: + - 'outputSchema parameter in chat()' + - 'convertSchemaToJsonSchema()' + - 'Zod, ArkType, Valibot schema support' + tasks: + - 'Extract typed data from LLM responses using outputSchema on chat()' + - 'Use Zod schemas for structured output validation' + failure_modes: + - mistake: 'Trying to implement provider-specific structured output strategies' + mechanism: 'Agents try to manually configure provider-specific structured output (e.g., setting response_format for OpenAI, forcing tool calls for Anthropic, setting responseMimeType for Gemini). 
The correct approach is to just pass outputSchema to chat() — the adapter handles everything transparently. There is no scenario where the developer needs to know the provider strategy.' + wrong_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + modelOptions: { responseFormat: { type: 'json_schema', json_schema: mySchema } }, + }) + correct_pattern: | + chat({ + adapter: openaiText('gpt-5.2'), + messages, + outputSchema: z.object({ name: z.string(), age: z.number() }), + }) + source: 'maintainer interview' + priority: 'HIGH' + status: 'active' + + - mistake: 'Using convertSchemaToJsonSchema with ArkType' + mechanism: 'convertSchemaToJsonSchema() is broken for ArkType schemas, producing incorrect JSON Schema output. Zod works correctly; other schema libraries may have similar issues.' + source: 'https://github.com/TanStack/ai/issues/276' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Missing required array in OpenAI structured output schema' + mechanism: 'OpenAI structured outputs require all properties in the required array and additionalProperties: false. Schemas that don''t meet this constraint fail silently or produce validation errors.' + source: 'Source code: openai adapter structured output transformation' + priority: 'MEDIUM' + status: 'active' + + - name: 'Adapter Configuration' + slug: 'adapter-configuration' + domain: 'adapter-management' + description: 'Select and configure provider adapters, set per-model type safety, configure model options (reasoning, thinking), switch adapters at runtime, and extend adapters with custom models.' 
+ type: 'core' + packages: + - '@tanstack/ai' + - '@tanstack/ai-openai' + - '@tanstack/ai-anthropic' + - '@tanstack/ai-gemini' + - '@tanstack/ai-ollama' + - '@tanstack/ai-grok' + - '@tanstack/ai-groq' + - '@tanstack/ai-openrouter' + covers: + - 'openaiText() / anthropicText() / geminiText() / ollamaText() / grokText() / groqText() / openRouterText()' + - 'modelOptions' + - 'Per-model TypeScript type narrowing' + - 'extendAdapter()' + - 'createModel()' + - 'Runtime adapter switching pattern' + - 'Reasoning/thinking configuration' + tasks: + - 'Choose and configure a provider adapter' + - 'Set up per-model type-safe options' + - 'Configure reasoning/thinking for Claude or OpenAI o-series' + - 'Switch between providers at runtime based on user selection' + - 'Extend an adapter with custom or fine-tuned models' + - 'Set up API keys via environment variables' + subsystems: + - name: 'OpenAI adapter' + package: '@tanstack/ai-openai' + config_surface: 'Responses API, reasoning effort/summary, service tier, store, prompt caching, structured output via json_schema' + - name: 'Anthropic adapter' + package: '@tanstack/ai-anthropic' + config_surface: 'Extended thinking budget, prompt caching, MCP servers, tool-based structured output' + - name: 'Gemini adapter' + package: '@tanstack/ai-gemini' + config_surface: 'responseMimeType for JSON, thinking, widest modality support (audio/video/document)' + - name: 'Ollama adapter' + package: '@tanstack/ai-ollama' + config_surface: 'Local-only, arbitrary model names, extensive sampling params (mirostat, numa, gpu layers)' + - name: 'OpenRouter adapter' + package: '@tanstack/ai-openrouter' + config_surface: 'Multi-provider routing, web search tool, provider preferences, 300+ models' + - name: 'Grok adapter' + package: '@tanstack/ai-grok' + config_surface: 'OpenAI-compatible API, 2M context, caching support' + - name: 'Groq adapter' + package: '@tanstack/ai-groq' + config_surface: 'Fastest inference, vision, browser search, code execution 
features' + reference_candidates: + - topic: 'Model metadata and supported features per model' + reason: '>50 models across adapters with distinct capabilities, context windows, and pricing' + - topic: 'Provider-specific modelOptions' + reason: 'Each adapter has 10+ unique provider options that differ in naming and behavior' + failure_modes: + - mistake: 'Missing @standard-schema/spec makes all types any' + mechanism: 'Without @standard-schema/spec installed, the entire type system degrades — chat() returns any, tool definitions lose type inference, modelOptions has no type checking. The library''s core value proposition (type safety) disappears with no warning.' + wrong_pattern: | + // package.json has no @standard-schema/spec + // Everything compiles but all types are 'any' + const result = chat({ adapter: openaiText('gpt-5.2'), messages }) + // result: any — no type checking at all + correct_pattern: | + // pnpm add @standard-schema/spec + // Full type safety restored + const result = chat({ adapter: openaiText('gpt-5.2'), messages }) + // result: AsyncIterable with full type checking + source: 'https://github.com/TanStack/ai/issues/235, https://github.com/TanStack/ai/discussions/191' + priority: 'CRITICAL' + status: 'active' + version_context: 'Undocumented peer dependency; #1 first-5-minutes failure for new users' + skills: ['adapter-configuration', 'tool-calling', 'structured-outputs'] + + - mistake: 'Confusing legacy monolithic adapter with tree-shakeable adapter' + mechanism: 'Both openai() (legacy) and openaiText() (current) exist. Legacy adapters are deprecated but not removed. Using the legacy adapter misses model-specific type safety and tree-shaking benefits.' + source: 'https://github.com/TanStack/ai/issues/407' + priority: 'HIGH' + status: 'active' + + - mistake: 'Ollama adapter silently drops systemPrompts' + mechanism: 'mapCommonOptionsToOllama() never accesses options.systemPrompts. 
System prompts passed to chat() with Ollama are silently ignored — no error, no warning.' + source: 'https://github.com/TanStack/ai/issues/388' + priority: 'HIGH' + status: 'active' + + - mistake: 'Anthropic prompt caching fails on system prompts' + mechanism: 'The Anthropic adapter joins system prompts into a plain string instead of TextBlockParam[], preventing Anthropic''s cache_control from working. This can cost $1.50-2.00 extra per agentic run.' + source: 'https://github.com/TanStack/ai/issues/379' + priority: 'HIGH' + status: 'active' + + - mistake: 'Wrong API key environment variable name' + mechanism: 'Each adapter looks for a specific env var: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY or GEMINI_API_KEY (not GOOGLE_GENAI_API_KEY), XAI_API_KEY (not GROK_API_KEY), GROQ_API_KEY, OPENROUTER_API_KEY, OLLAMA_HOST. Using the wrong name causes silent auth failures.' + source: 'Source code: adapter env key lookup functions' + priority: 'MEDIUM' + status: 'active' + + - name: 'Custom Backend Integration' + slug: 'custom-backend-integration' + domain: 'transport-protocol' + description: 'Connect the client-side useChat hook to a non-TanStack-AI backend through custom connection adapters.' + type: 'composition' + packages: + - '@tanstack/ai-client' + - '@tanstack/ai-react' + covers: + - 'Custom ConnectionAdapter interface' + - 'ConnectConnectionAdapter vs SubscribeConnectionAdapter' + - 'fetchServerSentEvents() customization' + - 'fetchHttpStream() customization' + tasks: + - 'Connect useChat to a custom backend that speaks SSE' + - 'Connect useChat to a custom backend with NDJSON streaming' + - 'Implement a fully custom connection adapter' + - 'Add authentication headers to connection requests' + failure_modes: + - mistake: 'Providing both connect and subscribe+send in connection adapter' + mechanism: 'The ConnectionAdapter interface has two mutually exclusive modes. 
Providing both throws: "Connection adapter must provide either connect or both subscribe and send, not both modes".' + source: 'Source code assertion: ai-client/src/connection-adapters.ts line 116' + priority: 'HIGH' + status: 'active' + + - mistake: 'Not handling SSE browser connection limits' + mechanism: 'Browsers limit SSE connections to 6-8 per domain. Opening multiple chat sessions on the same page can exhaust this limit, causing new connections to queue indefinitely.' + source: 'docs/chat/connection-adapters.md' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Using HTTP stream without implementing reconnection' + mechanism: 'SSE has built-in browser auto-reconnection, but HTTP stream (NDJSON) does not. Connection drops with HTTP stream silently lose the response without retrying.' + source: 'docs/protocol/http-stream-protocol.md' + priority: 'MEDIUM' + status: 'active' + + - name: 'AG-UI Protocol' + slug: 'ag-ui-protocol' + domain: 'transport-protocol' + description: 'Implement the AG-UI streaming protocol server-side without using client packages — for backends that serve AG-UI events to any compatible frontend.' 
+ type: 'core' + packages: + - '@tanstack/ai' + covers: + - 'StreamChunk / AG-UI event types' + - 'RUN_STARTED, TEXT_MESSAGE_START/CONTENT/END, TOOL_CALL_START/ARGS/END, RUN_FINISHED, RUN_ERROR' + - 'STEP_STARTED/STEP_FINISHED (thinking)' + - 'STATE_SNAPSHOT/STATE_DELTA' + - 'CUSTOM events' + - 'toServerSentEventsStream()' + - 'toHttpStream()' + tasks: + - 'Build a server that emits AG-UI protocol events' + - 'Choose between SSE and HTTP stream transport format' + - 'Handle tool calls within the AG-UI event lifecycle' + - 'Emit custom events for application-specific data' + failure_modes: + - mistake: 'TanStack AI message format doesn''t match AG-UI spec' + mechanism: 'TanStack AI''s internal request format (messages with parts containing content) diverges from the AG-UI spec (messages with content as string/InputContent, plus threadId, runId, tools, context, forwardedProps). Clients expecting strict AG-UI compliance will fail.' + source: 'https://github.com/TanStack/ai/issues/311' + priority: 'HIGH' + status: 'active' + + - mistake: 'Proxy buffering breaks SSE streaming' + mechanism: 'Reverse proxies (nginx, Cloudflare, etc.) may buffer SSE responses, delaying or batching events. Must set X-Accel-Buffering: no or equivalent proxy headers.' + source: 'docs/protocol/sse-protocol.md' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Assuming all AG-UI events arrive in every response' + mechanism: 'Not all events are guaranteed. STEP_STARTED/STEP_FINISHED only appear with thinking-enabled models. TOOL_CALL events only appear when tools are called. Code that expects a fixed event sequence will break.' + source: 'docs/protocol/chunk-definitions.md' + priority: 'MEDIUM' + status: 'active' + + - name: 'Middleware' + slug: 'middleware' + domain: 'extensibility' + description: 'Intercept the chat lifecycle with middleware hooks for analytics, event firing, tool caching, and custom cross-cutting concerns.' 
+ type: 'core' + packages: + - '@tanstack/ai' + covers: + - 'middleware array in chat() config' + - 'Hooks: onConfig, onStart, onChunk, onBeforeToolCall, onAfterToolCall, onUsage, onFinish, onAbort, onError' + - 'toolCacheMiddleware' + - 'Middleware execution order (left-to-right)' + tasks: + - 'Add analytics tracking to chat completions' + - 'Fire events at the end of a stream' + - 'Cache tool call results with TTL' + - 'Implement custom logging or tracing' + - 'Add rate limiting or request validation' + failure_modes: + - mistake: 'Structured output request runs outside middleware pipeline' + mechanism: 'When using chat({ outputSchema }), the final structured output request to the provider executes outside the middleware chain. Observability/tracing middleware will miss this call.' + source: 'https://github.com/TanStack/ai/issues/390' + priority: 'HIGH' + status: 'active' + + - mistake: 'Trying to modify StreamChunks in middleware' + mechanism: 'Middleware hooks provide read-only access to stream chunks via onChunk. They cannot modify, filter, or transform chunks after emission. Attempting to mutate chunk objects has no effect.' + source: 'docs/advanced/middleware.md' + priority: 'MEDIUM' + status: 'active' + + - mistake: 'Middleware exceptions breaking the stream' + mechanism: 'Unhandled exceptions in middleware hooks propagate up and break the streaming response. All middleware hooks should be wrapped in try-catch to prevent analytics/logging failures from killing the chat stream.' + source: 'docs/advanced/middleware.md' + priority: 'MEDIUM' + status: 'active' + +tensions: + - name: 'Type safety vs. quick prototyping' + skills: ['adapter-configuration', 'chat-experience'] + description: 'Per-model type safety requires explicit adapter creation, @standard-schema/spec installation, and specific model string literals. Quick prototyping wants dynamic model selection and minimal setup.' 
+ implication: 'Agents optimizing for quick setup will skip @standard-schema/spec and use string variables for model names, silently losing all type safety — the library''s core value proposition.' + + - name: 'Tree-shaking vs. API discoverability' + skills: ['adapter-configuration', 'chat-experience', 'media-generation'] + description: 'Split adapters (openaiText, openaiImage, openaiSpeech) minimize bundle size but require developers to know the exact import path for each activity.' + implication: 'Agents default to importing from the package root or using wildcard imports, pulling in the entire adapter bundle and defeating tree-shaking.' + + - name: 'Server/client tool symmetry vs. state consistency' + skills: ['tool-calling', 'chat-experience'] + description: 'Tools can execute on server or client, but server tool results have different UIMessage state (input-complete with no output) than client tools (complete with output).' + implication: 'Agents building tool rendering UI will write code that checks part.output, which works for client tools but fails silently for server tools.' + + - name: 'AG-UI protocol compliance vs. internal message format' + skills: ['ag-ui-protocol', 'custom-backend-integration'] + description: 'TanStack AI''s internal UIMessage format (parts-based) diverges from the AG-UI spec (content-based). Full AG-UI compliance would require a different message structure.' + implication: 'Agents building AG-UI compatible servers will use TanStack AI''s message format, which breaks interop with other AG-UI clients expecting the standard format.' + +cross_references: + - from: 'chat-experience' + to: 'tool-calling' + reason: 'Most chat implementations include tool calling. Tools require definitions on both server (chat()) and client (useChat/clientTools).' + + - from: 'chat-experience' + to: 'adapter-configuration' + reason: 'Chat setup requires adapter selection and model configuration. 
Adapter choice affects available features (thinking, structured output strategy).' + + - from: 'chat-experience' + to: 'middleware' + reason: 'Middleware hooks into the chat lifecycle. Analytics, logging, and caching are typically set up alongside chat configuration.' + + - from: 'tool-calling' + to: 'code-mode' + reason: 'Code Mode is an advanced alternative to tool calling for complex multi-step operations. Users building chat with tools should consider code mode for token efficiency.' + + - from: 'structured-outputs' + to: 'adapter-configuration' + reason: 'Structured output strategy differs by provider (native JSON schema vs tool-based vs responseMimeType). Adapter choice determines how structured outputs work.' + + - from: 'media-generation' + to: 'adapter-configuration' + reason: 'Each media activity (image, video, speech, transcription) requires a specific activity adapter. Provider capabilities vary significantly for media.' + + - from: 'custom-backend-integration' + to: 'ag-ui-protocol' + reason: 'Custom backends must implement either SSE or HTTP stream format. Understanding the AG-UI protocol helps build compatible custom servers.' + + - from: 'code-mode' + to: 'chat-experience' + reason: 'Code Mode is always used on top of a chat experience. Client integration requires handling code_mode:* custom events in useChat.' + +gaps: + - skill: 'chat-experience' + question: 'What is the recommended pattern for message persistence across page reloads? Multiple GitHub discussions request this but no official guidance exists.' + context: 'Users building production chat apps need persistence. Without guidance, agents will implement ad-hoc solutions.' + status: 'open' + + - skill: 'tool-calling' + question: 'What is the expected behavior when a server tool throws an error — should it terminate the agent loop or return an error result to the LLM for self-correction?' 
+ context: 'Docs say "return error in output, dont throw" but the actual error handling behavior in the agent loop is unclear.' + status: 'open' + + - skill: 'adapter-configuration' + question: 'Is @standard-schema/spec intended to be a hard dependency or peer dependency? The current silent degradation to any types seems unintentional.' + context: 'This is the #1 developer pain point. Clarifying intent would determine whether the skill should instruct installing it or whether it should be automatic.' + status: 'open' + + - skill: 'ag-ui-protocol' + question: 'How much AG-UI spec compliance is intended? The message format divergence seems like it could be a design choice or a known gap.' + context: 'Determines whether skills should teach TanStack AI message format or AG-UI spec format for server implementations.' + status: 'open' + + - skill: 'middleware' + question: 'Is the structured output request running outside middleware (#390) a known limitation or a bug that will be fixed?' + context: 'Affects whether the skill should include a workaround or document it as expected behavior.' + status: 'open' + + - skill: 'structured-outputs' + question: 'Should the skill teach provider-specific structured output strategies or just outputSchema?' + context: 'Resolved: maintainer confirmed always just use outputSchema on chat() — the adapter handles everything transparently. No scenario requires knowing provider strategy.' + status: 'resolved' + + - skill: 'chat-experience' + question: 'Should agents import from @tanstack/ai-client or from framework packages?' + context: 'Resolved: always import from framework package (e.g., @tanstack/ai-react). Core is headless and re-exported. Only use @tanstack/ai-client for vanilla JS.' 
+ status: 'resolved' diff --git a/_artifacts/skill_spec.md b/_artifacts/skill_spec.md new file mode 100644 index 000000000..1a3e3654e --- /dev/null +++ b/_artifacts/skill_spec.md @@ -0,0 +1,193 @@ +# TanStack AI — Skill Spec + +TanStack AI is a type-safe, provider-agnostic AI SDK for building AI-powered applications. It provides chat completion, streaming, isomorphic tool calling, media generation, and code execution across React, Solid, Vue, Svelte, and Preact, with adapters for OpenAI, Anthropic, Gemini, Ollama, Grok, Groq, and OpenRouter. + +## Domains + +| Domain | Description | Skills | +| ------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------ | +| Building chat experiences | End-to-end chat — server endpoints, streaming, client hooks, message rendering | chat-experience, structured-outputs | +| Implementing tool calling | Isomorphic tools, server/client execution, approval flows, lazy discovery | tool-calling | +| Generating media content | Image, video, TTS, transcription via activity-specific adapters | media-generation | +| Executing LLM-generated code | Code Mode sandbox setup, isolate drivers, skills system | code-mode | +| Configuring adapters and models | Provider selection, type safety, model options, runtime switching | adapter-configuration | +| Transport and protocol | AG-UI protocol, SSE/HTTP stream, custom backend connections | custom-backend-integration, ag-ui-protocol | +| Extending behavior | Middleware hooks for analytics, caching, observability | middleware | + +## Skill Inventory + +| Skill | Type | Domain | What it covers | Failure modes | +| -------------------------- | ----------- | ------------------ | ------------------------------------------------------------------------------------- | ------------- | +| chat-experience | core | chat-experiences | chat(), useChat, streaming, SSE/HTTP responses, message formats, multimodal, thinking 
| 11 | +| tool-calling | core | tool-system | toolDefinition(), .server()/.client(), approval, lazy discovery, rendering | 6 | +| media-generation | core | media-generation | generateImage/Video/Speech/Transcription, summarize, generation hooks | 4 | +| code-mode | core | code-execution | createCodeModeTool, isolate drivers, skills, client events | 4 | +| structured-outputs | core | chat-experiences | outputSchema on chat(), schema conversion | 3 | +| adapter-configuration | core | adapter-management | Provider adapters, modelOptions, type safety, extending, reasoning | 5 | +| custom-backend-integration | composition | transport-protocol | Custom ConnectionAdapter, SSE/HTTP stream connections | 3 | +| ag-ui-protocol | core | transport-protocol | AG-UI events, StreamChunk types, SSE/NDJSON formats | 3 | +| middleware | core | extensibility | Lifecycle hooks, tool caching, analytics, event firing | 3 | + +## Failure Mode Inventory + +### Chat Experience (11 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? 
| +| --- | -------------------------------------------------------------------- | -------- | -------------------- | ------------ | +| 1 | Using monolithic openai() instead of openaiText() | CRITICAL | migration guide | — | +| 2 | Using Vercel AI SDK patterns (streamText, generateText) | CRITICAL | maintainer interview | — | +| 3 | Using Vercel createOpenAI() provider pattern | CRITICAL | maintainer interview | — | +| 4 | Using toResponseStream instead of toServerSentEventsResponse | HIGH | migration guide | — | +| 5 | Passing model as separate parameter to chat() | HIGH | migration guide | — | +| 6 | Nesting temperature/maxTokens in options object | HIGH | migration guide | — | +| 7 | Using providerOptions instead of modelOptions | HIGH | migration guide | — | +| 8 | Implementing custom SSE stream instead of toServerSentEventsResponse | HIGH | maintainer interview | — | +| 9 | Implementing custom onEnd instead of middleware | HIGH | maintainer interview | middleware | +| 10 | Importing from @tanstack/ai-client instead of framework package | HIGH | maintainer interview | — | +| 11 | Not handling RUN_ERROR events in streaming | MEDIUM | docs | — | + +### Tool Calling (6 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? 
| +| --- | ------------------------------------------------------------- | -------- | ---------------- | --------------------- | +| 1 | Missing @standard-schema/spec causes tool types to be unknown | CRITICAL | issue #235 | adapter-configuration | +| 2 | Not passing tool definitions to both server and client | HIGH | docs + interview | — | +| 3 | Multiple client tools stall in same round | HIGH | issue #302 | — | +| 4 | Server tool output missing from UIMessage parts | HIGH | issue #176 | — | +| 5 | Anthropic null tool input stalling loops | HIGH | issue #265 | adapter-configuration | +| 6 | Tool results always stringified, blocking multimodal | MEDIUM | issue #363 | — | + +### Media Generation (4 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? | +| --- | --------------------------------------------------------- | -------- | -------------------- | ------------ | +| 1 | Using removed embedding() function | HIGH | migration guide | — | +| 2 | Forgetting toServerSentEventsResponse with TanStack Start | HIGH | maintainer interview | — | +| 3 | Not downloading OpenAI image URLs before expiry | MEDIUM | docs | — | +| 4 | Using stream:true for unsupported activities | MEDIUM | docs | — | + +### Code Mode (4 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? | +| --- | ------------------------------------------- | -------- | ----------- | ------------ | +| 1 | Passing API keys/secrets to sandbox | CRITICAL | docs | — | +| 2 | Not setting timeout for execution | HIGH | source code | — | +| 3 | Node isolated-vm platform incompatibility | HIGH | source code | — | +| 4 | Expecting identical behavior across drivers | MEDIUM | docs | — | + +### Structured Outputs (3 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? 
| +| --- | ------------------------------------------------------------------ | -------- | -------------------- | ------------ | +| 1 | Trying to implement provider-specific structured output strategies | HIGH | maintainer interview | — | +| 2 | Using convertSchemaToJsonSchema with ArkType | MEDIUM | issue #276 | — | +| 3 | Missing required array in OpenAI schema | MEDIUM | source code | — | + +### Adapter Configuration (5 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? | +| --- | ------------------------------------------------------- | -------- | ----------------- | -------------------------------- | +| 1 | Missing @standard-schema/spec makes all types any | CRITICAL | issues #235, #191 | tool-calling, structured-outputs | +| 2 | Confusing legacy monolithic with tree-shakeable adapter | HIGH | issue #407 | — | +| 3 | Ollama silently drops systemPrompts | HIGH | issue #388 | — | +| 4 | Anthropic prompt caching fails on system prompts | HIGH | issue #379 | — | +| 5 | Wrong API key environment variable name | MEDIUM | source code | — | + +### Custom Backend Integration (3 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? | +| --- | ----------------------------------------- | -------- | ---------------- | ------------ | +| 1 | Providing both connect and subscribe+send | HIGH | source assertion | — | +| 2 | SSE browser connection limits | MEDIUM | docs | — | +| 3 | HTTP stream without reconnection | MEDIUM | docs | — | + +### AG-UI Protocol (3 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? | +| --- | --------------------------------------- | -------- | ---------- | ------------ | +| 1 | Message format doesn't match AG-UI spec | HIGH | issue #311 | — | +| 2 | Proxy buffering breaks SSE | MEDIUM | docs | — | +| 3 | Assuming fixed event sequence | MEDIUM | docs | — | + +### Middleware (3 failure modes) + +| # | Mistake | Priority | Source | Cross-skill? 
| +| --- | ----------------------------------------- | -------- | ---------- | ------------ | +| 1 | Structured output runs outside middleware | HIGH | issue #390 | — | +| 2 | Trying to modify StreamChunks (read-only) | MEDIUM | docs | — | +| 3 | Middleware exceptions breaking stream | MEDIUM | docs | — | + +## Tensions + +| Tension | Skills | Agent implication | +| ------------------------------------ | ----------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| Type safety vs. quick prototyping | adapter-configuration <-> chat-experience | Agents skip @standard-schema/spec and use dynamic model strings, silently losing all type safety | +| Tree-shaking vs. API discoverability | adapter-configuration <-> chat-experience, media-generation | Agents import from package root or use wildcards, defeating bundle optimization | +| Server/client tool state asymmetry | tool-calling <-> chat-experience | Agents build tool UI that checks part.output, working for client tools but failing for server tools | +| AG-UI protocol vs. 
internal format | ag-ui-protocol <-> custom-backend-integration | Agents use TanStack AI message format, breaking interop with standard AG-UI clients | + +## Cross-References + +| From | To | Reason | +| -------------------------- | --------------------- | ------------------------------------------------------------------------------------- | +| chat-experience | tool-calling | Most chats include tools; tools need definitions on both server and client | +| chat-experience | adapter-configuration | Chat requires adapter selection; adapter choice affects available features | +| chat-experience | middleware | Analytics, logging, and caching set up alongside chat | +| tool-calling | code-mode | Code Mode is advanced alternative to tools for complex multi-step operations | +| structured-outputs | adapter-configuration | Just use outputSchema — but adapter choice determines internal strategy transparently | +| media-generation | adapter-configuration | Each media activity requires a specific activity adapter | +| custom-backend-integration | ag-ui-protocol | Custom backends must implement SSE or HTTP stream format | +| code-mode | chat-experience | Code Mode always used on top of chat; requires handling custom events | + +## Subsystems & Reference Candidates + +| Skill | Subsystems | Reference candidates | +| -------------------------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------- | +| media-generation | Image (OpenAI, Gemini), Video (OpenAI), TTS (OpenAI, Gemini, ElevenLabs), Transcription (OpenAI) | — | +| code-mode | Node.js isolated-vm, QuickJS WASM, Cloudflare Workers | — | +| adapter-configuration | OpenAI, Anthropic, Gemini, Ollama, Grok, Groq, OpenRouter | Model metadata (50+ models), Provider-specific modelOptions (10+ per adapter) | +| chat-experience | — | — | +| tool-calling | — | — | +| structured-outputs | — | — | +| 
custom-backend-integration | — | — | +| ag-ui-protocol | — | — | +| middleware | — | — | + +## Remaining Gaps + +| Skill | Question | Status | +| --------------------- | ------------------------------------------------------------------------------------------------------------------ | -------- | +| chat-experience | Recommended pattern for message persistence across page reloads? | open | +| tool-calling | Expected behavior when server tool throws — terminate loop or return error to LLM? | open | +| adapter-configuration | Is @standard-schema/spec intended as hard dep or peer dep? Silent degradation seems unintentional. | open | +| ag-ui-protocol | How much AG-UI spec compliance is intended? Message format divergence — design choice or known gap? | open | +| middleware | Is structured output running outside middleware (#390) a known limitation or bug? | open | +| structured-outputs | Should skill teach provider strategies? **Resolved:** always use outputSchema, adapter handles it. | resolved | +| chat-experience | Import from ai-client or framework package? **Resolved:** always framework package, only ai-client for vanilla JS. | resolved | + +## Key Rules from Maintainer Interview + +These rules must be embedded in every relevant skill: + +1. **Always import from framework package** (e.g., `@tanstack/ai-react`), never from `@tanstack/ai-client` — unless vanilla JS. +2. **Always use outputSchema on chat()** for structured outputs — never implement provider-specific strategies. +3. **Always ask the user which adapter and model** they want when implementing features — suggest the latest model. +4. **Always prompt the user about Code Mode** when they're building chat — it's an option they should know about. +5. **Tools must be passed to both server (chat()) and client (useChat/clientTools)** — this is the #1 implicit knowledge gap. 
+ +## Recommended Skill File Structure + +- **Core skills:** chat-experience, tool-calling, media-generation, code-mode, structured-outputs, adapter-configuration, ag-ui-protocol, middleware +- **Framework skills:** None needed separately — framework-specific guidance folded into chat-experience and media-generation +- **Lifecycle skills:** None identified (no migration between major versions yet — library is pre-1.0) +- **Composition skills:** custom-backend-integration +- **Reference files:** adapter-configuration (model metadata, provider options) + +## Composition Opportunities + +| Library | Integration points | Composition skill needed? | +| --------------- | ---------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | +| TanStack Start | Server functions with generation hooks (must use toServerSentEventsResponse) | No — documented within skills, but toServerSentEventsResponse is a critical integration point | +| React Router v7 | API routes for chat endpoints | No — standard framework routing | +| Next.js | API routes / App Router for chat endpoints | No — standard framework routing | +| Zod | Tool schemas, structured output schemas | No — Zod is a core dependency, not a composition | diff --git a/_artifacts/skill_tree.yaml b/_artifacts/skill_tree.yaml new file mode 100644 index 000000000..b3976f83a --- /dev/null +++ b/_artifacts/skill_tree.yaml @@ -0,0 +1,251 @@ +# skills/_artifacts/skill_tree.yaml +library: + name: '@tanstack/ai' + version: '0.10.0' + repository: 'https://github.com/TanStack/ai' + description: 'Type-safe, provider-agnostic AI SDK for building chat, tool calling, media generation, and code execution features.' 
+generated_from: + domain_map: '_artifacts/domain_map.yaml' + skill_spec: '_artifacts/skill_spec.md' +generated_at: '2026-04-08' + +skills: + # ── Core skills (in @tanstack/ai package) ── + + - name: 'TanStack AI — Core' + slug: 'ai-core' + type: 'core' + domain: 'chat-experiences' + path: 'skills/ai-core/SKILL.md' + package: 'packages/typescript/ai' + description: > + Entry point for TanStack AI skills. Routes to chat-experience, + tool-calling, media-generation, structured-outputs, adapter-configuration, + ag-ui-protocol, middleware, and custom-backend-integration based on + developer task. Covers chat(), toolDefinition(), generateImage(), + outputSchema, openaiText(), toServerSentEventsResponse(), middleware hooks. + requires: [] + + - name: 'Chat Experience' + slug: 'ai-core/chat-experience' + type: 'sub-skill' + domain: 'chat-experiences' + path: 'skills/ai-core/chat-experience/SKILL.md' + package: 'packages/typescript/ai' + description: > + End-to-end chat implementation: server endpoint with chat() and + toServerSentEventsResponse(), client-side useChat hook with + fetchServerSentEvents(), message rendering with UIMessage parts, + multimodal content, thinking/reasoning display. Covers streaming + states, connection adapters, and message format conversions. + NOT Vercel AI SDK — uses chat() not streamText(). 
+ requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/getting-started/quick-start.md' + - 'TanStack/ai:docs/chat/streaming.md' + - 'TanStack/ai:docs/chat/connection-adapters.md' + - 'TanStack/ai:docs/chat/thinking-content.md' + - 'TanStack/ai:docs/advanced/multimodal-content.md' + - 'TanStack/ai:packages/typescript/ai/src/core/chat.ts' + - 'TanStack/ai:packages/typescript/ai-client/src/chat-client.ts' + + - name: 'Tool Calling' + slug: 'ai-core/tool-calling' + type: 'sub-skill' + domain: 'tool-system' + path: 'skills/ai-core/tool-calling/SKILL.md' + package: 'packages/typescript/ai' + description: > + Isomorphic tool system: toolDefinition() with Zod schemas, + .server() and .client() implementations, passing tools to both + chat() on server and useChat/clientTools on client, tool approval + flows with needsApproval and addToolApprovalResponse(), lazy tool + discovery with lazy:true, rendering ToolCallPart and ToolResultPart + in UI. Requires @standard-schema/spec for type inference. + requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/tools/tools.md' + - 'TanStack/ai:docs/tools/server-tools.md' + - 'TanStack/ai:docs/tools/client-tools.md' + - 'TanStack/ai:docs/tools/tool-approval.md' + - 'TanStack/ai:docs/tools/lazy-tool-discovery.md' + - 'TanStack/ai:docs/tools/tool-architecture.md' + - 'TanStack/ai:packages/typescript/ai/src/tools/' + + - name: 'Media Generation' + slug: 'ai-core/media-generation' + type: 'sub-skill' + domain: 'media-generation' + path: 'skills/ai-core/media-generation/SKILL.md' + package: 'packages/typescript/ai' + description: > + Image, video, speech (TTS), and transcription generation using + activity-specific adapters: generateImage() with openaiImage/geminiImage, + generateVideo() with async polling, generateSpeech() with openaiSpeech, + generateTranscription() with openaiTranscription. React hooks: + useGenerateImage, useGenerateSpeech, useTranscription, useGenerateVideo. 
+ TanStack Start server function integration with toServerSentEventsResponse. + requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/media/generations.md' + - 'TanStack/ai:docs/media/generation-hooks.md' + - 'TanStack/ai:docs/media/image-generation.md' + - 'TanStack/ai:docs/media/video-generation.md' + - 'TanStack/ai:docs/media/text-to-speech.md' + - 'TanStack/ai:docs/media/transcription.md' + subsystems: + - 'image-generation' + - 'video-generation' + - 'text-to-speech' + - 'transcription' + + - name: 'Structured Outputs' + slug: 'ai-core/structured-outputs' + type: 'sub-skill' + domain: 'chat-experiences' + path: 'skills/ai-core/structured-outputs/SKILL.md' + package: 'packages/typescript/ai' + description: > + Type-safe JSON schema responses from LLMs using outputSchema on chat(). + Supports Zod, ArkType, and Valibot schemas. The adapter handles + provider-specific strategies transparently — never configure structured + output at the provider level. convertSchemaToJsonSchema() for manual + schema conversion. + requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/chat/structured-outputs.md' + - 'TanStack/ai:packages/typescript/ai/src/tools/schema-converter.ts' + + - name: 'Adapter Configuration' + slug: 'ai-core/adapter-configuration' + type: 'sub-skill' + domain: 'adapter-management' + path: 'skills/ai-core/adapter-configuration/SKILL.md' + package: 'packages/typescript/ai' + description: > + Provider adapter selection and configuration: openaiText, anthropicText, + geminiText, ollamaText, grokText, groqText, openRouterText. Per-model + type safety with modelOptions, reasoning/thinking configuration, + runtime adapter switching, extendAdapter() for custom models, createModel(). + API key env vars: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY/GEMINI_API_KEY, + XAI_API_KEY, GROQ_API_KEY, OPENROUTER_API_KEY, OLLAMA_HOST. 
+ requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/adapters/openai.md' + - 'TanStack/ai:docs/adapters/anthropic.md' + - 'TanStack/ai:docs/adapters/gemini.md' + - 'TanStack/ai:docs/adapters/ollama.md' + - 'TanStack/ai:docs/adapters/grok.md' + - 'TanStack/ai:docs/adapters/groq.md' + - 'TanStack/ai:docs/adapters/openrouter.md' + - 'TanStack/ai:docs/advanced/per-model-type-safety.md' + - 'TanStack/ai:docs/advanced/runtime-adapter-switching.md' + - 'TanStack/ai:docs/advanced/extend-adapter.md' + subsystems: + - 'openai' + - 'anthropic' + - 'gemini' + - 'ollama' + - 'grok' + - 'groq' + - 'openrouter' + references: + - 'references/openai-adapter.md' + - 'references/anthropic-adapter.md' + - 'references/gemini-adapter.md' + - 'references/ollama-adapter.md' + - 'references/grok-adapter.md' + - 'references/groq-adapter.md' + - 'references/openrouter-adapter.md' + + - name: 'AG-UI Protocol' + slug: 'ai-core/ag-ui-protocol' + type: 'sub-skill' + domain: 'transport-protocol' + path: 'skills/ai-core/ag-ui-protocol/SKILL.md' + package: 'packages/typescript/ai' + description: > + Server-side AG-UI streaming protocol implementation: StreamChunk event + types (RUN_STARTED, TEXT_MESSAGE_START/CONTENT/END, TOOL_CALL_START/ARGS/END, + RUN_FINISHED, RUN_ERROR, STEP_STARTED/STEP_FINISHED, STATE_SNAPSHOT/DELTA, + CUSTOM), toServerSentEventsStream() for SSE format, toHttpStream() for + NDJSON format. For backends serving AG-UI events without client packages. + requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/protocol/chunk-definitions.md' + - 'TanStack/ai:docs/protocol/sse-protocol.md' + - 'TanStack/ai:docs/protocol/http-stream-protocol.md' + + - name: 'Middleware' + slug: 'ai-core/middleware' + type: 'sub-skill' + domain: 'extensibility' + path: 'skills/ai-core/middleware/SKILL.md' + package: 'packages/typescript/ai' + description: > + Chat lifecycle middleware hooks: onConfig, onStart, onChunk, + onBeforeToolCall, onAfterToolCall, onUsage, onFinish, onAbort, onError. 
+ Use for analytics, event firing, tool caching (toolCacheMiddleware), + logging, and tracing. Middleware array in chat() config, left-to-right + execution order. NOT onEnd/onFinish callbacks on chat() — use middleware. + requires: + - 'ai-core' + sources: + - 'TanStack/ai:docs/advanced/middleware.md' + - 'TanStack/ai:packages/typescript/ai/src/middlewares/' + + - name: 'Custom Backend Integration' + slug: 'ai-core/custom-backend-integration' + type: 'composition' + domain: 'transport-protocol' + path: 'skills/ai-core/custom-backend-integration/SKILL.md' + package: 'packages/typescript/ai' + description: > + Connect useChat to a non-TanStack-AI backend through custom connection + adapters. ConnectConnectionAdapter (single async iterable) vs + SubscribeConnectionAdapter (separate subscribe/send). Customize + fetchServerSentEvents() and fetchHttpStream() with auth headers, + custom URLs, and request options. Import from framework package, + not @tanstack/ai-client. + requires: + - 'ai-core' + - 'ai-core/chat-experience' + sources: + - 'TanStack/ai:docs/chat/connection-adapters.md' + - 'TanStack/ai:packages/typescript/ai-client/src/connection-adapters.ts' + + # ── Code Mode skills (in @tanstack/ai-code-mode package) ── + + - name: 'Code Mode' + slug: 'ai-code-mode' + type: 'core' + domain: 'code-execution' + path: 'skills/ai-code-mode/SKILL.md' + package: 'packages/typescript/ai-code-mode' + description: > + LLM-generated TypeScript execution in sandboxed environments: + createCodeModeTool() with isolate drivers (createNodeIsolateDriver, + createQuickJSIsolateDriver, createCloudflareIsolateDriver), + codeModeWithSkills() for persistent skill libraries, trust strategies, + skill storage (FileSystem, LocalStorage, InMemory, Mongo), client-side + execution progress via code_mode:* custom events in useChat. 
+ requires: + - 'ai-core' + - 'ai-core/chat-experience' + sources: + - 'TanStack/ai:docs/code-mode/code-mode.md' + - 'TanStack/ai:docs/code-mode/code-mode-isolates.md' + - 'TanStack/ai:docs/code-mode/code-mode-with-skills.md' + - 'TanStack/ai:docs/code-mode/client-integration.md' + - 'TanStack/ai:packages/typescript/ai-code-mode/src/' + - 'TanStack/ai:packages/typescript/ai-code-mode-skills/src/' + subsystems: + - 'node-isolate' + - 'quickjs-isolate' + - 'cloudflare-isolate' diff --git a/packages/typescript/ai-code-mode/package.json b/packages/typescript/ai-code-mode/package.json index 999707248..4fe1ac8e3 100644 --- a/packages/typescript/ai-code-mode/package.json +++ b/packages/typescript/ai-code-mode/package.json @@ -24,7 +24,8 @@ }, "files": [ "dist", - "src" + "src", + "skills" ], "scripts": { "build": "vite build", @@ -42,7 +43,8 @@ "code-mode", "llm", "sandbox", - "isolate" + "isolate", + "tanstack-intent" ], "dependencies": { "esbuild": "^0.25.12" diff --git a/packages/typescript/ai-code-mode/skills/ai-code-mode/SKILL.md b/packages/typescript/ai-code-mode/skills/ai-code-mode/SKILL.md new file mode 100644 index 000000000..c769d5a94 --- /dev/null +++ b/packages/typescript/ai-code-mode/skills/ai-code-mode/SKILL.md @@ -0,0 +1,432 @@ +--- +name: ai-code-mode +description: > + LLM-generated TypeScript execution in sandboxed environments: + createCodeModeTool() with isolate drivers (createNodeIsolateDriver, + createQuickJSIsolateDriver, createCloudflareIsolateDriver), + codeModeWithSkills() for persistent skill libraries, trust strategies, + skill storage (FileSystem, LocalStorage, InMemory, Mongo), client-side + execution progress via code_mode:* custom events in useChat. 
+type: core +library: tanstack-ai +library_version: '0.10.0' +sources: + - 'TanStack/ai:docs/code-mode/code-mode.md' + - 'TanStack/ai:docs/code-mode/code-mode-isolates.md' + - 'TanStack/ai:docs/code-mode/code-mode-with-skills.md' + - 'TanStack/ai:docs/code-mode/client-integration.md' +--- + +> **Note**: This skill requires familiarity with ai-core and ai-core/chat-experience. Code Mode is always used on top of a chat experience. + +## Setup + +Complete Code Mode setup with Node.js isolate driver: + +```typescript +import { chat, toServerSentEventsResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' +import { createCodeModeTool } from '@tanstack/ai-code-mode' +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' +import { toolDefinition } from '@tanstack/ai' +import { z } from 'zod' + +// Define a tool that code can call +const fetchWeather = toolDefinition({ + name: 'fetchWeather', + description: 'Get current weather for a city', + inputSchema: z.object({ city: z.string() }), + outputSchema: z.object({ temp: z.number(), condition: z.string() }), +}).server(async ({ city }) => { + const res = await fetch(`https://api.weather.com/${city}`) + return res.json() +}) + +// Create code mode tool with Node isolate +const codeModeTool = createCodeModeTool({ + driver: createNodeIsolateDriver({ + memoryLimit: 128, + timeout: 30000, + }), + tools: [fetchWeather], +}) + +// Use in chat +const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + tools: [codeModeTool], +}) + +return toServerSentEventsResponse(stream) +``` + +The recommended higher-level entry point is `createCodeMode()`, which returns both the tool and a matching system prompt: + +```typescript +import { chat } from '@tanstack/ai' +import { createCodeMode } from '@tanstack/ai-code-mode' +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' +import { openaiText } from '@tanstack/ai-openai' + +const { tool, systemPrompt } = createCodeMode({ + 
driver: createNodeIsolateDriver(), + tools: [fetchWeather], + timeout: 30_000, +}) + +const stream = chat({ + adapter: openaiText('gpt-4o'), + systemPrompts: ['You are a helpful assistant.', systemPrompt], + tools: [tool], + messages, +}) +``` + +`createCodeMode` calls `createCodeModeTool` and `createCodeModeSystemPrompt` internally. The system prompt includes generated TypeScript type stubs for each tool so the LLM writes correct calls. + +## Core Patterns + +### 1. Choosing an Isolate Driver + +Three drivers implement the `IsolateDriver` interface. All are interchangeable. + +**Node.js** (`createNodeIsolateDriver`) -- Full V8 with JIT. Fastest option. Requires `isolated-vm` native C++ addon. + +```typescript +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' + +const driver = createNodeIsolateDriver({ + memoryLimit: 128, // MB, default 128 + timeout: 30_000, // ms, default 30000 + // skipProbe: false -- set true only after verifying compatibility +}) +``` + +**QuickJS** (`createQuickJSIsolateDriver`) -- WASM-based, no native deps. Works in Node.js, browsers, Deno, Bun, and edge runtimes. Slower (interpreted, no JIT). Limited stdlib (no File I/O). + +```typescript +import { createQuickJSIsolateDriver } from '@tanstack/ai-isolate-quickjs' + +const driver = createQuickJSIsolateDriver({ + memoryLimit: 128, // MB, default 128 + timeout: 30_000, // ms, default 30000 + maxStackSize: 524288, // bytes, default 512 KiB +}) +``` + +**Cloudflare** (`createCloudflareIsolateDriver`) -- Edge execution via a deployed Cloudflare Worker. Requires a `workerUrl` pointing to your deployed worker. Network latency on each tool call. 
+ +```typescript +import { createCloudflareIsolateDriver } from '@tanstack/ai-isolate-cloudflare' + +const driver = createCloudflareIsolateDriver({ + workerUrl: 'https://my-code-mode-worker.my-account.workers.dev', + authorization: process.env.CODE_MODE_WORKER_SECRET, + timeout: 30_000, // ms, default 30000 + maxToolRounds: 10, // max tool-call/result cycles, default 10 +}) +``` + +| Driver | Best for | Native deps | Browser support | Performance | +| ---------- | --------------------------- | --------------- | --------------- | -------------------- | +| Node | Server-side Node.js | Yes (C++ addon) | No | Fast (V8 JIT) | +| QuickJS | Browsers, edge, portability | None (WASM) | Yes | Slower (interpreted) | +| Cloudflare | Edge deployments | None | N/A | Fast (V8 on edge) | + +### 2. Adding Persistent Skills with codeModeWithSkills() + +Skills let the LLM save reusable code snippets. On future requests, relevant skills are loaded and exposed as callable tools. + +```typescript +import { chat, maxIterations } from '@tanstack/ai' +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' +import { codeModeWithSkills } from '@tanstack/ai-code-mode-skills' +import { createFileSkillStorage } from '@tanstack/ai-code-mode-skills/storage' +import { + createDefaultTrustStrategy, + createAlwaysTrustedStrategy, + createCustomTrustStrategy, +} from '@tanstack/ai-code-mode-skills' +import { openaiText } from '@tanstack/ai-openai' + +// Trust strategies control how skills earn trust through executions +// Default: untrusted -> provisional (10+ runs, >=90%) -> trusted (100+ runs, >=95%) +// Relaxed: untrusted -> provisional (3+ runs, >=80%) -> trusted (10+ runs, >=90%) +// Always trusted: immediately trusted (dev/testing) +// Custom: configurable thresholds +const trustStrategy = createDefaultTrustStrategy() + +// Storage options: file system (production) or memory (testing) +const storage = createFileSkillStorage({ + directory: './.skills', + trustStrategy, +}) + +const 
driver = createNodeIsolateDriver() + +// High-level API: automatic LLM-based skill selection +const { toolsRegistry, systemPrompt, selectedSkills } = + await codeModeWithSkills({ + config: { + driver, + tools: [myTool1, myTool2], + timeout: 60_000, + memoryLimit: 128, + }, + adapter: openaiText('gpt-4o-mini'), // cheap model for skill selection + skills: { + storage, + maxSkillsInContext: 5, + }, + messages, + }) + +const stream = chat({ + adapter: openaiText('gpt-4o'), + tools: toolsRegistry.getTools(), + messages, + systemPrompts: ['You are a helpful assistant.', systemPrompt], + agentLoopStrategy: maxIterations(15), +}) +``` + +The registry includes: `execute_typescript`, `search_skills`, `get_skill`, `register_skill`, and one tool per selected skill. + +Custom trust strategy example: + +```typescript +const strategy = createCustomTrustStrategy({ + initialLevel: 'untrusted', + provisionalThreshold: { executions: 5, successRate: 0.85 }, + trustedThreshold: { executions: 50, successRate: 0.95 }, +}) +``` + +Storage implementations: + +```typescript +// File storage (production) -- persists skills as files on disk +import { createFileSkillStorage } from '@tanstack/ai-code-mode-skills/storage' +const fileStorage = createFileSkillStorage({ directory: './.skills' }) + +// Memory storage (testing) -- in-memory, lost on restart +import { createMemorySkillStorage } from '@tanstack/ai-code-mode-skills/storage' +const memStorage = createMemorySkillStorage() +``` + +### 3. Client-Side Execution Progress Display + +Code Mode emits custom events during sandbox execution. Handle them in `useChat` via `onCustomEvent`. 
+
+Events emitted:
+
+| Event                         | When                                 | Key fields                       |
+| ----------------------------- | ------------------------------------ | -------------------------------- |
+| `code_mode:execution_started` | Sandbox begins                       | `timestamp`, `codeLength`        |
+| `code_mode:console`           | Each console.log/error/warn/info     | `level`, `message`, `timestamp`  |
+| `code_mode:external_call`     | Before an external\_\* function runs | `function`, `args`, `timestamp`  |
+| `code_mode:external_result`   | After successful external\_\* call   | `function`, `result`, `duration` |
+| `code_mode:external_error`    | When external\_\* call fails         | `function`, `error`, `duration`  |
+
+```typescript
+import { useCallback, useRef, useState } from 'react'
+import { useChat, fetchServerSentEvents } from '@tanstack/ai-react'
+
+interface VMEvent {
+  id: string
+  eventType: string
+  data: unknown
+  timestamp: number
+}
+
+export function CodeModeChat() {
+  const [toolCallEvents, setToolCallEvents] = useState<
+    Map<string, Array<VMEvent>>
+  >(new Map())
+  const eventIdCounter = useRef(0)
+
+  const handleCustomEvent = useCallback(
+    (
+      eventType: string,
+      data: unknown,
+      context: { toolCallId?: string },
+    ) => {
+      const { toolCallId } = context
+      if (!toolCallId) return
+
+      const event: VMEvent = {
+        id: `event-${eventIdCounter.current++}`,
+        eventType,
+        data,
+        timestamp: Date.now(),
+      }
+
+      setToolCallEvents((prev) => {
+        const next = new Map(prev)
+        const events = next.get(toolCallId) || []
+        next.set(toolCallId, [...events, event])
+        return next
+      })
+    },
+    [],
+  )
+
+  const { messages, sendMessage, isLoading } = useChat({
+    connection: fetchServerSentEvents('/api/chat'),
+    onCustomEvent: handleCustomEvent,
+  })
+
+  return (
+    <div>
+      {messages.map((message) => (
+        <div key={message.id}>
+          {message.parts.map((part) => {
+            if (part.type === 'text') {
+              return <div key={part.id}>{part.content}</div>
+            }
+            if (
+              part.type === 'tool-call' &&
+              part.name === 'execute_typescript'
+            ) {
+              const events = toolCallEvents.get(part.id) || []
+              return (
+                <div key={part.id}>
+                  <pre>{JSON.parse(part.arguments)?.typescriptCode}</pre>
+                  {events.map((evt) => (
+                    <div key={evt.id}>
+                      {evt.eventType}: {JSON.stringify(evt.data)}
+                    </div>
+                  ))}
+                  {part.output && (
+                    <pre>{JSON.stringify(part.output, null, 2)}</pre>
+                  )}
+                </div>
+              )
+            }
+            return null
+          })}
+        </div>
+      ))}
+    </div>
+ ) +} +``` + +The `onCustomEvent` callback signature is identical across all framework integrations (`@tanstack/ai-react`, `@tanstack/ai-solid`, `@tanstack/ai-vue`, `@tanstack/ai-svelte`): + +```typescript +(eventType: string, data: unknown, context: { toolCallId?: string }) => void +``` + +Skill-specific events (when using `codeModeWithSkills`): + +| Event | When | Key fields | +| ------------------------ | ------------------ | ----------------------------- | +| `code_mode:skill_call` | Skill tool invoked | `skill`, `input`, `timestamp` | +| `code_mode:skill_result` | Skill completed | `skill`, `result`, `duration` | +| `code_mode:skill_error` | Skill failed | `skill`, `error`, `duration` | +| `skill:registered` | New skill saved | `id`, `name`, `description` | + +## Common Mistakes + +### CRITICAL: Passing API keys or secrets to the sandbox environment + +Code Mode executes LLM-generated code. Any secrets available in the sandbox context are accessible to generated code, which could exfiltrate them via tool calls. Never pass API keys, database credentials, or tokens into the sandbox. Keep secrets in your tool server implementations, which run in the host process outside the sandbox. 
+ +Wrong: + +```typescript +const codeModeTool = createCodeModeTool({ + driver, + tools: [ + toolDefinition({ + name: 'callApi', + inputSchema: z.object({ url: z.string(), apiKey: z.string() }), + outputSchema: z.any(), + }).server(async ({ url, apiKey }) => + fetch(url, { + headers: { Authorization: apiKey }, + }), + ), + ], +}) +``` + +Right: + +```typescript +const codeModeTool = createCodeModeTool({ + driver, + tools: [ + toolDefinition({ + name: 'callApi', + inputSchema: z.object({ url: z.string() }), + outputSchema: z.any(), + }).server(async ({ url }) => + fetch(url, { + headers: { Authorization: process.env.API_KEY }, // secret stays in host + }), + ), + ], +}) +``` + +Source: docs/code-mode/code-mode.md + +### HIGH: Not setting timeout for code execution + +LLM-generated code may contain infinite loops. The default timeout is 30s, but developers may override to 0 (no timeout). Always set an explicit, finite timeout. + +Wrong: + +```typescript +const driver = createNodeIsolateDriver({ timeout: 0 }) +``` + +Right: + +```typescript +const driver = createNodeIsolateDriver({ timeout: 30_000 }) +``` + +Source: ai-code-mode source (default timeout in CodeModeToolConfig) + +### HIGH: Using Node isolated-vm driver without checking platform compatibility + +`isolated-vm` requires native module compilation. An incompatible build (wrong Node.js version, missing build tools) causes segfaults that no JS error handling can catch. The driver runs a subprocess probe by default. Never set `skipProbe: true` unless you have independently verified compatibility. Use `probeIsolatedVm()` to check before creating the driver. 
+ +```typescript +import { + createNodeIsolateDriver, + probeIsolatedVm, +} from '@tanstack/ai-isolate-node' + +const probe = probeIsolatedVm() +if (!probe.compatible) { + console.error('isolated-vm not compatible:', probe.error) + // Fall back to QuickJS +} + +// Never do this unless you verified compatibility yourself: +// const driver = createNodeIsolateDriver({ skipProbe: true }) +``` + +Source: ai-isolate-node source (probeIsolatedVm implementation) + +### MEDIUM: Expecting identical behavior across isolate drivers + +The three drivers have different capabilities. Same code may work in Node but fail elsewhere. + +- **Node**: Full V8 support, JIT compilation, configurable memory limit +- **QuickJS**: Interpreted, limited stdlib (no File I/O), configurable stack size, asyncified execution (serialized through global queue) +- **Cloudflare**: Network latency per tool call round-trip, `maxToolRounds` limit (default 10), requires deployed worker with `UNSAFE_EVAL` or `eval` unsafe binding + +Test generated code against your target driver. If you need portability, target QuickJS's subset. 
+ +Source: docs/code-mode/code-mode-isolates.md + +## Cross-References + +- See also: ai-core/tool-calling/SKILL.md -- Code Mode is an alternative to standard tool calling for complex multi-step operations +- See also: ai-core/chat-experience/SKILL.md -- Code Mode requires handling custom events in useChat diff --git a/packages/typescript/ai/package.json b/packages/typescript/ai/package.json index 9705b9ef6..9617f4bdc 100644 --- a/packages/typescript/ai/package.json +++ b/packages/typescript/ai/package.json @@ -32,7 +32,8 @@ }, "files": [ "dist", - "src" + "src", + "skills" ], "scripts": { "build": "vite build", @@ -52,7 +53,8 @@ "sdk", "llm", "chat", - "embeddings" + "embeddings", + "tanstack-intent" ], "dependencies": { "@tanstack/ai-event-client": "workspace:*", diff --git a/packages/typescript/ai/skills/ai-core/SKILL.md b/packages/typescript/ai/skills/ai-core/SKILL.md new file mode 100644 index 000000000..f641274cd --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/SKILL.md @@ -0,0 +1,59 @@ +--- +name: ai-core +description: > + Entry point for TanStack AI skills. Routes to chat-experience, tool-calling, + media-generation, structured-outputs, adapter-configuration, ag-ui-protocol, + middleware, and custom-backend-integration. Use chat() not streamText(), + openaiText() not createOpenAI(), toServerSentEventsResponse() not manual SSE, + middleware hooks not onEnd callbacks. +type: core +library: tanstack-ai +library_version: '0.10.0' +--- + +# TanStack AI — Core Concepts + +TanStack AI is a type-safe, provider-agnostic AI SDK. Server-side functions +live in `@tanstack/ai` and provider adapter packages. Client-side hooks live +in framework packages (`@tanstack/ai-react`, `@tanstack/ai-solid`, etc.). +Always import from the framework package on the client — never from +`@tanstack/ai-client` directly (unless vanilla JS). + +## Sub-Skills + +| Need to... 
| Read | +| ------------------------------------------------- | ------------------------------------------- | +| Build a chat UI with streaming | ai-core/chat-experience/SKILL.md | +| Add tool calling (server, client, or both) | ai-core/tool-calling/SKILL.md | +| Generate images, video, speech, or transcriptions | ai-core/media-generation/SKILL.md | +| Get typed JSON responses from the LLM | ai-core/structured-outputs/SKILL.md | +| Choose and configure a provider adapter | ai-core/adapter-configuration/SKILL.md | +| Implement AG-UI streaming protocol server-side | ai-core/ag-ui-protocol/SKILL.md | +| Add analytics, logging, or lifecycle hooks | ai-core/middleware/SKILL.md | +| Connect to a non-TanStack-AI backend | ai-core/custom-backend-integration/SKILL.md | +| Set up Code Mode (LLM code execution) | See `@tanstack/ai-code-mode` package skills | + +## Quick Decision Tree + +- Setting up a chatbot? → ai-core/chat-experience +- Adding function calling? → ai-core/tool-calling +- Generating media (images, audio, video)? → ai-core/media-generation +- Need structured JSON output? → ai-core/structured-outputs +- Choosing/configuring a provider? → ai-core/adapter-configuration +- Building a server-only AG-UI backend? → ai-core/ag-ui-protocol +- Adding analytics or post-stream events? → ai-core/middleware +- Connecting to a custom backend? → ai-core/custom-backend-integration +- Debugging mistakes? → Check Common Mistakes in the relevant sub-skill + +## Critical Rules + +1. **This is NOT the Vercel AI SDK.** Use `chat()` not `streamText()`. Use `openaiText()` not `createOpenAI()`. Import from `@tanstack/ai`, not `ai`. +2. **Import from framework package on client.** Use `@tanstack/ai-react` (or solid/vue/svelte/preact), not `@tanstack/ai-client`. +3. **Use `toServerSentEventsResponse()`** to convert streams to HTTP responses. Never implement SSE manually. +4. 
**Use middleware for lifecycle events.** No `onEnd`/`onFinish` callbacks on `chat()` — use `middleware: [{ onFinish: ... }]`. +5. **Ask the user which adapter and model** they want. Suggest the latest model. Also ask if they want Code Mode. +6. **Tools must be passed to both server and client.** Server gets the tool in `chat({ tools })`, client gets the definition in `useChat({ clientTools })`. + +## Version + +Targets TanStack AI v0.10.0. diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/SKILL.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/SKILL.md new file mode 100644 index 000000000..04d7e8742 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/SKILL.md @@ -0,0 +1,283 @@ +--- +name: ai-core/adapter-configuration +description: > + Provider adapter selection and configuration: openaiText, anthropicText, + geminiText, ollamaText, grokText, groqText, openRouterText. Per-model + type safety with modelOptions, reasoning/thinking configuration, + runtime adapter switching, extendAdapter() for custom models, createModel(). + API key env vars: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY/GEMINI_API_KEY, + XAI_API_KEY, GROQ_API_KEY, OPENROUTER_API_KEY, OLLAMA_HOST. +type: sub-skill +library: tanstack-ai +library_version: '0.10.0' +sources: + - 'TanStack/ai:docs/adapters/openai.md' + - 'TanStack/ai:docs/adapters/anthropic.md' + - 'TanStack/ai:docs/adapters/gemini.md' + - 'TanStack/ai:docs/adapters/ollama.md' + - 'TanStack/ai:docs/advanced/per-model-type-safety.md' + - 'TanStack/ai:docs/advanced/runtime-adapter-switching.md' + - 'TanStack/ai:docs/advanced/extend-adapter.md' +--- + +# Adapter Configuration + +> **Dependency:** This skill builds on ai-core. Read it first for critical rules. + +> **Before implementing:** Ask the user which provider and model they want. +> Then fetch the latest available models from the provider's source code +> (check the adapter's model metadata file, e.g. 
`packages/typescript/ai-openai/src/model-meta.ts`) +> or from the provider's API/docs to recommend the most current model. +> The model lists in this skill and its reference files may be outdated. +> Always verify against the source before recommending a specific model. + +## Setup + +Create an adapter and use it with `chat()`: + +```typescript +import { chat, toServerSentEventsResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + temperature: 0.7, + maxTokens: 1000, +}) + +return toServerSentEventsResponse(stream) +``` + +The adapter factory function takes the model name as a string literal and an +optional config object (API key, base URL, etc.). The model name is passed +into the factory, not into `chat()`. + +## Core Patterns + +### 1. Adapter Selection + +Each provider has a dedicated package with tree-shakeable adapter factories. +The text adapter is the primary one for chat/completions: + +| Provider | Package | Factory | Env Var | +| ---------- | ------------------------- | ---------------- | ------------------------------------------------- | +| OpenAI | `@tanstack/ai-openai` | `openaiText` | `OPENAI_API_KEY` | +| Anthropic | `@tanstack/ai-anthropic` | `anthropicText` | `ANTHROPIC_API_KEY` | +| Gemini | `@tanstack/ai-gemini` | `geminiText` | `GOOGLE_API_KEY` or `GEMINI_API_KEY` | +| Grok (xAI) | `@tanstack/ai-grok` | `grokText` | `XAI_API_KEY` | +| Groq | `@tanstack/ai-groq` | `groqText` | `GROQ_API_KEY` | +| OpenRouter | `@tanstack/ai-openrouter` | `openRouterText` | `OPENROUTER_API_KEY` | +| Ollama | `@tanstack/ai-ollama` | `ollamaText` | `OLLAMA_HOST` (default: `http://localhost:11434`) | + +```typescript +// Each factory takes model as first arg, optional config as second +import { openaiText } from '@tanstack/ai-openai' +import { anthropicText } from '@tanstack/ai-anthropic' +import { geminiText } from '@tanstack/ai-gemini' +import { grokText } from 
'@tanstack/ai-grok'
+import { groqText } from '@tanstack/ai-groq'
+import { openRouterText } from '@tanstack/ai-openrouter'
+import { ollamaText } from '@tanstack/ai-ollama'
+
+// Model string is passed to the factory, NOT to chat()
+const adapter = openaiText('gpt-5.2')
+const adapter2 = anthropicText('claude-sonnet-4-6')
+const adapter3 = geminiText('gemini-2.5-pro')
+const adapter4 = grokText('grok-4')
+const adapter5 = groqText('llama-3.3-70b-versatile')
+const adapter6 = openRouterText('anthropic/claude-sonnet-4')
+const adapter7 = ollamaText('llama3.3')
+
+// Optional: pass explicit API key
+const adapterWithKey = openaiText('gpt-5.2', {
+  apiKey: 'sk-...',
+})
+```
+
+### 2. Runtime Adapter Switching
+
+Use an adapter factory map to switch providers dynamically based on user
+input or configuration:
+
+```typescript
+import { chat, toServerSentEventsResponse } from '@tanstack/ai'
+import type { TextAdapter } from '@tanstack/ai/adapters'
+import { openaiText } from '@tanstack/ai-openai'
+import { anthropicText } from '@tanstack/ai-anthropic'
+import { geminiText } from '@tanstack/ai-gemini'
+
+// Define a map of provider+model to adapter factory calls
+const adapters: Record<string, () => TextAdapter> = {
+  'openai/gpt-5.2': () => openaiText('gpt-5.2'),
+  'anthropic/claude-sonnet-4-6': () => anthropicText('claude-sonnet-4-6'),
+  'gemini/gemini-2.5-pro': () => geminiText('gemini-2.5-pro'),
+}
+
+export function handleChat(providerModel: string, messages: Array<any>) {
+  const createAdapter = adapters[providerModel]
+  if (!createAdapter) {
+    throw new Error(`Unknown provider/model: ${providerModel}`)
+  }
+
+  const stream = chat({
+    adapter: createAdapter(),
+    messages,
+  })
+
+  return toServerSentEventsResponse(stream)
+}
+```
+
+### 3.
Configuring Reasoning / Thinking + +Different providers expose reasoning/thinking through their `modelOptions`: + +```typescript +import { chat } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' +import { anthropicText } from '@tanstack/ai-anthropic' +import { geminiText } from '@tanstack/ai-gemini' + +// OpenAI: reasoning with effort and summary +const openaiStream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + modelOptions: { + reasoning: { + effort: 'high', + summary: 'auto', + }, + }, +}) + +// Anthropic: extended thinking with budget_tokens +const anthropicStream = chat({ + adapter: anthropicText('claude-sonnet-4-6'), + messages, + maxTokens: 16000, + modelOptions: { + thinking: { + type: 'enabled', + budget_tokens: 8000, // must be >= 1024 and < maxTokens + }, + }, +}) + +// Anthropic: adaptive thinking (claude-sonnet-4-6 and newer) +const adaptiveStream = chat({ + adapter: anthropicText('claude-sonnet-4-6'), + messages, + maxTokens: 16000, + modelOptions: { + thinking: { + type: 'adaptive', + }, + effort: 'high', // 'max' | 'high' | 'medium' | 'low' + }, +}) + +// Gemini: thinking config with budget or level +const geminiStream = chat({ + adapter: geminiText('gemini-2.5-pro'), + messages, + modelOptions: { + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 4096, + }, + }, +}) +``` + +### 4. 
Extending Adapters with Custom Models + +Use `extendAdapter()` and `createModel()` to add custom or fine-tuned models +while preserving type safety for the original models: + +```typescript +import { extendAdapter, createModel } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +// Define custom models +const customModels = [ + createModel('ft:gpt-5.2:my-org:custom-model:abc123', ['text', 'image']), + createModel('my-local-proxy-model', ['text']), +] as const + +// Create extended factory - original models still fully typed +const myOpenai = extendAdapter(openaiText, customModels) + +// Use original models - full type inference preserved +const gpt5 = myOpenai('gpt-5.2') + +// Use custom models - accepted by the type system +const custom = myOpenai('ft:gpt-5.2:my-org:custom-model:abc123') + +// Type error: 'nonexistent-model' is not a valid model +// myOpenai('nonexistent-model') +``` + +At runtime, `extendAdapter` simply passes through to the original factory. +The `_customModels` parameter is only used for type inference. + +## Common Mistakes + +### a. HIGH: Confusing legacy monolithic with tree-shakeable adapter + +The legacy `openai()` (and `anthropic()`, etc.) monolithic adapters are +deprecated. They take the model in `chat()`, not in the factory. + +```typescript +// WRONG: Legacy monolithic adapter pattern +import { openai } from '@tanstack/ai-openai' +chat({ adapter: openai(), model: 'gpt-5.2', messages }) + +// CORRECT: Tree-shakeable adapter, model in factory +import { openaiText } from '@tanstack/ai-openai' +chat({ adapter: openaiText('gpt-5.2'), messages }) +``` + +Source: docs/migration/migration.md + +### b. MEDIUM: Wrong API key environment variable name + +Each provider uses a specific env var name. 
Using the wrong one causes a +runtime error: + +| Provider | Correct Env Var | Common Mistake | +| ---------- | ------------------------------------ | ------------------------------------------------------------------------ | +| OpenAI | `OPENAI_API_KEY` | | +| Anthropic | `ANTHROPIC_API_KEY` | | +| Gemini | `GOOGLE_API_KEY` or `GEMINI_API_KEY` | `GOOGLE_GENAI_API_KEY` (does not work) | +| Grok (xAI) | `XAI_API_KEY` | `GROK_API_KEY` (does not work) | +| Groq | `GROQ_API_KEY` | | +| OpenRouter | `OPENROUTER_API_KEY` | | +| Ollama | `OLLAMA_HOST` | No API key needed, just the host URL (default: `http://localhost:11434`) | + +Source: adapter source code (`utils/client.ts` in each adapter package). + +## References + +Detailed per-adapter reference files: + +- [OpenAI Adapter](references/openai-adapter.md) +- [Anthropic Adapter](references/anthropic-adapter.md) +- [Gemini Adapter](references/gemini-adapter.md) +- [Ollama Adapter](references/ollama-adapter.md) +- [Grok Adapter](references/grok-adapter.md) +- [Groq Adapter](references/groq-adapter.md) +- [OpenRouter Adapter](references/openrouter-adapter.md) + +## Tension + +**HIGH Tension: Type safety vs. quick prototyping** -- Per-model type safety +requires specific model string literals. Quick prototyping wants dynamic +selection with `string` variables. Agents optimizing for quick setup silently +lose type safety. If model names come from user input or config files, use +`extendAdapter()` to add custom names. 
+ +## Cross-References + +- See also: `ai-core/chat-experience/SKILL.md` -- Adapter choice affects chat setup +- See also: `ai-core/structured-outputs/SKILL.md` -- `outputSchema` handles provider differences transparently diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/anthropic-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/anthropic-adapter.md new file mode 100644 index 000000000..be4903432 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/anthropic-adapter.md @@ -0,0 +1,97 @@ +# Anthropic Adapter Reference + +## Package + +``` +@tanstack/ai-anthropic +``` + +## Adapter Factories + +| Factory | Type | Description | +| -------------------- | --------- | ------------------ | +| `anthropicText` | Text/Chat | Chat completions | +| `anthropicSummarize` | Summarize | Text summarization | + +## Import + +```typescript +import { anthropicText } from '@tanstack/ai-anthropic' +``` + +## Key Chat Models + +| Model | Context Window | Max Output | Notes | +| ------------------- | -------------- | ---------- | ------------------------------- | +| `claude-opus-4-6` | 200K | 128K | Most capable, adaptive thinking | +| `claude-sonnet-4-6` | 1M | 64K | Best balance, adaptive thinking | +| `claude-sonnet-4-5` | 200K | 64K | Previous gen balanced | +| `claude-opus-4-5` | 200K | 32K | Previous gen most capable | +| `claude-haiku-4-5` | 200K | 64K | Fast and affordable | +| `claude-sonnet-4` | 200K | 64K | Older balanced model | +| `claude-opus-4` | 200K | 32K | Older most capable | + +Note: Model IDs use the format `claude-opus-4-6`, `claude-sonnet-4-6`, etc. 
+ +## Provider-Specific modelOptions + +```typescript +chat({ + adapter: anthropicText('claude-sonnet-4-6'), + messages, + maxTokens: 16000, + modelOptions: { + // Extended thinking (budget-based) + thinking: { + type: 'enabled', + budget_tokens: 8000, // must be >= 1024 and < maxTokens + }, + // Adaptive thinking (claude-sonnet-4-6, claude-opus-4-6+) + thinking: { + type: 'adaptive', + }, + effort: 'high', // 'max' | 'high' | 'medium' | 'low' + // Service tier + service_tier: 'auto', // 'auto' | 'standard_only' + // Stop sequences + stop_sequences: ['END'], + // Tool choice + tool_choice: { type: 'auto' }, + // Context management + context_management: { + /* BetaContextManagementConfig */ + }, + // MCP servers (max 20) + mcp_servers: [ + { + name: 'my-server', + url: 'https://mcp.example.com', + type: 'url', + tool_configuration: { enabled: true }, + }, + ], + // Container (skills) + container: { + id: 'container-id', + skills: [{ skill_id: 'analysis', type: 'anthropic' }], + }, + // Sampling + top_k: 40, + }, +}) +``` + +## Environment Variable + +``` +ANTHROPIC_API_KEY +``` + +## Gotchas + +- `thinking.budget_tokens` must be >= 1024 AND less than `maxTokens`. + Failing either check throws a validation error. +- Cannot set both `top_p` and `temperature` at the same time (throws error). +- `claude-3-5-haiku` and `claude-3-haiku` do NOT support extended thinking. +- System prompts support prompt caching via `cache_control` on `TextBlockParam[]`. +- All Claude models accept `text`, `image`, and `document` (PDF) input. 
diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/gemini-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/gemini-adapter.md new file mode 100644 index 000000000..55e5d012c --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/gemini-adapter.md @@ -0,0 +1,102 @@ +# Gemini Adapter Reference + +## Package + +``` +@tanstack/ai-gemini +``` + +## Adapter Factories + +| Factory | Type | Description | +| ----------------- | --------- | ----------------------------- | +| `geminiText` | Text/Chat | Chat completions | +| `geminiImage` | Image | Image generation (Imagen) | +| `geminiSpeech` | TTS | Text-to-speech (experimental) | +| `geminiSummarize` | Summarize | Text summarization | + +## Import + +```typescript +import { geminiText } from '@tanstack/ai-gemini' +import { geminiImage } from '@tanstack/ai-gemini' +``` + +## Key Chat Models + +| Model | Max Input | Max Output | Notes | +| ------------------------------- | --------- | ---------- | ---------------------------- | +| `gemini-3.1-pro-preview` | 1M | 65K | Latest flagship, thinking | +| `gemini-3-pro-preview` | 1M | 65K | Previous flagship | +| `gemini-3-flash-preview` | 1M | 65K | Fast, thinking, multimodal | +| `gemini-3.1-flash-lite-preview` | 1M | 65K | Budget, still capable | +| `gemini-2.5-pro` | 1M | 65K | Stable release, all features | +| `gemini-2.5-flash` | 1M | 65K | Fast stable release | + +All Gemini text models accept `text`, `image`, `audio`, `video`, and `document` input. 
+ +## Provider-Specific modelOptions + +```typescript +chat({ + adapter: geminiText('gemini-2.5-pro'), + messages, + modelOptions: { + // Thinking (budget-based) + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 4096, + }, + // Thinking (level-based, advanced models) + thinkingConfig: { + thinkingLevel: 'THINKING_LEVEL_HIGH', + }, + // Safety settings + safetySettings: [ + { + category: 'HARM_CATEGORY_HATE_SPEECH', + threshold: 'BLOCK_MEDIUM_AND_ABOVE', + }, + ], + // Tool config + toolConfig: { + /* ToolConfig */ + }, + // Structured output + responseMimeType: 'application/json', + responseSchema: { + /* Schema */ + }, + // Cached content + cachedContent: 'cachedContents/abc123', + // Response modalities + responseModalities: ['TEXT'], + // Sampling + topK: 40, + seed: 42, + presencePenalty: 0.5, + frequencyPenalty: 0.5, + candidateCount: 1, + stopSequences: ['END'], + }, +}) +``` + +## Environment Variable + +``` +GOOGLE_API_KEY (preferred) +GEMINI_API_KEY (also accepted) +``` + +The adapter checks `GOOGLE_API_KEY` first, then falls back to `GEMINI_API_KEY`. +Note: `GOOGLE_GENAI_API_KEY` does NOT work. + +## Gotchas + +- All Gemini models are multimodal (text, image, audio, video, document input). +- Image generation models (`gemini-3-pro-image-preview`, etc.) have smaller + input limits (65K tokens) compared to text models (1M tokens). +- `thinkingConfig.thinkingLevel` (level-based) and `thinkingConfig.thinkingBudget` + (budget-based) serve different models. Check which your model supports. +- `cachedContent` must follow the format `cachedContents/{id}`. 
diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/grok-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/grok-adapter.md new file mode 100644 index 000000000..971bcba41 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/grok-adapter.md @@ -0,0 +1,77 @@ +# Grok (xAI) Adapter Reference + +## Package + +``` +@tanstack/ai-grok +``` + +## Adapter Factories + +| Factory | Type | Description | +| --------------- | --------- | ------------------ | +| `grokText` | Text/Chat | Chat completions | +| `grokImage` | Image | Image generation | +| `grokSummarize` | Summarize | Text summarization | + +## Import + +```typescript +import { grokText } from '@tanstack/ai-grok' +import { grokImage } from '@tanstack/ai-grok' +``` + +## Key Chat Models + +| Model | Context Window | Notes | +| ----------------------------- | -------------- | ---------------------------- | +| `grok-4-1-fast-reasoning` | 2M | Latest, fast reasoning | +| `grok-4-1-fast-non-reasoning` | 2M | Latest, no reasoning | +| `grok-code-fast-1` | 256K | Code-specialized, reasoning | +| `grok-4` | 256K | Full reasoning, tool calling | +| `grok-4-fast-reasoning` | 2M | Fast reasoning variant | +| `grok-3` | 131K | Previous gen, no reasoning | +| `grok-3-mini` | 131K | Budget reasoning | +| `grok-2-vision-1212` | 32K | Vision input | + +Image model: `grok-2-image-1212` + +## Provider-Specific modelOptions + +Grok uses an OpenAI-compatible API. Options are straightforward: + +```typescript +chat({ + adapter: grokText('grok-4'), + messages, + modelOptions: { + temperature: 0.7, + max_tokens: 4096, + top_p: 0.9, + frequency_penalty: 0.5, + presence_penalty: 0.5, + stop: ['\n\n'], + user: 'user-123', + }, +}) +``` + +## Environment Variable + +``` +XAI_API_KEY +``` + +**Important:** The env var is `XAI_API_KEY`, not `GROK_API_KEY`. +The adapter uses the OpenAI SDK with xAI's base URL (`https://api.x.ai/v1`). 
+ +## Gotchas + +- Uses the OpenAI SDK under the hood with a custom `baseURL`. +- `grok-4-1-fast-non-reasoning` and `grok-4-fast-non-reasoning` explicitly + do NOT support reasoning. Other grok-4+ models do. +- `grok-2-vision-1212` is the only model with image input support in the + older generation. +- The grok-4-1 fast models have a massive 2M context window. +- Provider options are simpler than OpenAI's (no Responses API features, + no structured outputs config, no metadata). diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/groq-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/groq-adapter.md new file mode 100644 index 000000000..26dd4d9fa --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/groq-adapter.md @@ -0,0 +1,106 @@ +# Groq Adapter Reference + +## Package + +``` +@tanstack/ai-groq +``` + +## Adapter Factories + +| Factory | Type | Description | +| ---------- | --------- | ---------------- | +| `groqText` | Text/Chat | Chat completions | + +Groq currently only has a text adapter (no image, TTS, etc.). 
+ +## Import + +```typescript +import { groqText } from '@tanstack/ai-groq' +``` + +## Key Chat Models + +| Model | Context Window | Notes | +| ----------------------------------------------- | -------------- | ------------------------- | +| `llama-3.3-70b-versatile` | 131K | General purpose | +| `meta-llama/llama-4-maverick-17b-128e-instruct` | 131K | Vision, JSON schema | +| `meta-llama/llama-4-scout-17b-16e-instruct` | 131K | Vision, tool calling | +| `openai/gpt-oss-120b` | 131K | Reasoning, browser search | +| `openai/gpt-oss-20b` | 131K | Budget reasoning | +| `qwen/qwen3-32b` | 131K | Reasoning, tool calling | +| `moonshotai/kimi-k2-instruct-0905` | 262K | Large context | +| `llama-3.1-8b-instant` | 131K | Ultra-fast, budget | + +Guard models: `meta-llama/llama-guard-4-12b`, `meta-llama/llama-prompt-guard-2-86m` + +## Provider-Specific modelOptions + +```typescript +chat({ + adapter: groqText('llama-3.3-70b-versatile'), + messages, + modelOptions: { + // Reasoning + reasoning_effort: 'medium', // 'none' | 'default' | 'low' | 'medium' | 'high' + reasoning_format: 'parsed', // 'hidden' | 'raw' | 'parsed' (mutually exclusive with include_reasoning) + include_reasoning: true, // mutually exclusive with reasoning_format + // Response format + response_format: { + type: 'json_schema', + json_schema: { + /* ... */ + }, + }, + // Sampling + temperature: 0.7, + top_p: 0.9, + frequency_penalty: 0.5, + presence_penalty: 0.5, + seed: 42, + stop: ['\n\n'], + // Token limits + max_completion_tokens: 8192, + // Tool calling + tool_choice: 'auto', + parallel_tool_calls: true, + disable_tool_validation: false, + // Citations + citation_options: 'enabled', + // Documents for context + documents: [{ text: '...' 
}], + // Search settings (for web search tool) + search_settings: { + /* SearchSettings */ + }, + // Service tier + service_tier: 'auto', // 'auto' | 'on_demand' | 'flex' | 'performance' + // Metadata + metadata: { session: 'abc' }, + // Logging + logprobs: true, + top_logprobs: 5, + // User tracking + user: 'user-123', + }, +}) +``` + +## Environment Variable + +``` +GROQ_API_KEY +``` + +## Gotchas + +- `reasoning_effort` and `reasoning_format` behave differently per model: + - qwen3 models: `'none'` disables reasoning, `'default'` or null enables it + - openai/gpt-oss models: `'low'`, `'medium'` (default), or `'high'` +- `include_reasoning` and `reasoning_format` are mutually exclusive. +- Most models have `max_completion_tokens` of 8K-65K, not unlimited. +- Groq specializes in inference speed; model selection is more limited + than other providers. +- Guard models (`llama-guard-4-12b`, `llama-prompt-guard-2-*`) are for + content moderation, not general chat. diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/ollama-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/ollama-adapter.md new file mode 100644 index 000000000..6ae4462f1 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/ollama-adapter.md @@ -0,0 +1,82 @@ +# Ollama Adapter Reference + +## Package + +``` +@tanstack/ai-ollama +``` + +## Adapter Factories + +| Factory | Type | Description | +| ----------------- | --------- | ------------------ | +| `ollamaText` | Text/Chat | Chat completions | +| `ollamaSummarize` | Summarize | Text summarization | + +## Import + +```typescript +import { ollamaText } from '@tanstack/ai-ollama' +``` + +## Key Models (Local) + +Ollama runs models locally. The adapter supports a large catalog of models. 
+Key families include: + +| Model Family | Example Names | Notes | +| ------------ | -------------------------------- | ----------------------- | +| Llama 4 | `llama4`, `llama4:scout` | Latest Meta models | +| Llama 3.3 | `llama3.3`, `llama3.3:70b` | Strong general purpose | +| Qwen 3 | `qwen3`, `qwen3:32b` | Reasoning capable | +| DeepSeek R1 | `deepseek-r1`, `deepseek-r1:70b` | Reasoning focused | +| Gemma 3 | `gemma3`, `gemma3:27b` | Google's open model | +| Phi 4 | `phi4`, `phi4:14b` | Microsoft's small model | +| Mistral | `mistral`, `mistral-large` | Mistral AI models | + +Models must be pulled first: `ollama pull llama3.3` + +## Provider-Specific modelOptions + +Ollama models use a generic options type. Provider options vary by the +underlying model. The adapter passes options through to the Ollama API. + +```typescript +import { chat } from '@tanstack/ai' +import { ollamaText } from '@tanstack/ai-ollama' + +const stream = chat({ + adapter: ollamaText('llama3.3'), + messages, + temperature: 0.7, + // Ollama-specific options are limited compared to cloud providers +}) +``` + +## Configuration + +```typescript +// With explicit host +const adapter = ollamaText('llama3.3', { + host: 'http://my-server:11434', +}) +``` + +## Environment Variable + +``` +OLLAMA_HOST (default: http://localhost:11434) +``` + +No API key is needed. Ollama runs locally by default. + +## Gotchas + +- **System prompts:** Pass system prompts via the `systemPrompts` option in `chat()`. +- Ollama requires models to be downloaded first (`ollama pull <model>`). + The adapter does not auto-download models. +- The model catalog is very large (60+ model families). Model names follow + Ollama's naming: `family:variant` (e.g., `llama3.3:70b`). +- Vision models (e.g., `llama3.2-vision`, `llava`, `gemma3`) support + image input. Text-only models do not. +- No image generation, TTS, or transcription adapters for Ollama. 
diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/openai-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/openai-adapter.md new file mode 100644 index 000000000..87a4d793d --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/openai-adapter.md @@ -0,0 +1,95 @@ +# OpenAI Adapter Reference + +## Package + +``` +@tanstack/ai-openai +``` + +## Adapter Factories + +| Factory | Type | Description | +| --------------------- | -------------- | ------------------------------------ | +| `openaiText` | Text/Chat | Chat completions (Responses API) | +| `openaiImage` | Image | Image generation (DALL-E, GPT Image) | +| `openaiSpeech` | TTS | Text-to-speech | +| `openaiTranscription` | Transcription | Speech-to-text | +| `openaiVideo` | Video | Video generation (experimental) | +| `openaiSummarize` | Summarize | Text summarization | +| `openaiRealtime` | Realtime/Voice | Realtime voice conversations | + +## Import + +```typescript +import { openaiText } from '@tanstack/ai-openai' +import { openaiImage } from '@tanstack/ai-openai' +import { openaiSpeech } from '@tanstack/ai-openai' +``` + +## Key Chat Models + +| Model | Context Window | Max Output | Notes | +| --------------------- | -------------- | ---------- | -------------------------------------- | +| `gpt-5.4` | 400K | 128K | Flagship, reasoning, image input | +| `gpt-5.4-pro` | 400K | 128K | Higher reasoning, no structured output | +| `gpt-5.4-chat-latest` | 128K | 16K | Chat-optimized variant | +| `gpt-5.1` | 400K | 128K | Previous flagship, image I/O | +| `gpt-5` | 400K | 128K | Previous gen flagship | +| `gpt-5-mini` | 400K | 128K | Cost-efficient | + +## Provider-Specific modelOptions + +```typescript +chat({ + adapter: openaiText('gpt-5.4'), + messages, + modelOptions: { + // Reasoning (effort levels: none, minimal, low, medium, high) + reasoning: { + effort: 'high', + summary: 'auto', // 'auto' | 'detailed' + }, 
+ // Service tier + service_tier: 'auto', // 'auto' | 'default' | 'flex' | 'priority' + // Response storage + store: true, + // Truncation strategy + truncation: 'auto', // 'auto' | 'disabled' + // Tool calling + max_tool_calls: 10, + parallel_tool_calls: true, + tool_choice: 'auto', // 'auto' | 'none' | 'required' + // Structured output + text: { + /* ResponseTextConfig */ + }, + // Metadata (max 16 key-value pairs) + metadata: { session_id: 'abc' }, + // Streaming + stream_options: { include_obfuscation: true }, + // Verbosity + verbosity: 'medium', // 'low' | 'medium' | 'high' + // Prompt caching + prompt_cache_key: 'my-cache', + prompt_cache_retention: '24h', + // Conversations API + conversation: { id: 'conv-123' }, + // Background processing + background: false, + }, +}) +``` + +## Environment Variable + +``` +OPENAI_API_KEY +``` + +## Gotchas + +- Uses the **Responses API** (not Chat Completions) by default. +- `gpt-5.1` defaults reasoning effort to `none`; you must explicitly set + `effort: 'low'` or higher to enable reasoning. +- `o3-pro` only supports `high` reasoning effort. +- `conversation` and `previous_response_id` cannot be used together. 
diff --git a/packages/typescript/ai/skills/ai-core/adapter-configuration/references/openrouter-adapter.md b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/openrouter-adapter.md new file mode 100644 index 000000000..386fa7596 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/adapter-configuration/references/openrouter-adapter.md @@ -0,0 +1,99 @@ +# OpenRouter Adapter Reference + +## Package + +``` +@tanstack/ai-openrouter +``` + +## Adapter Factories + +| Factory | Type | Description | +| --------------------- | --------- | ------------------ | +| `openRouterText` | Text/Chat | Chat completions | +| `openRouterImage` | Image | Image generation | +| `openRouterSummarize` | Summarize | Text summarization | + +## Import + +```typescript +import { openRouterText } from '@tanstack/ai-openrouter' +``` + +## Key Models + +OpenRouter routes to hundreds of models across providers. Model IDs use +the format `provider/model-name`: + +| Model ID | Notes | +| ----------------------------- | -------------------------- | +| `anthropic/claude-sonnet-4` | Claude via OpenRouter | +| `openai/gpt-5.2` | GPT-5.2 via OpenRouter | +| `google/gemini-2.5-pro` | Gemini via OpenRouter | +| `meta-llama/llama-4-maverick` | Open-source via OpenRouter | +| `deepseek/deepseek-r1` | Reasoning model | + +## Provider-Specific modelOptions + +OpenRouter has unique routing and provider selection options: + +```typescript +chat({ + adapter: openRouterText('anthropic/claude-sonnet-4'), + messages, + modelOptions: { + // Reasoning + reasoning: { + effort: 'high', // 'none' | 'minimal' | 'low' | 'medium' | 'high' + max_tokens: 4096, + exclude: false, + }, + // Sampling + temperature: 0.7, + topP: 0.9, + topK: 40, + frequencyPenalty: 0.5, + presencePenalty: 0.5, + repetitionPenalty: 1.1, + minP: 0.05, + seed: 42, + // Token limits + maxCompletionTokens: 8192, + // Stop sequences + stop: ['\n\n'], + // Tool calling + toolChoice: 'auto', + parallelToolCalls: true, + // Response 
format + responseFormat: { type: 'json_object' }, + // Web search + webSearchOptions: { + search_context_size: 'medium', // 'low' | 'medium' | 'high' + }, + // Verbosity + verbosity: 'medium', + // Logprobs + logprobs: true, + topLogprobs: 5, + }, +}) +``` + +## Environment Variable + +``` +OPENROUTER_API_KEY +``` + +## Gotchas + +- Model IDs are `provider/model-name` format (e.g., `openai/gpt-5.2`). +- OpenRouter has unique features not found in direct provider adapters: + - `variant` option: `'free'`, `'nitro'`, `'online'`, `'thinking'`, etc. + - `provider` routing preferences (order, fallbacks, data collection policies) + - `transforms: ['middle-out']` for context compression + - `prediction` for latency reduction + - `plugins: [{ id: 'web' }]` for web search +- Uses `camelCase` for option names (e.g., `topP`, `frequencyPenalty`), + unlike OpenAI's `snake_case`. +- `route: 'fallback'` with `models` array tries models in order. diff --git a/packages/typescript/ai/skills/ai-core/ag-ui-protocol/SKILL.md b/packages/typescript/ai/skills/ai-core/ag-ui-protocol/SKILL.md new file mode 100644 index 000000000..561843174 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/ag-ui-protocol/SKILL.md @@ -0,0 +1,232 @@ +--- +name: ai-core/ag-ui-protocol +description: > + Server-side AG-UI streaming protocol implementation: StreamChunk event + types (RUN_STARTED, TEXT_MESSAGE_START/CONTENT/END, TOOL_CALL_START/ARGS/END, + RUN_FINISHED, RUN_ERROR, STEP_STARTED/STEP_FINISHED, STATE_SNAPSHOT/DELTA, + CUSTOM), toServerSentEventsStream() for SSE format, toHttpStream() for + NDJSON format. For backends serving AG-UI events without client packages. +type: sub-skill +library: tanstack-ai +library_version: '0.10.0' +sources: + - 'TanStack/ai:docs/protocol/chunk-definitions.md' + - 'TanStack/ai:docs/protocol/sse-protocol.md' + - 'TanStack/ai:docs/protocol/http-stream-protocol.md' +--- + +# AG-UI Protocol + +This skill builds on ai-core. Read it first for critical rules. 
+ +## Setup — Server Endpoint Producing AG-UI Events via SSE + +```typescript +import { chat, toServerSentEventsResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +export async function POST(request: Request) { + const { messages } = await request.json() + const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + }) + return toServerSentEventsResponse(stream) +} +``` + +`chat()` returns an `AsyncIterable<StreamChunk>`. Each `StreamChunk` is a +typed AG-UI event (discriminated union on `type`). The `toServerSentEventsResponse()` +helper encodes that iterable into an SSE-formatted `Response` with correct headers. + +## Core Patterns + +### 1. SSE Format — toServerSentEventsStream / toServerSentEventsResponse + +**Wire format:** Each event is `data: <json>\n\n`. Stream ends with `data: [DONE]\n\n`. + +```typescript +import { + chat, + toServerSentEventsStream, + toServerSentEventsResponse, +} from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +// Option A: Get a ReadableStream (manual Response construction) +const abortController = new AbortController() +const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + abortController, +}) +const sseStream = toServerSentEventsStream(stream, abortController) + +const response = new Response(sseStream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + }, +}) + +// Option B: Use the helper (sets headers automatically) +const response2 = toServerSentEventsResponse(stream, { abortController }) +// Default headers: Content-Type: text/event-stream, Cache-Control: no-cache, Connection: keep-alive +``` + +**Default response headers set by `toServerSentEventsResponse()`:** + +| Header | Value | +| --------------- | ------------------- | +| `Content-Type` | `text/event-stream` | +| `Cache-Control` | `no-cache` | +| `Connection` | `keep-alive` | + +Custom headers merge on top (user headers override defaults): + 
+ +```typescript +toServerSentEventsResponse(stream, { + headers: { + 'X-Accel-Buffering': 'no', // Disable nginx buffering + 'Cache-Control': 'no-store', // Override default + }, + abortController, +}) +``` + +**Error handling:** If the stream throws, a `RUN_ERROR` event is emitted +automatically before the stream closes. If the `abortController` is already +aborted, the error event is suppressed and the stream closes silently. + +### 2. HTTP Stream (NDJSON) — toHttpStream / toHttpResponse + +**Wire format:** Each event is `<json>\n` (newline-delimited JSON, no SSE prefix, no `[DONE]` marker). + +```typescript +import { chat, toHttpStream, toHttpResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +// Option A: Get a ReadableStream +const abortController = new AbortController() +const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + abortController, +}) +const ndjsonStream = toHttpStream(stream, abortController) + +const response = new Response(ndjsonStream, { + headers: { + 'Content-Type': 'application/x-ndjson', + }, +}) + +// Option B: Use the helper (does NOT set headers automatically) +const response2 = toHttpResponse(stream, { abortController }) +// Note: toHttpResponse does NOT set Content-Type automatically. +// You should pass headers explicitly: +const response3 = toHttpResponse(stream, { + headers: { 'Content-Type': 'application/x-ndjson' }, + abortController, +}) +``` + +**Client-side pairing:** SSE endpoints are consumed by `fetchServerSentEvents()`. +HTTP stream endpoints are consumed by `fetchHttpStream()`. Both are connection +adapters from `@tanstack/ai-react` (or the framework-specific package). + +### 3. AG-UI Event Types Reference + +All events extend `BaseAGUIEvent` which carries `type`, `timestamp`, optional +`model`, and optional `rawEvent`. 
+ +| Event Type | Description | +| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| `RUN_STARTED` | First event in a stream. Carries `runId` and optional `threadId`. | +| `TEXT_MESSAGE_START` | New text message begins. Carries `messageId` and `role`. | +| `TEXT_MESSAGE_CONTENT` | Incremental text token. Carries `messageId` and `delta` (the new text). | +| `TEXT_MESSAGE_END` | Text message complete. Carries `messageId`. | +| `TOOL_CALL_START` | Tool invocation begins. Carries `toolCallId`, `toolName`, and `index`. | +| `TOOL_CALL_ARGS` | Incremental tool arguments JSON. Carries `toolCallId` and `delta`. | +| `TOOL_CALL_END` | Tool call arguments complete. Carries `toolCallId` and `toolName`. | +| `STEP_STARTED` | Thinking/reasoning step begins. Carries `stepId` and optional `stepType`. | +| `STEP_FINISHED` | Thinking step complete. Carries `stepId`, `delta`, and optional `content`. | +| `MESSAGES_SNAPSHOT` | Full conversation transcript snapshot. Carries `messages: Array<Message>`. | +| `STATE_SNAPSHOT` | Full application state snapshot. Carries `state: Record<string, unknown>`. | +| `STATE_DELTA` | Incremental state update. Carries `delta: Record<string, unknown>`. | +| `CUSTOM` | Extension point. Carries `name` (string) and optional `value` (unknown). | +| `RUN_FINISHED` | Stream complete. Carries `runId` and `finishReason` (`'stop'` / `'length'` / `'content_filter'` / `'tool_calls'` / `null`). | +| `RUN_ERROR` | Error during stream. Carries optional `runId` and `error: { message, code? }`. 
| + +**Typical event sequence for a text-only response:** + +``` +RUN_STARTED -> TEXT_MESSAGE_START -> TEXT_MESSAGE_CONTENT (repeated) -> TEXT_MESSAGE_END -> RUN_FINISHED +``` + +**Typical event sequence with tool calls:** + +``` +RUN_STARTED -> TEXT_MESSAGE_START -> TEXT_MESSAGE_CONTENT* -> TEXT_MESSAGE_END + -> TOOL_CALL_START -> TOOL_CALL_ARGS* -> TOOL_CALL_END + -> RUN_FINISHED (finishReason: 'tool_calls') +``` + +**Type aliases:** `StreamChunk` is an alias for `AGUIEvent` (the discriminated +union of all event interfaces). `StreamChunkType` is an alias for `AGUIEventType` +(the string union of all event type literals). + +## Common Mistakes + +### MEDIUM: Proxy buffering breaks SSE streaming + +Reverse proxies (nginx, Cloudflare, AWS ALB) buffer SSE responses by default, +causing events to arrive in batches instead of streaming token-by-token. + +Fix: Set proxy-bypass headers on the response. + +```typescript +toServerSentEventsResponse(stream, { + headers: { + 'X-Accel-Buffering': 'no', // nginx + 'X-Content-Type-Options': 'nosniff', // Some CDNs + }, + abortController, +}) +``` + +For Cloudflare Workers, SSE streams automatically. For Cloudflare proxied +origins, ensure "Response Buffering" is disabled in the dashboard. + +Source: docs/protocol/sse-protocol.md + +### MEDIUM: Assuming all AG-UI events arrive in every response + +Not all event types appear in every stream: + +- `STEP_STARTED` / `STEP_FINISHED` only appear with thinking-enabled models + (e.g., `o3`, `claude-sonnet-4-5` with extended thinking). Standard models + skip these entirely. +- `TOOL_CALL_START` / `TOOL_CALL_ARGS` / `TOOL_CALL_END` only appear when + the model invokes tools. A text-only response has none. +- `STATE_SNAPSHOT` / `STATE_DELTA` only appear when server code explicitly + emits them for stateful agent workflows. +- `MESSAGES_SNAPSHOT` only appears when the server explicitly sends a + full transcript snapshot. 
+- `CUSTOM` events are application-defined and never emitted by default. + +Code that expects a fixed sequence (e.g., always waiting for `STEP_FINISHED` +before processing text) will hang or break on models that don't emit those events. + +Source: docs/protocol/chunk-definitions.md + +## Tension + +HIGH Tension: AG-UI protocol compliance vs. internal message format -- TanStack +AI's `UIMessage` format (parts-based) diverges from AG-UI spec (content-based). +Full compliance would require a different message structure. + +## Cross-References + +- See also: `ai-core/custom-backend-integration/SKILL.md` -- Custom backends must implement SSE or HTTP stream format to work with TanStack AI client connection adapters. diff --git a/packages/typescript/ai/skills/ai-core/chat-experience/SKILL.md b/packages/typescript/ai/skills/ai-core/chat-experience/SKILL.md new file mode 100644 index 000000000..918fdec1b --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/chat-experience/SKILL.md @@ -0,0 +1,506 @@ +--- +name: ai-core/chat-experience +description: > + End-to-end chat implementation: server endpoint with chat() and + toServerSentEventsResponse(), client-side useChat hook with + fetchServerSentEvents(), message rendering with UIMessage parts, + multimodal content, thinking/reasoning display. Covers streaming + states, connection adapters, and message format conversions. + NOT Vercel AI SDK — uses chat() not streamText(). +type: sub-skill +library: tanstack-ai +library_version: '0.10.0' +sources: + - 'TanStack/ai:docs/getting-started/quick-start.md' + - 'TanStack/ai:docs/chat/streaming.md' + - 'TanStack/ai:docs/chat/connection-adapters.md' + - 'TanStack/ai:docs/chat/thinking-content.md' + - 'TanStack/ai:docs/advanced/multimodal-content.md' +--- + +# Chat Experience + +This skill builds on ai-core. Read it first for critical rules. 
+ +## Setup — Minimal Chat App + +### Server: API Route (TanStack Start) + +```typescript +// src/routes/api.chat.ts +import { createFileRoute } from '@tanstack/react-router' +import { chat, toServerSentEventsResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +export const Route = createFileRoute('/api/chat')({ + server: { + handlers: { + POST: async ({ request }) => { + const abortController = new AbortController() + const body = await request.json() + const { messages } = body + + const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + systemPrompts: ['You are a helpful assistant.'], + abortController, + }) + + return toServerSentEventsResponse(stream, { abortController }) + }, + }, + }, +}) +``` + +### Client: React Component + +```typescript +// src/routes/index.tsx +import { useState } from 'react' +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' +import type { UIMessage } from '@tanstack/ai-react' + +function ChatPage() { + const [input, setInput] = useState('') + + const { messages, sendMessage, isLoading, error, stop } = useChat({ + connection: fetchServerSentEvents('/api/chat'), + }) + + const handleSubmit = () => { + if (!input.trim()) return + sendMessage(input.trim()) + setInput('') + } + + return ( +
+
+ {messages.map((message: UIMessage) => ( +
+ {message.role}: + {message.parts.map((part, i) => { + if (part.type === 'text') { + return

{part.content}

+ } + return null + })} +
+ ))} +
+ + {error &&
Error: {error.message}
} + +
+ setInput(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + handleSubmit() + } + }} + disabled={isLoading} + placeholder="Type a message..." + /> + {isLoading ? ( + + ) : ( + + )} +
+
+ ) +} +``` + +Vue/Solid/Svelte/Preact have identical patterns with different hook imports +(e.g., `import { useChat } from '@tanstack/ai-solid'`). + +## Core Patterns + +### 1. Streaming Chat with SSE + +Server returns a streaming SSE Response; client parses it automatically. + +**Server:** + +```typescript +import { chat, toServerSentEventsResponse } from '@tanstack/ai' +import { anthropicText } from '@tanstack/ai-anthropic' + +const stream = chat({ + adapter: anthropicText('claude-sonnet-4-5'), + messages, + temperature: 0.7, + maxTokens: 2000, + systemPrompts: ['You are a helpful assistant.'], + abortController, +}) + +return toServerSentEventsResponse(stream, { abortController }) +``` + +**Client:** + +```typescript +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' + +const { messages, sendMessage, isLoading, error, stop, status } = useChat({ + connection: fetchServerSentEvents('/api/chat'), + body: { provider: 'anthropic', model: 'claude-sonnet-4-5' }, + onFinish: (message) => { + console.log('Response complete:', message.id) + }, + onError: (err) => { + console.error('Stream error:', err) + }, +}) +``` + +The `body` field is merged into the POST request body alongside `messages`, +letting the server read `data.provider`, `data.model`, etc. + +The `status` field tracks the chat lifecycle: `'ready'` | `'submitted'` | `'streaming'` | `'error'`. + +### 2. Rendering Thinking/Reasoning Content + +Models with extended thinking (Claude, Gemini) emit `ThinkingPart` in the message parts array. + +```typescript +import type { UIMessage } from '@tanstack/ai-react' + +function MessageRenderer({ message }: { message: UIMessage }) { + return ( +
+ {message.parts.map((part, i) => { + if (part.type === 'thinking') { + const isComplete = message.parts + .slice(i + 1) + .some((p) => p.type === 'text') + return ( +
+ {isComplete ? 'Thought process' : 'Thinking...'} +
{part.content}
+
+ ) + } + + if (part.type === 'text' && part.content) { + return

{part.content}

+ } + + if (part.type === 'tool-call') { + return ( +
+ Tool call: {part.name} ({part.state}) +
+ ) + } + + return null + })} +
+    )
+}
+```
+
+Server-side, enable thinking via `modelOptions` on the adapter:
+
+```typescript
+import { geminiText } from '@tanstack/ai-gemini'
+
+const stream = chat({
+  adapter: geminiText('gemini-2.5-flash'),
+  messages,
+  modelOptions: {
+    thinkingConfig: {
+      includeThoughts: true,
+      thinkingBudget: 100,
+    },
+  },
+})
+```
+
+### 3. Sending Multimodal Content (Images)
+
+Use `sendMessage` with a `MultimodalContent` object instead of a plain string.
+
+```typescript
+import { useChat, fetchServerSentEvents } from '@tanstack/ai-react'
+import type { ContentPart } from '@tanstack/ai'
+
+const { sendMessage } = useChat({
+  connection: fetchServerSentEvents('/api/chat'),
+})
+
+function sendImageMessage(text: string, imageBase64: string, mimeType: string) {
+  const contentParts: Array<ContentPart> = [
+    { type: 'text', content: text },
+    {
+      type: 'image',
+      source: { type: 'data', value: imageBase64, mimeType },
+    },
+  ]
+
+  sendMessage({ content: contentParts })
+}
+
+function sendImageUrl(text: string, imageUrl: string) {
+  const contentParts: Array<ContentPart> = [
+    { type: 'text', content: text },
+    {
+      type: 'image',
+      source: { type: 'url', value: imageUrl },
+    },
+  ]
+
+  sendMessage({ content: contentParts })
+}
+```
+
+Render image parts in received messages:
+
+```typescript
+if (part.type === 'image') {
+  const src =
+    part.source.type === 'url'
+      ? part.source.value
+      : `data:${part.source.mimeType};base64,${part.source.value}`
+  return <img src={src} alt="Attached image" />
+}
+```
+
+### 4. HTTP Stream Format (Alternative to SSE)
+
+Use `toHttpResponse` + `fetchHttpStream` for newline-delimited JSON instead of SSE.
+ +**Server:** + +```typescript +import { chat, toHttpResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' + +const stream = chat({ + adapter: openaiText('gpt-5.2'), + messages, + abortController, +}) + +return toHttpResponse(stream, { abortController }) +``` + +**Client:** + +```typescript +import { useChat, fetchHttpStream } from '@tanstack/ai-react' + +const { messages, sendMessage } = useChat({ + connection: fetchHttpStream('/api/chat'), +}) +``` + +The only difference is swapping `toServerSentEventsResponse` / `fetchServerSentEvents` +for `toHttpResponse` / `fetchHttpStream`. Everything else stays identical. + +## Common Mistakes + +### a. CRITICAL: Using Vercel AI SDK patterns (streamText, generateText) + +```typescript +// WRONG +import { streamText } from 'ai' +import { openai } from '@ai-sdk/openai' +const result = streamText({ model: openai('gpt-4o'), messages }) + +// CORRECT +import { chat } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' +const stream = chat({ adapter: openaiText('gpt-5.2'), messages }) +``` + +### b. CRITICAL: Using Vercel createOpenAI() provider pattern + +```typescript +// WRONG +import { createOpenAI } from '@ai-sdk/openai' +const openai = createOpenAI({ apiKey }) +streamText({ model: openai('gpt-4o'), messages }) + +// CORRECT +import { openaiText } from '@tanstack/ai-openai' +import { chat } from '@tanstack/ai' +chat({ adapter: openaiText('gpt-5.2'), messages }) +``` + +### c. CRITICAL: Using monolithic openai() instead of openaiText() + +```typescript +// WRONG +import { openai } from '@tanstack/ai-openai' +chat({ adapter: openai(), model: 'gpt-5.2', messages }) + +// CORRECT +import { openaiText } from '@tanstack/ai-openai' +chat({ adapter: openaiText('gpt-5.2'), messages }) +``` + +The monolithic `openai()` adapter is deprecated. Use tree-shakeable adapters: +`openaiText()`, `openaiImage()`, `openaiSpeech()`, etc. + +### d. 
HIGH: Using toResponseStream instead of toServerSentEventsResponse + +```typescript +// WRONG +import { toResponseStream } from '@tanstack/ai' +return toResponseStream(stream, { abortController }) + +// CORRECT +import { toServerSentEventsResponse } from '@tanstack/ai' +return toServerSentEventsResponse(stream, { abortController }) +``` + +### e. HIGH: Passing model as separate parameter to chat() + +```typescript +// WRONG +chat({ adapter: openaiText(), model: 'gpt-5.2', messages }) + +// CORRECT +chat({ adapter: openaiText('gpt-5.2'), messages }) +``` + +The model is passed to the adapter factory, not to `chat()`. + +### f. HIGH: Nesting temperature/maxTokens in options object + +```typescript +// WRONG +chat({ adapter, messages, options: { temperature: 0.7, maxTokens: 1000 } }) + +// CORRECT +chat({ adapter, messages, temperature: 0.7, maxTokens: 1000 }) +``` + +All parameters are top-level on the `chat()` options object. + +### g. HIGH: Using providerOptions instead of modelOptions + +```typescript +// WRONG +chat({ + adapter, + messages, + providerOptions: { responseFormat: { type: 'json_object' } }, +}) + +// CORRECT +chat({ + adapter, + messages, + modelOptions: { responseFormat: { type: 'json_object' } }, +}) +``` + +### h. 
HIGH: Implementing custom SSE stream instead of using toServerSentEventsResponse + +```typescript +// WRONG +const readable = new ReadableStream({ + async start(controller) { + const encoder = new TextEncoder() + for await (const chunk of stream) { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)) + } + controller.enqueue(encoder.encode('data: [DONE]\n\n')) + controller.close() + }, +}) +return new Response(readable, { + headers: { 'Content-Type': 'text/event-stream' }, +}) + +// CORRECT +import { toServerSentEventsResponse } from '@tanstack/ai' +return toServerSentEventsResponse(stream, { abortController }) +``` + +`toServerSentEventsResponse` handles SSE formatting, abort signals, +error events (RUN_ERROR), and correct headers automatically. + +### i. HIGH: Implementing custom onEnd/onFinish callbacks instead of middleware + +```typescript +// WRONG +chat({ + adapter, + messages, + onEnd: (result) => { + trackAnalytics(result) + }, +}) + +// CORRECT +import type { ChatMiddleware } from '@tanstack/ai' + +const analytics: ChatMiddleware = { + name: 'analytics', + onFinish(ctx, info) { + trackAnalytics({ reason: info.finishReason, iterations: ctx.iteration }) + }, + onUsage(ctx, usage) { + trackTokens(usage.totalTokens) + }, +} + +chat({ adapter, messages, middleware: [analytics] }) +``` + +`chat()` has no `onEnd`/`onFinish` option. Use `middleware` for lifecycle events. +See also: ai-core/middleware/SKILL.md. + +### j. HIGH: Importing from @tanstack/ai-client instead of framework package + +```typescript +// WRONG +import { fetchServerSentEvents } from '@tanstack/ai-client' +import { useChat } from '@tanstack/ai-react' + +// CORRECT +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' +``` + +Framework packages re-export everything needed from `@tanstack/ai-client`. +Import from `@tanstack/ai-client` only in vanilla JS (no framework). + +### k. 
MEDIUM: Not handling RUN_ERROR events in streaming context + +Streaming errors arrive as `RUN_ERROR` events in the stream, not as thrown +exceptions. The `useChat` hook surfaces these via the `error` state and +`onError` callback. If you consume the stream manually (without `useChat`), +check for `RUN_ERROR` chunks: + +```typescript +for await (const chunk of stream) { + if (chunk.type === 'RUN_ERROR') { + console.error('Stream error:', chunk.error.message) + break + } + if (chunk.type === 'TEXT_MESSAGE_CONTENT') { + process.stdout.write(chunk.delta) + } +} +``` + +If not handled, the UI appears to hang with no feedback. + +## Cross-References + +- See also: **ai-core/tool-calling/SKILL.md** -- Most chats include tools +- See also: **ai-core/adapter-configuration/SKILL.md** -- Adapter choice affects available features +- See also: **ai-core/middleware/SKILL.md** -- Use middleware for analytics and lifecycle events diff --git a/packages/typescript/ai/skills/ai-core/custom-backend-integration/SKILL.md b/packages/typescript/ai/skills/ai-core/custom-backend-integration/SKILL.md new file mode 100644 index 000000000..37275b6f7 --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/custom-backend-integration/SKILL.md @@ -0,0 +1,463 @@ +--- +name: ai-core/custom-backend-integration +description: > + Connect useChat to a non-TanStack-AI backend through custom connection + adapters. ConnectConnectionAdapter (single async iterable) vs + SubscribeConnectionAdapter (separate subscribe/send). Customize + fetchServerSentEvents() and fetchHttpStream() with auth headers, + custom URLs, and request options. Import from framework package, + not @tanstack/ai-client. +type: composition +library: tanstack-ai +library_version: '0.10.0' +sources: + - 'TanStack/ai:docs/chat/connection-adapters.md' +--- + +# Custom Backend Integration + +This skill builds on ai-core and ai-core/chat-experience. Read them first. 
+ +## Setup + +Connect `useChat` to a custom SSE backend with auth headers: + +```typescript +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' + +function Chat() { + const { messages, sendMessage, isLoading } = useChat({ + connection: fetchServerSentEvents('https://my-api.com/chat', { + headers: { + Authorization: `Bearer ${token}`, + }, + }), + }) + + return ( +
+    <div>
+      {messages.map((msg) => (
+        <div key={msg.id}>
+          <strong>{msg.role}:</strong>
+          {msg.parts.map((part, i) => {
+            if (part.type === 'text') {
+              return <span key={i}>{part.content}</span>
+            }
+            return null
+          })}
+        </div>
+      ))}
+
+      <button onClick={() => sendMessage('Hello!')} disabled={isLoading}>
+        Send
+      </button>
+    </div>
+ ) +} +``` + +Both `fetchServerSentEvents` and `fetchHttpStream` accept a static URL string +or a function returning a string (evaluated per request), and a static options +object or a sync/async function returning options (also evaluated per request). +This allows dynamic auth tokens and URLs without re-creating the adapter. + +## Core Patterns + +### 1. Custom SSE Backend with fetchServerSentEvents + +Use when your backend speaks SSE (`text/event-stream`) with `data: {json}\n\n` +framing. This is the recommended default. + +**Static options:** + +```typescript +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' + +const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents('https://my-api.com/chat', { + headers: { + Authorization: `Bearer ${token}`, + 'X-Tenant-Id': tenantId, + }, + credentials: 'include', + }), +}) +``` + +**Dynamic URL and options (evaluated per request):** + +```typescript +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' + +const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents( + () => `https://my-api.com/chat?session=${sessionId}`, + async () => ({ + headers: { + Authorization: `Bearer ${await getAccessToken()}`, + }, + body: { + provider: 'openai', + model: 'gpt-4o', + }, + }), + ), +}) +``` + +The `body` field in options is merged into the POST request body alongside +`messages` and `data`, so the server receives `{ messages, data, provider, model }`. + +**Custom fetch client (for proxies, interceptors, retries):** + +```typescript +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' + +const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents('/api/chat', { + fetchClient: myCustomFetch, + }), +}) +``` + +### 2. Custom NDJSON Backend with fetchHttpStream + +Use when your backend sends newline-delimited JSON (`application/x-ndjson`) +instead of SSE. Each line is one JSON-encoded `StreamChunk` followed by `\n`. 
+ +```typescript +import { useChat, fetchHttpStream } from '@tanstack/ai-react' + +const { messages, sendMessage } = useChat({ + connection: fetchHttpStream('https://my-api.com/chat', { + headers: { + Authorization: `Bearer ${token}`, + }, + }), +}) +``` + +`fetchHttpStream` accepts the same URL and options signatures as +`fetchServerSentEvents` (static or dynamic, sync or async). The only difference +is the parsing: no `data:` prefix stripping, no `[DONE]` sentinel -- just one +JSON object per line. + +**Dynamic options work identically:** + +```typescript +import { useChat, fetchHttpStream } from '@tanstack/ai-react' + +const { messages, sendMessage } = useChat({ + connection: fetchHttpStream( + () => `/api/chat?region=${region}`, + async () => ({ + headers: { Authorization: `Bearer ${await refreshToken()}` }, + }), + ), +}) +``` + +### 3. Fully Custom Connection Adapter + +For protocols that don't fit SSE or NDJSON (WebSockets, gRPC-web, custom binary, +server functions), implement the `ConnectionAdapter` interface directly. + +There are two mutually exclusive modes: + +**ConnectConnectionAdapter (pull-based / async iterable):** + +Use when the client initiates a request and consumes the response as a stream. +This is the simpler model and covers most HTTP-based protocols. 
+
+```typescript
+import { useChat } from '@tanstack/ai-react'
+import type { ConnectionAdapter } from '@tanstack/ai-react'
+import type { StreamChunk, UIMessage } from '@tanstack/ai'
+
+const websocketAdapter: ConnectionAdapter = {
+  async *connect(
+    messages: Array<UIMessage>,
+    data?: Record<string, unknown>,
+    abortSignal?: AbortSignal,
+  ): AsyncGenerator<StreamChunk> {
+    const ws = new WebSocket('wss://my-api.com/chat')
+
+    // Wait for connection
+    await new Promise<void>((resolve, reject) => {
+      ws.onopen = () => resolve()
+      ws.onerror = (e) => reject(e)
+    })
+
+    // Send messages
+    ws.send(JSON.stringify({ messages, ...data }))
+
+    // Create an async queue to bridge WebSocket events to an async iterable
+    const queue: Array<StreamChunk> = []
+    let resolve: (() => void) | null = null
+    let done = false
+
+    ws.onmessage = (event) => {
+      const chunk: StreamChunk = JSON.parse(event.data)
+      queue.push(chunk)
+      resolve?.()
+    }
+
+    ws.onclose = () => {
+      done = true
+      resolve?.()
+    }
+
+    ws.onerror = () => {
+      done = true
+      resolve?.()
+    }
+
+    abortSignal?.addEventListener('abort', () => {
+      ws.close()
+    })
+
+    // Yield chunks as they arrive
+    while (!done || queue.length > 0) {
+      if (queue.length > 0) {
+        yield queue.shift()!
+      } else {
+        await new Promise<void>((r) => {
+          resolve = r
+        })
+      }
+    }
+  },
+}
+
+function Chat() {
+  const { messages, sendMessage } = useChat({
+    connection: websocketAdapter,
+  })
+
+  // ... render messages
+}
+```
+
+**SubscribeConnectionAdapter (push-based / separate subscribe + send):**
+
+Use for push-based protocols where the server can send data at any time
+(persistent WebSocket connections, MQTT, server push). The `subscribe` method
+returns an `AsyncIterable<StreamChunk>` that stays open, and `send` dispatches
+messages through it.
+
+```typescript
+import type { StreamChunk, UIMessage } from '@tanstack/ai'
+
+// SubscribeConnectionAdapter is exported from @tanstack/ai-client
+// (not re-exported by framework packages -- use ConnectionAdapter
+// union type from @tanstack/ai-react for typing)
+const pushAdapter = {
+  subscribe(abortSignal?: AbortSignal): AsyncIterable<StreamChunk> {
+    // Return a long-lived async iterable that yields chunks
+    // whenever the server pushes them
+    return createPersistentStream(abortSignal)
+  },
+
+  async send(
+    messages: Array<UIMessage>,
+    data?: Record<string, unknown>,
+    abortSignal?: AbortSignal,
+  ): Promise<void> {
+    // Dispatch messages; chunks arrive through subscribe()
+    await persistentConnection.send(JSON.stringify({ messages, ...data }))
+  },
+}
+
+function Chat() {
+  const { messages, sendMessage } = useChat({
+    connection: pushAdapter,
+  })
+
+  // ... render messages
+}
+```
+
+The `stream()` helper function (re-exported from `@tanstack/ai-react`) provides
+a shorthand for creating a `ConnectConnectionAdapter` from an async generator:
+
+```typescript
+import { useChat, stream } from '@tanstack/ai-react'
+import type { StreamChunk, UIMessage } from '@tanstack/ai'
+
+const directAdapter = stream(async function* (
+  messages: Array<UIMessage>,
+  data?: Record<string, unknown>,
+): AsyncGenerator<StreamChunk> {
+  const response = await fetch('https://my-api.com/chat', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ messages, ...data }),
+  })
+
+  const reader = response.body!.getReader()
+  const decoder = new TextDecoder()
+  let buffer = ''
+
+  while (true) {
+    const { done, value } = await reader.read()
+    if (done) break
+
+    buffer += decoder.decode(value, { stream: true })
+    const lines = buffer.split('\n')
+    buffer = lines.pop() || ''
+
+    for (const line of lines) {
+      if (line.trim()) {
+        yield JSON.parse(line) as StreamChunk
+      }
+    }
+  }
+})
+
+const { messages, sendMessage } = useChat({
+  connection: directAdapter,
+})
+```
+
+## Common Mistakes
+
+### a. 
HIGH: Providing both connect and subscribe+send in connection adapter + +The `ConnectionAdapter` interface has two mutually exclusive modes. Providing +both throws at runtime. + +```typescript +// WRONG -- throws "Connection adapter must provide either connect or both +// subscribe and send, not both modes" +const adapter = { + async *connect(messages) { + /* ... */ + }, + subscribe(signal) { + /* ... */ + }, + async send(messages) { + /* ... */ + }, +} + +// CORRECT -- pick one mode +// Option A: ConnectConnectionAdapter (pull-based) +const pullAdapter = { + async *connect(messages, data, abortSignal) { + // ... yield StreamChunks + }, +} + +// Option B: SubscribeConnectionAdapter (push-based) +const pushAdapter = { + subscribe(abortSignal) { + return longLivedAsyncIterable + }, + async send(messages, data, abortSignal) { + await connection.dispatch({ messages, ...data }) + }, +} +``` + +Source: `ai-client/src/connection-adapters.ts` line 116 + +### b. MEDIUM: SSE browser connection limits + +Browsers limit SSE connections to 6-8 per domain (the HTTP/1.1 connection +limit). Multiple chat sessions on the same page, or multiple tabs to the +same origin, can exhaust this limit. New connections queue indefinitely until +an existing one closes. + +Mitigations: + +- Use HTTP/2 (multiplexes streams over a single TCP connection; no per-domain limit) +- Use `fetchHttpStream` instead of `fetchServerSentEvents` (each request is a + standard POST, not a long-lived EventSource) +- Close idle connections when not actively streaming +- Use a single persistent WebSocket via `SubscribeConnectionAdapter` instead of + per-request SSE connections + +Source: `docs/chat/connection-adapters.md` + +### c. MEDIUM: HTTP stream without implementing reconnection + +SSE has built-in browser auto-reconnection via the `EventSource` API. HTTP +stream (NDJSON via `fetchHttpStream`) does not -- if the connection drops +mid-stream, the partial response is silently lost with no automatic retry. 
+
+If your application needs resilience to transient network errors with HTTP
+streaming, implement retry logic in your connection adapter:
+
+```typescript
+import { useChat } from '@tanstack/ai-react'
+import type { ConnectionAdapter } from '@tanstack/ai-react'
+import type { StreamChunk, UIMessage } from '@tanstack/ai'
+
+const resilientAdapter: ConnectionAdapter = {
+  async *connect(
+    messages: Array<UIMessage>,
+    data?: Record<string, unknown>,
+    abortSignal?: AbortSignal,
+  ): AsyncGenerator<StreamChunk> {
+    const maxRetries = 3
+    let attempt = 0
+
+    while (attempt < maxRetries) {
+      try {
+        const response = await fetch('https://my-api.com/chat', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ messages, ...data }),
+          signal: abortSignal,
+        })
+
+        if (!response.ok) {
+          throw new Error(`HTTP ${response.status}`)
+        }
+
+        const reader = response.body!.getReader()
+        const decoder = new TextDecoder()
+        let buffer = ''
+
+        while (true) {
+          const { done, value } = await reader.read()
+          if (done) break
+
+          buffer += decoder.decode(value, { stream: true })
+          const lines = buffer.split('\n')
+          buffer = lines.pop() || ''
+
+          for (const line of lines) {
+            if (line.trim()) {
+              yield JSON.parse(line) as StreamChunk
+            }
+          }
+        }
+
+        return // Stream completed successfully
+      } catch (err) {
+        if (abortSignal?.aborted) throw err
+        attempt++
+        if (attempt >= maxRetries) throw err
+        // Exponential backoff
+        await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt))
+      }
+    }
+  },
+}
+
+const { messages, sendMessage } = useChat({
+  connection: resilientAdapter,
+})
+```
+
+Note: `fetchServerSentEvents` in TanStack AI uses `fetch()` under the hood (not
+the browser `EventSource` API), so it also does not auto-reconnect. The SSE
+auto-reconnection advantage only applies when using the native `EventSource` API
+directly.
+ +Source: `docs/protocol/http-stream-protocol.md` + +## Cross-References + +- See also: **ai-core/ag-ui-protocol/SKILL.md** -- Understanding the AG-UI protocol helps build compatible custom servers +- See also: **ai-core/chat-experience/SKILL.md** -- Full chat setup patterns including server-side `chat()` and `toServerSentEventsResponse()` +- See also: **ai-core/middleware/SKILL.md** -- Use middleware for analytics and lifecycle events on the server side diff --git a/packages/typescript/ai/skills/ai-core/media-generation/SKILL.md b/packages/typescript/ai/skills/ai-core/media-generation/SKILL.md new file mode 100644 index 000000000..6bb97097d --- /dev/null +++ b/packages/typescript/ai/skills/ai-core/media-generation/SKILL.md @@ -0,0 +1,471 @@ +--- +name: ai-core/media-generation +description: > + Image, video, speech (TTS), and transcription generation using + activity-specific adapters: generateImage() with openaiImage/geminiImage, + generateVideo() with async polling, generateSpeech() with openaiSpeech, + generateTranscription() with openaiTranscription. React hooks: + useGenerateImage, useGenerateSpeech, useTranscription, useGenerateVideo. + TanStack Start server function integration with toServerSentEventsResponse. +type: sub-skill +library: tanstack-ai +library_version: '0.10.0' +sources: + - 'TanStack/ai:docs/media/generations.md' + - 'TanStack/ai:docs/media/generation-hooks.md' + - 'TanStack/ai:docs/media/image-generation.md' + - 'TanStack/ai:docs/media/video-generation.md' + - 'TanStack/ai:docs/media/text-to-speech.md' + - 'TanStack/ai:docs/media/transcription.md' +--- + +# Media Generation + +> **Dependency note:** This skill builds on ai-core. Read it first for critical rules. + +All media activities (image, speech, transcription, video) follow the same +server/client architecture: a `generate*()` function on the server, an SSE +transport via `toServerSentEventsResponse()`, and a framework hook on the +client. 
+ +## Setup -- Image Generation End-to-End + +### Server (API route or TanStack Start server function) + +```typescript +// routes/api/generate/image.ts +import { generateImage, toServerSentEventsResponse } from '@tanstack/ai' +import { openaiImage } from '@tanstack/ai-openai' + +export async function POST(req: Request) { + const { prompt, size, numberOfImages } = await req.json() + + const stream = generateImage({ + adapter: openaiImage('gpt-image-1'), + prompt, + size, + numberOfImages, + stream: true, + }) + + return toServerSentEventsResponse(stream) +} +``` + +### Client (React) + +```tsx +import { useGenerateImage, fetchServerSentEvents } from '@tanstack/ai-react' +import { useState } from 'react' + +function ImageGenerator() { + const [prompt, setPrompt] = useState('') + const { generate, result, isLoading, error, reset } = useGenerateImage({ + connection: fetchServerSentEvents('/api/generate/image'), + }) + + return ( +
+    <div>
+      <input
+        value={prompt}
+        onChange={(e) => setPrompt(e.target.value)}
+        placeholder="Describe an image..."
+      />
+      <button onClick={() => generate({ prompt })} disabled={isLoading}>
+        {isLoading ? 'Generating...' : 'Generate'}
+      </button>
+
+      {error && <div className="error">Error: {error.message}</div>}
+
+      {result?.images.map((img, i) => (
+        <img
+          key={i}
+          src={img.url ?? `data:image/png;base64,${img.b64Json}`}
+          alt={img.revisedPrompt ?? 'Generated image'}
+        />
+      ))}
+
+      {result && <button onClick={reset}>Reset</button>}
+    </div>
+    )
+}
+```
+
+### TanStack Start: Server Function Streaming (recommended)
+
+When using TanStack Start, return `toServerSentEventsResponse()` from a
+server function. The client fetcher receives a `Response` and the hook
+parses it as SSE automatically:
+
+```typescript
+// lib/server-functions.ts
+import { createServerFn } from '@tanstack/react-start'
+import { generateImage, toServerSentEventsResponse } from '@tanstack/ai'
+import { openaiImage } from '@tanstack/ai-openai'
+
+export const generateImageStreamFn = createServerFn({ method: 'POST' })
+  .inputValidator((data: { prompt: string; model?: string }) => data)
+  .handler(({ data }) => {
+    return toServerSentEventsResponse(
+      generateImage({
+        adapter: openaiImage(data.model ?? 'gpt-image-1'),
+        prompt: data.prompt,
+        stream: true,
+      }),
+    )
+  })
+```
+
+```tsx
+import { useGenerateImage } from '@tanstack/ai-react'
+import { generateImageStreamFn } from '../lib/server-functions'
+
+function ImageGenerator() {
+  const { generate, result, isLoading } = useGenerateImage({
+    fetcher: (input) => generateImageStreamFn({ data: input }),
+  })
+
+  return (
+    <button
+      onClick={() => generate({ prompt: 'A cat wearing a hat' })}
+      disabled={isLoading}
+    >
+      Generate
+    </button>
+  )
+}
+```
+
+---
+
+## Core Patterns
+
+### 1. Image Generation
+
+Supported adapters: `openaiImage` (dall-e-2, dall-e-3, gpt-image-1,
+gpt-image-1-mini) and `geminiImage` (gemini-3.1-flash-image-preview,
+imagen-4.0-generate-001, etc.).
+ +```typescript +import { generateImage } from '@tanstack/ai' +import { openaiImage } from '@tanstack/ai-openai' +import { geminiImage } from '@tanstack/ai-gemini' + +// OpenAI with quality/background options +const openaiResult = await generateImage({ + adapter: openaiImage('gpt-image-1'), + prompt: 'A cat wearing a hat', + size: '1024x1024', + numberOfImages: 2, + modelOptions: { + quality: 'high', + background: 'transparent', + outputFormat: 'png', + }, +}) + +// Gemini native model with aspect-ratio sizes +const geminiResult = await generateImage({ + adapter: geminiImage('gemini-3.1-flash-image-preview'), + prompt: 'A futuristic cityscape at night', + size: '16:9_4K', +}) + +// Gemini Imagen model +const imagenResult = await generateImage({ + adapter: geminiImage('imagen-4.0-generate-001'), + prompt: 'A landscape photo', + modelOptions: { aspectRatio: '16:9' }, +}) +``` + +Result shape: `ImageGenerationResult` with `images` array where each entry +has `b64Json?`, `url?`, and `revisedPrompt?`. OpenAI image URLs expire +after 1 hour -- download or display immediately. + +### 2. Text-to-Speech + +Adapter: `openaiSpeech` (tts-1, tts-1-hd, gpt-4o-audio-preview). + +```typescript +import { generateSpeech } from '@tanstack/ai' +import { openaiSpeech } from '@tanstack/ai-openai' + +const result = await generateSpeech({ + adapter: openaiSpeech('tts-1-hd'), + text: 'Hello, welcome to TanStack AI!', + voice: 'alloy', // alloy | echo | fable | onyx | nova | shimmer | ash | ballad | coral | sage | verse + format: 'mp3', // mp3 | opus | aac | flac | wav | pcm + speed: 1.0, // 0.25 to 4.0 +}) + +// result.audio is base64-encoded audio +// result.format is the output format string +// result.contentType is the MIME type (e.g. 
"audio/mpeg") +``` + +Client hook: + +```tsx +import { useGenerateSpeech, fetchServerSentEvents } from '@tanstack/ai-react' + +const { generate, result, isLoading } = useGenerateSpeech({ + connection: fetchServerSentEvents('/api/generate/speech'), +}) + +// Trigger: generate({ text: 'Hello!', voice: 'alloy' }) +// Play: