From 7b22f6d250b6c4687b5824382c5f6e2abd3adfe0 Mon Sep 17 00:00:00 2001 From: David Carroll Date: Thu, 26 Mar 2026 10:49:35 -0500 Subject: [PATCH 1/2] feat(webuiapps): add local llama.cpp LLM support --- .../src/components/ChatPanel/index.tsx | 17 ++++--- .../src/lib/__tests__/llmClient.test.ts | 39 ++++++++++++++++ apps/webuiapps/src/lib/llmClient.ts | 45 ++++++++++++------- apps/webuiapps/src/lib/llmModels.ts | 8 ++++ 4 files changed, 87 insertions(+), 22 deletions(-) diff --git a/apps/webuiapps/src/components/ChatPanel/index.tsx b/apps/webuiapps/src/components/ChatPanel/index.tsx index 92442b5..740488c 100644 --- a/apps/webuiapps/src/components/ChatPanel/index.tsx +++ b/apps/webuiapps/src/components/ChatPanel/index.tsx @@ -95,6 +95,10 @@ interface CharacterDisplayMessage extends DisplayMessage { toolCalls?: string[]; // collapsed tool call summaries } +function hasUsableLLMConfig(config: LLMConfig | null | undefined): config is LLMConfig { + return !!config?.baseUrl.trim() && !!config.model.trim(); +} + // --------------------------------------------------------------------------- // Tool definitions for character system // --------------------------------------------------------------------------- @@ -649,7 +653,7 @@ const ChatPanel: React.FC<{ while (actionQueueRef.current.length > 0) { const actionMsg = actionQueueRef.current.shift()!; const cfg = configRef.current; - if (!cfg?.apiKey) break; + if (!hasUsableLLMConfig(cfg)) break; const newHistory: ChatMessage[] = [ ...chatHistoryRef.current, @@ -672,7 +676,7 @@ const ChatPanel: React.FC<{ useEffect(() => { const unsubscribe = onUserAction((event: unknown) => { const cfg = configRef.current; - if (!cfg?.apiKey) return; + if (!hasUsableLLMConfig(cfg)) return; const evt = event as { app_action?: { @@ -704,7 +708,7 @@ const ChatPanel: React.FC<{ async (overrideText?: string) => { const text = overrideText ?? 
input.trim(); if (!text || loading) return; - if (!config?.apiKey) { + if (!hasUsableLLMConfig(config)) { setShowSettings(true); return; } @@ -1102,9 +1106,9 @@ const ChatPanel: React.FC<{
{messages.length === 0 && (
- {config?.apiKey + {hasUsableLLMConfig(config) ? `${character.character_name} is ready to chat...` - : 'Click the gear icon to configure your LLM API key'} + : 'Click the gear icon to configure your LLM connection'}
)} {messages.map((msg) => ( @@ -1287,6 +1291,7 @@ const SettingsModal: React.FC<{ + @@ -1301,7 +1306,7 @@ const SettingsModal: React.FC<{ type="password" value={apiKey} onChange={(e) => setApiKey(e.target.value)} - placeholder="sk-..." + placeholder="Optional for local servers" />
diff --git a/apps/webuiapps/src/lib/__tests__/llmClient.test.ts b/apps/webuiapps/src/lib/__tests__/llmClient.test.ts index e9dd605..8012248 100644 --- a/apps/webuiapps/src/lib/__tests__/llmClient.test.ts +++ b/apps/webuiapps/src/lib/__tests__/llmClient.test.ts @@ -36,6 +36,13 @@ const MOCK_ANTHROPIC_CONFIG: LLMConfig = { model: 'claude-opus-4-6', }; +const MOCK_LLAMACPP_CONFIG: LLMConfig = { + provider: 'llama.cpp', + apiKey: '', + baseUrl: 'http://athena:8081', + model: 'Qwen_Qwen3.5-35B-A3B', +}; + const MOCK_MESSAGES: ChatMessage[] = [{ role: 'user', content: 'Hello' }]; const MOCK_TOOLS: ToolDef[] = [ @@ -114,6 +121,13 @@ describe('getDefaultProviderConfig()', () => { expect(cfg.model).toBe('deepseek-chat'); }); + it('returns correct defaults for llama.cpp', () => { + const cfg = getDefaultProviderConfig('llama.cpp'); + expect(cfg.provider).toBe('llama.cpp'); + expect(cfg.baseUrl).toBe('http://localhost:8080'); + expect(cfg.model).toBe('local-model'); + }); + it('returns correct defaults for minimax', () => { const cfg = getDefaultProviderConfig('minimax'); expect(cfg.provider).toBe('minimax'); @@ -422,6 +436,31 @@ describe('chat()', () => { }); }); + describe('llama.cpp provider (OpenAI-compatible)', () => { + it('routes to OpenAI path without requiring an API key', async () => { + const mockFetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse('Local response')); + globalThis.fetch = mockFetch; + + const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG); + + expect(result.content).toBe('Local response'); + const headers = mockFetch.mock.calls[0][1].headers as Record<string, string>; + expect(headers['Authorization']).toBeUndefined(); + expect(headers['X-LLM-Target-URL']).toBe('http://athena:8081/v1/chat/completions'); + }); + + it('strips Qwen-style think tags from assistant content', async () => { + const mockFetch = vi + .fn() + .mockResolvedValueOnce(makeOpenAIResponse('<think>hidden reasoning</think>Hello there')); + globalThis.fetch = mockFetch; + + const result = await 
chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG); + + expect(result.content).toBe('Hello there'); + }); + }); + describe('Anthropic provider', () => { it('uses x-api-key and anthropic-version headers', async () => { const mockFetch = vi.fn().mockResolvedValueOnce(makeAnthropicResponse('Anthropic response')); diff --git a/apps/webuiapps/src/lib/llmClient.ts b/apps/webuiapps/src/lib/llmClient.ts index cae2d61..0314df0 100644 --- a/apps/webuiapps/src/lib/llmClient.ts +++ b/apps/webuiapps/src/lib/llmClient.ts @@ -1,6 +1,6 @@ /** * Minimal LLM API Client - * Supports OpenAI / DeepSeek / Anthropic formats + * Supports OpenAI-compatible / Anthropic-compatible formats */ import type { LLMConfig } from './llmModels'; @@ -88,6 +88,13 @@ interface LLMResponse { toolCalls: ToolCall[]; } +function stripThinkTags(content: string): string { + const withoutBlocks = content + .replace(/<think[^>]*>[\s\S]*?<\/think>/gi, '') + .replace(/<\/?think\b[^>]*>/gi, ''); + return withoutBlocks === content ? content : withoutBlocks.trim(); +} + function hasVersionSuffix(url: string): boolean { return /\/v\d+\/?$/.test(url); } @@ -162,14 +169,17 @@ async function chatOpenAI( messageCount: messages.length, toolCount: tools.length, }); + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + 'X-LLM-Target-URL': targetUrl, + ...parseCustomHeaders(config.customHeaders), + }; + if (config.apiKey.trim()) { + headers.Authorization = `Bearer ${config.apiKey}`; + } const res = await fetch('/api/llm-proxy', { method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${config.apiKey}`, - 'X-LLM-Target-URL': targetUrl, - ...parseCustomHeaders(config.customHeaders), - }, + headers, body: JSON.stringify(body), }); @@ -195,7 +205,7 @@ async function chatOpenAI( calledNames, ); return { - content: choice?.content || '', + content: stripThinkTags(choice?.content || ''), toolCalls, }; } @@ -267,15 +277,18 @@ async function chatAnthropic( messageCount: anthropicMessages.length, 
toolCount: anthropicTools.length, }); + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + 'anthropic-version': '2023-06-01', + 'X-LLM-Target-URL': targetUrl, + ...parseCustomHeaders(config.customHeaders), + }; + if (config.apiKey.trim()) { + headers['x-api-key'] = config.apiKey; + } const res = await fetch('/api/llm-proxy', { method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'x-api-key': config.apiKey, - 'anthropic-version': '2023-06-01', - 'X-LLM-Target-URL': targetUrl, - ...parseCustomHeaders(config.customHeaders), - }, + headers, body: JSON.stringify(body), }); @@ -314,5 +327,5 @@ async function chatAnthropic( 'calledNames=', calledNames, ); - return { content, toolCalls }; + return { content: stripThinkTags(content), toolCalls }; } diff --git a/apps/webuiapps/src/lib/llmModels.ts b/apps/webuiapps/src/lib/llmModels.ts index 5a2cbc9..346907e 100644 --- a/apps/webuiapps/src/lib/llmModels.ts +++ b/apps/webuiapps/src/lib/llmModels.ts @@ -2,6 +2,7 @@ export type LLMProvider = | 'openai' | 'anthropic' | 'deepseek' + | 'llama.cpp' | 'minimax' | 'z.ai' | 'kimi' @@ -77,6 +78,13 @@ export const LLM_PROVIDER_CONFIGS: Record = { ], }, + 'llama.cpp': { + displayName: 'llama.cpp', + baseUrl: 'http://localhost:8080', + defaultModel: 'local-model', + models: [], + }, + minimax: { displayName: 'MiniMax', baseUrl: 'https://api.minimax.io/anthropic/v1', From 847b54b311f6f60c3d4f188569fd4e7ecda80a7b Mon Sep 17 00:00:00 2001 From: David Carroll Date: Thu, 26 Mar 2026 12:04:53 -0500 Subject: [PATCH 2/2] fix(webuiapps): parse inline local-model tool calls --- .../src/lib/__tests__/llmClient.test.ts | 20 ++++++ apps/webuiapps/src/lib/llmClient.ts | 67 ++++++++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/apps/webuiapps/src/lib/__tests__/llmClient.test.ts b/apps/webuiapps/src/lib/__tests__/llmClient.test.ts index 8012248..1b53438 100644 --- a/apps/webuiapps/src/lib/__tests__/llmClient.test.ts +++ 
b/apps/webuiapps/src/lib/__tests__/llmClient.test.ts @@ -459,6 +459,26 @@ describe('chat()', () => { expect(result.content).toBe('Hello there'); }); + + it('converts inline XML-style tool call content into structured tool calls', async () => { + const inlineToolContent = ` +<tool_call>respond_to_user +<arg_key>character_expression</arg_key> +<arg_value>{"content":"What? Did I catch you off guard?","emotion":"happy"}</arg_value> +<arg_key>user_interaction</arg_key> +<arg_value>{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}</arg_value> +</tool_call>`; + globalThis.fetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse(inlineToolContent)); + + const result = await chat(MOCK_MESSAGES, MOCK_TOOLS, MOCK_LLAMACPP_CONFIG); + + expect(result.content).toBe(''); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls[0].function.name).toBe('respond_to_user'); + expect(result.toolCalls[0].function.arguments).toBe( + '{"character_expression":{"content":"What? Did I catch you off guard?","emotion":"happy"},"user_interaction":{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}}', + ); + }); }); describe('Anthropic provider', () => { diff --git a/apps/webuiapps/src/lib/llmClient.ts b/apps/webuiapps/src/lib/llmClient.ts index 0314df0..5c30e12 100644 --- a/apps/webuiapps/src/lib/llmClient.ts +++ b/apps/webuiapps/src/lib/llmClient.ts @@ -88,6 +88,11 @@ interface LLMResponse { toolCalls: ToolCall[]; } +interface InlineToolParseResult { + content: string; + toolCalls: ToolCall[]; +} + function stripThinkTags(content: string): string { const withoutBlocks = content .replace(/<think[^>]*>[\s\S]*?<\/think>/gi, '') .replace(/<\/?think\b[^>]*>/gi, ''); @@ -95,6 +100,61 @@ function stripThinkTags(content: string): string { return withoutBlocks === content ? 
content : withoutBlocks.trim(); } +function parseInlineArgValue(rawValue: string): unknown { + const trimmed = rawValue.trim(); + if (!trimmed) return ''; + try { + return JSON.parse(trimmed); + } catch { + return trimmed; + } +} + +function extractInlineToolCalls(rawContent: string): InlineToolParseResult { + const content = stripThinkTags(rawContent); + if (!content.includes('<tool_call>') || !content.includes('</tool_call>')) { + return { content, toolCalls: [] }; + } + + const blockRegex = /<tool_call>(?:\s*|\()([a-zA-Z0-9_.-]+)\s*([\s\S]*?)<\/tool_call>/g; + const toolCalls: ToolCall[] = []; + let cleanedContent = content; + let matchIndex = 0; + + for (const match of content.matchAll(blockRegex)) { + const toolName = match[1]?.trim(); + const body = match[2] ?? ''; + if (!toolName) continue; + + const args: Record<string, unknown> = {}; + const pairRegex = + /<arg_key>\s*([\s\S]*?)\s*<\/arg_key>\s*<arg_value>\s*([\s\S]*?)\s*<\/arg_value>/g; + + for (const pair of body.matchAll(pairRegex)) { + const key = pair[1]?.trim(); + if (!key) continue; + args[key] = parseInlineArgValue(pair[2] ?? ''); + } + + if (Object.keys(args).length === 0) continue; + + toolCalls.push({ + id: `inline_tool_${matchIndex++}`, + type: 'function', + function: { + name: toolName, + arguments: JSON.stringify(args), + }, + }); + cleanedContent = cleanedContent.replace(match[0], ''); + } + + return { + content: cleanedContent.trim(), + toolCalls, + }; +} + function hasVersionSuffix(url: string): boolean { return /\/v\d+\/?$/.test(url); } @@ -193,7 +253,8 @@ async function chatOpenAI( const data = JSON.parse(text); const choice = data.choices?.[0]?.message; - const toolCalls = choice?.tool_calls || []; + const parsedInline = extractInlineToolCalls(choice?.content || ''); + const toolCalls = choice?.tool_calls?.length ? 
choice.tool_calls : parsedInline.toolCalls; const calledNames = toolCalls .map((tc: { function?: { name?: string } }) => tc.function?.name) .filter(Boolean); @@ -205,7 +266,9 @@ async function chatOpenAI( calledNames, ); return { - content: stripThinkTags(choice?.content || ''), + content: choice?.tool_calls?.length + ? stripThinkTags(choice?.content || '') + : parsedInline.content, toolCalls, }; }