diff --git a/apps/webuiapps/src/components/ChatPanel/index.tsx b/apps/webuiapps/src/components/ChatPanel/index.tsx
index 92442b5..740488c 100644
--- a/apps/webuiapps/src/components/ChatPanel/index.tsx
+++ b/apps/webuiapps/src/components/ChatPanel/index.tsx
@@ -95,6 +95,10 @@ interface CharacterDisplayMessage extends DisplayMessage {
toolCalls?: string[]; // collapsed tool call summaries
}
+function hasUsableLLMConfig(config: LLMConfig | null | undefined): config is LLMConfig {
+ return !!config?.baseUrl.trim() && !!config.model.trim();
+}
+
// ---------------------------------------------------------------------------
// Tool definitions for character system
// ---------------------------------------------------------------------------
@@ -649,7 +653,7 @@ const ChatPanel: React.FC<{
while (actionQueueRef.current.length > 0) {
const actionMsg = actionQueueRef.current.shift()!;
const cfg = configRef.current;
- if (!cfg?.apiKey) break;
+ if (!hasUsableLLMConfig(cfg)) break;
const newHistory: ChatMessage[] = [
...chatHistoryRef.current,
@@ -672,7 +676,7 @@ const ChatPanel: React.FC<{
useEffect(() => {
const unsubscribe = onUserAction((event: unknown) => {
const cfg = configRef.current;
- if (!cfg?.apiKey) return;
+ if (!hasUsableLLMConfig(cfg)) return;
const evt = event as {
app_action?: {
@@ -704,7 +708,7 @@ const ChatPanel: React.FC<{
async (overrideText?: string) => {
const text = overrideText ?? input.trim();
if (!text || loading) return;
- if (!config?.apiKey) {
+ if (!hasUsableLLMConfig(config)) {
setShowSettings(true);
return;
}
@@ -1102,9 +1106,9 @@ const ChatPanel: React.FC<{
{messages.length === 0 && (
- {config?.apiKey
+ {hasUsableLLMConfig(config)
? `${character.character_name} is ready to chat...`
- : 'Click the gear icon to configure your LLM API key'}
+ : 'Click the gear icon to configure your LLM connection'}
)}
{messages.map((msg) => (
@@ -1287,6 +1291,7 @@ const SettingsModal: React.FC<{
<option value="openai">OpenAI</option>
<option value="anthropic">Anthropic</option>
<option value="deepseek">DeepSeek</option>
+<option value="llama.cpp">llama.cpp</option>
<option value="minimax">MiniMax</option>
<option value="z.ai">Z.ai</option>
<option value="kimi">Kimi</option>
@@ -1301,7 +1306,7 @@ const SettingsModal: React.FC<{
type="password"
value={apiKey}
onChange={(e) => setApiKey(e.target.value)}
- placeholder="sk-..."
+ placeholder="Optional for local servers"
/>
diff --git a/apps/webuiapps/src/lib/__tests__/llmClient.test.ts b/apps/webuiapps/src/lib/__tests__/llmClient.test.ts
index e9dd605..1b53438 100644
--- a/apps/webuiapps/src/lib/__tests__/llmClient.test.ts
+++ b/apps/webuiapps/src/lib/__tests__/llmClient.test.ts
@@ -36,6 +36,13 @@ const MOCK_ANTHROPIC_CONFIG: LLMConfig = {
model: 'claude-opus-4-6',
};
+const MOCK_LLAMACPP_CONFIG: LLMConfig = {
+ provider: 'llama.cpp',
+ apiKey: '',
+ baseUrl: 'http://athena:8081',
+ model: 'Qwen_Qwen3.5-35B-A3B',
+};
+
const MOCK_MESSAGES: ChatMessage[] = [{ role: 'user', content: 'Hello' }];
const MOCK_TOOLS: ToolDef[] = [
@@ -114,6 +121,13 @@ describe('getDefaultProviderConfig()', () => {
expect(cfg.model).toBe('deepseek-chat');
});
+ it('returns correct defaults for llama.cpp', () => {
+ const cfg = getDefaultProviderConfig('llama.cpp');
+ expect(cfg.provider).toBe('llama.cpp');
+ expect(cfg.baseUrl).toBe('http://localhost:8080');
+ expect(cfg.model).toBe('local-model');
+ });
+
it('returns correct defaults for minimax', () => {
const cfg = getDefaultProviderConfig('minimax');
expect(cfg.provider).toBe('minimax');
@@ -422,6 +436,51 @@ describe('chat()', () => {
});
});
+ describe('llama.cpp provider (OpenAI-compatible)', () => {
+ it('routes to OpenAI path without requiring an API key', async () => {
+ const mockFetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse('Local response'));
+ globalThis.fetch = mockFetch;
+
+ const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG);
+
+ expect(result.content).toBe('Local response');
+ const headers = mockFetch.mock.calls[0][1].headers as Record<string, string>;
+ expect(headers['Authorization']).toBeUndefined();
+ expect(headers['X-LLM-Target-URL']).toBe('http://athena:8081/v1/chat/completions');
+ });
+
+ it('strips Qwen-style think tags from assistant content', async () => {
+ const mockFetch = vi
+ .fn()
+ .mockResolvedValueOnce(makeOpenAIResponse('<think>hidden reasoning</think> Hello there'));
+ globalThis.fetch = mockFetch;
+
+ const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG);
+
+ expect(result.content).toBe('Hello there');
+ });
+
+ it('converts inline XML-style tool call content into structured tool calls', async () => {
+ const inlineToolContent = `
+<tool_call>respond_to_user
+<arg_key>character_expression</arg_key>
+<arg_value>{"content":"What? Did I catch you off guard?","emotion":"happy"}</arg_value>
+<arg_key>user_interaction</arg_key>
+<arg_value>{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}</arg_value>
+</tool_call>`;
+ globalThis.fetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse(inlineToolContent));
+
+ const result = await chat(MOCK_MESSAGES, MOCK_TOOLS, MOCK_LLAMACPP_CONFIG);
+
+ expect(result.content).toBe('');
+ expect(result.toolCalls).toHaveLength(1);
+ expect(result.toolCalls[0].function.name).toBe('respond_to_user');
+ expect(result.toolCalls[0].function.arguments).toBe(
+ '{"character_expression":{"content":"What? Did I catch you off guard?","emotion":"happy"},"user_interaction":{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}}',
+ );
+ });
+ });
+
describe('Anthropic provider', () => {
it('uses x-api-key and anthropic-version headers', async () => {
const mockFetch = vi.fn().mockResolvedValueOnce(makeAnthropicResponse('Anthropic response'));
diff --git a/apps/webuiapps/src/lib/llmClient.ts b/apps/webuiapps/src/lib/llmClient.ts
index cae2d61..5c30e12 100644
--- a/apps/webuiapps/src/lib/llmClient.ts
+++ b/apps/webuiapps/src/lib/llmClient.ts
@@ -1,6 +1,6 @@
/**
* Minimal LLM API Client
- * Supports OpenAI / DeepSeek / Anthropic formats
+ * Supports OpenAI-compatible / Anthropic-compatible formats
*/
import type { LLMConfig } from './llmModels';
@@ -88,6 +88,73 @@ interface LLMResponse {
toolCalls: ToolCall[];
}
+interface InlineToolParseResult {
+ content: string;
+ toolCalls: ToolCall[];
+}
+
+function stripThinkTags(content: string): string {
+ const withoutBlocks = content
+ .replace(/<think[^>]*>[\s\S]*?<\/think>/gi, '')
+ .replace(/<\/?think\b[^>]*>/gi, '');
+ return withoutBlocks === content ? content : withoutBlocks.trim();
+}
+
+function parseInlineArgValue(rawValue: string): unknown {
+ const trimmed = rawValue.trim();
+ if (!trimmed) return '';
+ try {
+ return JSON.parse(trimmed);
+ } catch {
+ return trimmed;
+ }
+}
+
+function extractInlineToolCalls(rawContent: string): InlineToolParseResult {
+ const content = stripThinkTags(rawContent);
+ if (!content.includes('<tool_call>') || !content.includes('</tool_call>')) {
+ return { content, toolCalls: [] };
+ }
+
+ const blockRegex = /<tool_call>(?:\s*|\()([a-zA-Z0-9_.-]+)\s*([\s\S]*?)<\/tool_call>/g;
+ const toolCalls: ToolCall[] = [];
+ let cleanedContent = content;
+ let matchIndex = 0;
+
+ for (const match of content.matchAll(blockRegex)) {
+ const toolName = match[1]?.trim();
+ const body = match[2] ?? '';
+ if (!toolName) continue;
+
+ const args: Record<string, unknown> = {};
+ const pairRegex =
+ /<arg_key>\s*([\s\S]*?)\s*<\/arg_key>\s*<arg_value>\s*([\s\S]*?)\s*<\/arg_value>/g;
+
+ for (const pair of body.matchAll(pairRegex)) {
+ const key = pair[1]?.trim();
+ if (!key) continue;
+ args[key] = parseInlineArgValue(pair[2] ?? '');
+ }
+
+ if (Object.keys(args).length === 0) continue;
+
+ toolCalls.push({
+ id: `inline_tool_${matchIndex++}`,
+ type: 'function',
+ function: {
+ name: toolName,
+ arguments: JSON.stringify(args),
+ },
+ });
+ cleanedContent = cleanedContent.replace(match[0], '');
+ }
+
+ return {
+ content: cleanedContent.trim(),
+ toolCalls,
+ };
+}
+
function hasVersionSuffix(url: string): boolean {
return /\/v\d+\/?$/.test(url);
}
@@ -162,14 +229,17 @@ async function chatOpenAI(
messageCount: messages.length,
toolCount: tools.length,
});
+ const headers: Record<string, string> = {
+ 'Content-Type': 'application/json',
+ 'X-LLM-Target-URL': targetUrl,
+ ...parseCustomHeaders(config.customHeaders),
+ };
+ if (config.apiKey.trim()) {
+ headers.Authorization = `Bearer ${config.apiKey}`;
+ }
const res = await fetch('/api/llm-proxy', {
method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- Authorization: `Bearer ${config.apiKey}`,
- 'X-LLM-Target-URL': targetUrl,
- ...parseCustomHeaders(config.customHeaders),
- },
+ headers,
body: JSON.stringify(body),
});
@@ -183,7 +253,8 @@ async function chatOpenAI(
const data = JSON.parse(text);
const choice = data.choices?.[0]?.message;
- const toolCalls = choice?.tool_calls || [];
+ const parsedInline = extractInlineToolCalls(choice?.content || '');
+ const toolCalls = choice?.tool_calls?.length ? choice.tool_calls : parsedInline.toolCalls;
const calledNames = toolCalls
.map((tc: { function?: { name?: string } }) => tc.function?.name)
.filter(Boolean);
@@ -195,7 +266,9 @@ async function chatOpenAI(
calledNames,
);
return {
- content: choice?.content || '',
+ content: choice?.tool_calls?.length
+ ? stripThinkTags(choice?.content || '')
+ : parsedInline.content,
toolCalls,
};
}
@@ -267,15 +340,18 @@ async function chatAnthropic(
messageCount: anthropicMessages.length,
toolCount: anthropicTools.length,
});
+ const headers: Record<string, string> = {
+ 'Content-Type': 'application/json',
+ 'anthropic-version': '2023-06-01',
+ 'X-LLM-Target-URL': targetUrl,
+ ...parseCustomHeaders(config.customHeaders),
+ };
+ if (config.apiKey.trim()) {
+ headers['x-api-key'] = config.apiKey;
+ }
const res = await fetch('/api/llm-proxy', {
method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- 'x-api-key': config.apiKey,
- 'anthropic-version': '2023-06-01',
- 'X-LLM-Target-URL': targetUrl,
- ...parseCustomHeaders(config.customHeaders),
- },
+ headers,
body: JSON.stringify(body),
});
@@ -314,5 +390,5 @@ async function chatAnthropic(
'calledNames=',
calledNames,
);
- return { content, toolCalls };
+ return { content: stripThinkTags(content), toolCalls };
}
diff --git a/apps/webuiapps/src/lib/llmModels.ts b/apps/webuiapps/src/lib/llmModels.ts
index 5a2cbc9..346907e 100644
--- a/apps/webuiapps/src/lib/llmModels.ts
+++ b/apps/webuiapps/src/lib/llmModels.ts
@@ -2,6 +2,7 @@ export type LLMProvider =
| 'openai'
| 'anthropic'
| 'deepseek'
+ | 'llama.cpp'
| 'minimax'
| 'z.ai'
| 'kimi'
@@ -77,6 +78,13 @@ export const LLM_PROVIDER_CONFIGS: Record = {
],
},
+ 'llama.cpp': {
+ displayName: 'llama.cpp',
+ baseUrl: 'http://localhost:8080',
+ defaultModel: 'local-model',
+ models: [],
+ },
+
minimax: {
displayName: 'MiniMax',
baseUrl: 'https://api.minimax.io/anthropic/v1',